diff --git a/.github/ISSUE_TEMPLATE/96_installation-issues.md b/.github/ISSUE_TEMPLATE/96_installation-issues.md new file mode 100644 index 00000000000..c322ccc92ce --- /dev/null +++ b/.github/ISSUE_TEMPLATE/96_installation-issues.md @@ -0,0 +1,29 @@ +--- +name: Installation issue +about: Issue with ClickHouse installation from https://clickhouse.com/docs/en/install/ +title: '' +labels: comp-install +assignees: '' + +--- + +**Installation type** + +Packages, Docker, single binary, curl? + +**Source of ClickHouse** + +A link to the source, or the command you tried + +**Expected result** + +What you expected + +**The actual result** + +What you got + +**How to reproduce** + +* For Linux-based operating systems: provide a script for a clean Docker container based on the official image +* For anything else: steps to reproduce on a system that is as clean as possible diff --git a/.gitignore b/.gitignore index af4615a8e6c..09d3f4a4e33 100644 --- a/.gitignore +++ b/.gitignore @@ -80,6 +80,7 @@ core vgcore* *.deb +*.tar.zst *.build *.upload *.changes diff --git a/.gitmodules b/.gitmodules index abd29c38846..293029ad171 100644 --- a/.gitmodules +++ b/.gitmodules @@ -287,3 +287,6 @@ [submodule "contrib/corrosion"] path = contrib/corrosion url = https://github.com/corrosion-rs/corrosion.git +[submodule "contrib/morton-nd"] + path = contrib/morton-nd + url = https://github.com/morton-nd/morton-nd diff --git a/CHANGELOG.md b/CHANGELOG.md index 83c1cbf1eb4..22f6afc4901 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v22.10, 2022-10-25](#2210)**
**[ClickHouse release v22.9, 2022-09-22](#229)**
**[ClickHouse release v22.8-lts, 2022-08-18](#228)**
**[ClickHouse release v22.7, 2022-07-21](#227)**
@@ -10,6 +11,136 @@ **[ClickHouse release v22.1, 2022-01-18](#221)**
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**
+### ClickHouse release 22.10, 2022-10-26 + +#### Backward Incompatible Change +* Rename cache commands: `show caches` -> `show filesystem caches`, `describe cache` -> `describe filesystem cache`. [#41508](https://github.com/ClickHouse/ClickHouse/pull/41508) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove support for the `WITH TIMEOUT` section for `LIVE VIEW`. This closes [#40557](https://github.com/ClickHouse/ClickHouse/issues/40557). [#42173](https://github.com/ClickHouse/ClickHouse/pull/42173) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove support for the `{database}` macro from the client's prompt. It was displayed incorrectly if the database was unspecified and it was not updated on `USE` statements. This closes [#25891](https://github.com/ClickHouse/ClickHouse/issues/25891). [#42508](https://github.com/ClickHouse/ClickHouse/pull/42508) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Composable protocol configuration is added. Now different protocols can be set up with different listen hosts. Protocol wrappers such as PROXYv1 can be set up over any other protocols (TCP, TCP secure, MySQL, Postgres). [#41198](https://github.com/ClickHouse/ClickHouse/pull/41198) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add `S3` as a new type of the destination of backups. Support BACKUP to S3 with as-is path/data structure. [#42333](https://github.com/ClickHouse/ClickHouse/pull/42333) ([Vitaly Baranov](https://github.com/vitlibar)), [#42232](https://github.com/ClickHouse/ClickHouse/pull/42232) ([Azat Khuzhin](https://github.com/azat)). +* Added functions (`randUniform`, `randNormal`, `randLogNormal`, `randExponential`, `randChiSquared`, `randStudentT`, `randFisherF`, `randBernoulli`, `randBinomial`, `randNegativeBinomial`, `randPoisson`) to generate random values according to the specified distributions. This closes [#21834](https://github.com/ClickHouse/ClickHouse/issues/21834). [#42411](https://github.com/ClickHouse/ClickHouse/pull/42411) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* An improvement for ClickHouse Keeper: add support for uploading snapshots to S3. S3 information can be defined inside `keeper_server.s3_snapshot`. [#41342](https://github.com/ClickHouse/ClickHouse/pull/41342) ([Antonio Andelic](https://github.com/antonio2368)). +* Added an aggregate function `analysisOfVariance` (`anova`) to perform a statistical test over several groups of normally distributed observations to find out whether all groups have the same mean or not. Original PR [#37872](https://github.com/ClickHouse/ClickHouse/issues/37872). [#42131](https://github.com/ClickHouse/ClickHouse/pull/42131) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Support limiting of temporary data stored on disk using settings `max_temporary_data_on_disk_size_for_user`/`max_temporary_data_on_disk_size_for_query` . [#40893](https://github.com/ClickHouse/ClickHouse/pull/40893) ([Vladimir C](https://github.com/vdimir)). +* Add setting `format_json_object_each_row_column_for_object_name` to write/parse object name as column value in JSONObjectEachRow format. [#41703](https://github.com/ClickHouse/ClickHouse/pull/41703) ([Kruglov Pavel](https://github.com/Avogar)). +* Add BLAKE3 hash-function to SQL. [#33435](https://github.com/ClickHouse/ClickHouse/pull/33435) ([BoloniniD](https://github.com/BoloniniD)). +* The function `javaHash` has been extended to integers. 
[#41131](https://github.com/ClickHouse/ClickHouse/pull/41131) ([JackyWoo](https://github.com/JackyWoo)). +* Add OpenTelemetry support to ON CLUSTER DDL (require `distributed_ddl_entry_format_version` to be set to 4). [#41484](https://github.com/ClickHouse/ClickHouse/pull/41484) ([Frank Chen](https://github.com/FrankChen021)). +* Added system table `asynchronous_insert_log`. It contains information about asynchronous inserts (including results of queries in fire-and-forget mode (with `wait_for_async_insert=0`)) for better introspection. [#42040](https://github.com/ClickHouse/ClickHouse/pull/42040) ([Anton Popov](https://github.com/CurtizJ)). +* Add support for methods `lz4`, `bz2`, `snappy` in HTTP's `Accept-Encoding` which is a non-standard extension to HTTP protocol. [#42071](https://github.com/ClickHouse/ClickHouse/pull/42071) ([Nikolay Degterinsky](https://github.com/evillique)). + +#### Experimental Feature +* Added new infrastructure for query analysis and planning under the `allow_experimental_analyzer` setting. [#31796](https://github.com/ClickHouse/ClickHouse/pull/31796) ([Maksim Kita](https://github.com/kitaisreal)). +* Initial implementation of Kusto Query Language. Please don't use it. [#37961](https://github.com/ClickHouse/ClickHouse/pull/37961) ([Yong Wang](https://github.com/kashwy)). + +#### Performance Improvement +* Relax the "Too many parts" threshold. This closes [#6551](https://github.com/ClickHouse/ClickHouse/issues/6551). Now ClickHouse will allow more parts in a partition if the average part size is large enough (at least 10 GiB). This allows to have up to petabytes of data in a single partition of a single table on a single server, which is possible using disk shelves or object storage. [#42002](https://github.com/ClickHouse/ClickHouse/pull/42002) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Implement operator precedence element parser to make the required stack size smaller. [#34892](https://github.com/ClickHouse/ClickHouse/pull/34892) ([Nikolay Degterinsky](https://github.com/evillique)). +* DISTINCT in order optimization leverage sorting properties of data streams. This improvement will enable reading in order for DISTINCT if applicable (before it was necessary to provide ORDER BY for columns in DISTINCT). [#41014](https://github.com/ClickHouse/ClickHouse/pull/41014) ([Igor Nikonov](https://github.com/devcrafter)). +* ColumnVector: optimize UInt8 index with AVX512VBMI. [#41247](https://github.com/ClickHouse/ClickHouse/pull/41247) ([Guo Wangyang](https://github.com/guowangy)). +* Optimize the lock contentions for `ThreadGroupStatus::mutex`. The performance experiments of **SSB** (Star Schema Benchmark) on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) shows that this change could bring a **2.95x** improvement of the geomean of all subcases' QPS. [#41675](https://github.com/ClickHouse/ClickHouse/pull/41675) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Add `ldapr` capabilities to AArch64 builds. This is supported from Graviton 2+, Azure and GCP instances. Only appeared in clang-15 [not so long ago](https://github.com/llvm/llvm-project/commit/9609b5daffe9fd28d83d83da895abc5113f76c24). [#41778](https://github.com/ClickHouse/ClickHouse/pull/41778) ([Daniel Kutenin](https://github.com/danlark1)). +* Improve performance when comparing strings and one argument is an empty constant string. [#41870](https://github.com/ClickHouse/ClickHouse/pull/41870) ([Jiebin Sun](https://github.com/jiebinn)). 
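A minimal sketch of the DISTINCT-in-order entry above, with a hypothetical table whose sorting key starts with the DISTINCT columns; per that entry, such a query can now read in order without an explicit `ORDER BY`.

```sql
-- Hypothetical table: sorting key (d, user_id) covers the DISTINCT columns.
CREATE TABLE events (d Date, user_id UInt64, value UInt64)
ENGINE = MergeTree
ORDER BY (d, user_id);

-- Previously an explicit ORDER BY d, user_id was needed to read in order;
-- with the optimization, DISTINCT alone can leverage the sorting properties.
SELECT DISTINCT d, user_id FROM events;
```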
+* Optimize `insertFrom` of ColumnAggregateFunction to share Aggregate State in some cases. [#41960](https://github.com/ClickHouse/ClickHouse/pull/41960) ([flynn](https://github.com/ucasfl)). +* Make writing to `azure_blob_storage` disks faster (respect `max_single_part_upload_size` instead of writing a block per each buffer size). Inefficiency mentioned in [#41754](https://github.com/ClickHouse/ClickHouse/issues/41754). [#42041](https://github.com/ClickHouse/ClickHouse/pull/42041) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Make thread ids in the process list and query_log unique to avoid waste. [#42180](https://github.com/ClickHouse/ClickHouse/pull/42180) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support skipping cache completely (both download to cache and reading cached data) in case the requested read range exceeds the threshold defined by cache setting `bypass_cache_threashold`, requires to be enabled with `enable_bypass_cache_with_threshold`). [#42418](https://github.com/ClickHouse/ClickHouse/pull/42418) ([Han Shukai](https://github.com/KinderRiven)). This helps on slow local disks. + +#### Improvement +* Add setting `allow_implicit_no_password`: in combination with `allow_no_password` it forbids creating a user with no password unless `IDENTIFIED WITH no_password` is explicitly specified. [#41341](https://github.com/ClickHouse/ClickHouse/pull/41341) ([Nikolay Degterinsky](https://github.com/evillique)). +* Embedded Keeper will always start in the background allowing ClickHouse to start without achieving quorum. [#40991](https://github.com/ClickHouse/ClickHouse/pull/40991) ([Antonio Andelic](https://github.com/antonio2368)). +* Made reestablishing a new connection to ZooKeeper more reactive in case of expiration of the previous one. Previously there was a task which spawns every minute by default and thus a table could be in readonly state for about this time. [#41092](https://github.com/ClickHouse/ClickHouse/pull/41092) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Now projections can be used with zero copy replication (zero-copy replication is a non-production feature). [#41147](https://github.com/ClickHouse/ClickHouse/pull/41147) ([alesapin](https://github.com/alesapin)). +* Support expression `(EXPLAIN SELECT ...)` in a subquery. Queries like `SELECT * FROM (EXPLAIN PIPELINE SELECT col FROM TABLE ORDER BY col)` became valid. [#40630](https://github.com/ClickHouse/ClickHouse/pull/40630) ([Vladimir C](https://github.com/vdimir)). +* Allow changing `async_insert_max_data_size` or `async_insert_busy_timeout_ms` in scope of query. E.g. user wants to insert data rarely and she doesn't have access to the server config to tune default settings. [#40668](https://github.com/ClickHouse/ClickHouse/pull/40668) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Improvements for reading from remote filesystems, made threadpool size for reads/writes configurable. Closes [#41070](https://github.com/ClickHouse/ClickHouse/issues/41070). [#41011](https://github.com/ClickHouse/ClickHouse/pull/41011) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support all combinators combination in WindowTransform/arratReduce*/initializeAggregation/aggregate functions versioning. Previously combinators like `ForEach/Resample/Map` didn't work in these places, using them led to exception like`State function ... inserts results into non-state column`. 
[#41107](https://github.com/ClickHouse/ClickHouse/pull/41107) ([Kruglov Pavel](https://github.com/Avogar)). +* Add function `tryDecrypt` that returns NULL when decrypt fails (e.g. decrypt with incorrect key) instead of throwing an exception. [#41206](https://github.com/ClickHouse/ClickHouse/pull/41206) ([Duc Canh Le](https://github.com/canhld94)). +* Add the `unreserved_space` column to the `system.disks` table to check how much space is not taken by reservations per disk. [#41254](https://github.com/ClickHouse/ClickHouse/pull/41254) ([filimonov](https://github.com/filimonov)). +* Support S3 authorization headers in table function arguments. [#41261](https://github.com/ClickHouse/ClickHouse/pull/41261) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add support for MultiRead in Keeper and internal ZooKeeper client (this is an extension to ZooKeeper protocol, only available in ClickHouse Keeper). [#41410](https://github.com/ClickHouse/ClickHouse/pull/41410) ([Antonio Andelic](https://github.com/antonio2368)). +* Add support for comparing a decimal type with a floating point literal in the IN operator. [#41544](https://github.com/ClickHouse/ClickHouse/pull/41544) ([liang.huang](https://github.com/lhuang09287750)). +* Allow readable size values (like `1TB`) in cache config. [#41688](https://github.com/ClickHouse/ClickHouse/pull/41688) ([Kseniia Sumarokova](https://github.com/kssenii)). +* ClickHouse could cache stale DNS entries for some period of time (15 seconds by default) until the cache is updated asynchronously. During these periods ClickHouse could nevertheless try to establish a connection and produce errors. This behavior is fixed. [#41707](https://github.com/ClickHouse/ClickHouse/pull/41707) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add interactive history search with fzf-like utility (fzf/sk) for `clickhouse-client`/`clickhouse-local` (note you can use `FZF_DEFAULT_OPTS`/`SKIM_DEFAULT_OPTIONS` to additionally configure the behavior). [#41730](https://github.com/ClickHouse/ClickHouse/pull/41730) ([Azat Khuzhin](https://github.com/azat)). +* Only allow clients connecting to a secure server with an invalid certificate to proceed with the '--accept-certificate' flag. [#41743](https://github.com/ClickHouse/ClickHouse/pull/41743) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add function `tryBase58Decode`, similar to the existing function `tryBase64Decode`. [#41824](https://github.com/ClickHouse/ClickHouse/pull/41824) ([Robert Schulze](https://github.com/rschu1ze)). +* Improve feedback when replacing a partition with a different primary key. Fixes [#34798](https://github.com/ClickHouse/ClickHouse/issues/34798). [#41838](https://github.com/ClickHouse/ClickHouse/pull/41838) ([Salvatore](https://github.com/tbsal)). +* Fix parallel parsing: segmentator now checks `max_block_size`. This fixed memory overallocation in case of parallel parsing and small LIMIT. [#41852](https://github.com/ClickHouse/ClickHouse/pull/41852) ([Vitaly Baranov](https://github.com/vitlibar)). +* Don't add "TABLE_IS_DROPPED" exception to `system.errors` if it happened during SELECT from a system table and was ignored. [#41908](https://github.com/ClickHouse/ClickHouse/pull/41908) ([AlfVII](https://github.com/AlfVII)). +* Improve option `enable_extended_results_for_datetime_functions` to return results of type DateTime64 for functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute` and `timeSlot`. 
[#41910](https://github.com/ClickHouse/ClickHouse/pull/41910) ([Roman Vasin](https://github.com/rvasin)). +* Improve `DateTime` type inference for text formats. Now it respects setting `date_time_input_format` and doesn't try to infer datetimes from numbers as timestamps. Closes [#41389](https://github.com/ClickHouse/ClickHouse/issues/41389) Closes [#42206](https://github.com/ClickHouse/ClickHouse/issues/42206). [#41912](https://github.com/ClickHouse/ClickHouse/pull/41912) ([Kruglov Pavel](https://github.com/Avogar)). +* Remove confusing warning when inserting with `perform_ttl_move_on_insert` = false. [#41980](https://github.com/ClickHouse/ClickHouse/pull/41980) ([Vitaly Baranov](https://github.com/vitlibar)). +* Allow user to write `countState(*)` similar to `count(*)`. This closes [#9338](https://github.com/ClickHouse/ClickHouse/issues/9338). [#41983](https://github.com/ClickHouse/ClickHouse/pull/41983) ([Amos Bird](https://github.com/amosbird)). +* Fix `rankCorr` size overflow. [#42020](https://github.com/ClickHouse/ClickHouse/pull/42020) ([Duc Canh Le](https://github.com/canhld94)). +* Added an option to specify an arbitrary string as an environment name in the Sentry's config for more handy reports. [#42037](https://github.com/ClickHouse/ClickHouse/pull/42037) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix parsing out-of-range Date from CSV. [#42044](https://github.com/ClickHouse/ClickHouse/pull/42044) ([Andrey Zvonov](https://github.com/zvonand)). +* `parseDataTimeBestEffort` now supports comma between date and time. Closes [#42038](https://github.com/ClickHouse/ClickHouse/issues/42038). [#42049](https://github.com/ClickHouse/ClickHouse/pull/42049) ([flynn](https://github.com/ucasfl)). +* Improved stale replica recovery process for `ReplicatedMergeTree`. If a lost replica has some parts which are absent from a healthy replica, but these parts should appear in the future according to the replication queue of the healthy replica, then the lost replica will keep such parts instead of detaching them. [#42134](https://github.com/ClickHouse/ClickHouse/pull/42134) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add a possibility to use `Date32` arguments for date_diff function. Fix issue in date_diff function when using DateTime64 arguments with a start date before Unix epoch and end date after Unix epoch. [#42308](https://github.com/ClickHouse/ClickHouse/pull/42308) ([Roman Vasin](https://github.com/rvasin)). +* When uploading big parts to Minio, 'Complete Multipart Upload' can take a long time. Minio sends heartbeats every 10 seconds (see https://github.com/minio/minio/pull/7198). But clickhouse times out earlier, because the default send/receive timeout is [set](https://github.com/ClickHouse/ClickHouse/blob/cc24fcd6d5dfb67f5f66f5483e986bd1010ad9cf/src/IO/S3/PocoHTTPClient.cpp#L123) to 5 seconds. [#42321](https://github.com/ClickHouse/ClickHouse/pull/42321) ([filimonov](https://github.com/filimonov)). +* Fix rarely invalid cast of aggregate state types with complex types such as Decimal. This fixes [#42408](https://github.com/ClickHouse/ClickHouse/issues/42408). [#42417](https://github.com/ClickHouse/ClickHouse/pull/42417) ([Amos Bird](https://github.com/amosbird)). +* Allow to use `Date32` arguments for `dateName` function. [#42554](https://github.com/ClickHouse/ClickHouse/pull/42554) ([Roman Vasin](https://github.com/rvasin)). +* Now filters with NULL literals will be used during index analysis. [#34063](https://github.com/ClickHouse/ClickHouse/issues/34063). 
[#41842](https://github.com/ClickHouse/ClickHouse/pull/41842) ([Amos Bird](https://github.com/amosbird)). + +#### Build/Testing/Packaging Improvement +* Add fuzzer for table definitions [#40096](https://github.com/ClickHouse/ClickHouse/pull/40096) ([Anton Popov](https://github.com/CurtizJ)). This represents the biggest advancement for ClickHouse testing in this year so far. +* Beta version of the ClickHouse Cloud service is released: [https://clickhouse.cloud/](https://clickhouse.cloud/). It provides the easiest way to use ClickHouse (even slightly easier than the single-command installation). +* Added support of WHERE clause generation to AST Fuzzer and possibility to add or remove ORDER BY and WHERE clause. [#38519](https://github.com/ClickHouse/ClickHouse/pull/38519) ([Ilya Yatsishin](https://github.com/qoega)). +* Aarch64 binaries now require at least ARMv8.2, released in 2016. Most notably, this enables use of ARM LSE, i.e. native atomic operations. Also, CMake build option "NO_ARMV81_OR_HIGHER" has been added to allow compilation of binaries for older ARMv8.0 hardware, e.g. Raspberry Pi 4. [#41610](https://github.com/ClickHouse/ClickHouse/pull/41610) ([Robert Schulze](https://github.com/rschu1ze)). +* Allow building ClickHouse with Musl (small changes after it was already supported but broken). [#41987](https://github.com/ClickHouse/ClickHouse/pull/41987) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add the `$CLICKHOUSE_CRONFILE` file checking to avoid running the `sed` command to get the file not found error on install. [#42081](https://github.com/ClickHouse/ClickHouse/pull/42081) ([Chun-Sheng, Li](https://github.com/peter279k)). +* Update cctz to `2022e` to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)). +* Add Rust code support into ClickHouse with BLAKE3 hash-function library as an example. [#33435](https://github.com/ClickHouse/ClickHouse/pull/33435) ([BoloniniD](https://github.com/BoloniniD)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Choose correct aggregation method for `LowCardinality` with big integer types. [#42342](https://github.com/ClickHouse/ClickHouse/pull/42342) ([Duc Canh Le](https://github.com/canhld94)). +* Several fixes for `web` disk. [#41652](https://github.com/ClickHouse/ClickHouse/pull/41652) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixes an issue that causes docker run to fail if `https_port` is not present in config. [#41693](https://github.com/ClickHouse/ClickHouse/pull/41693) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Mutations were not cancelled properly on server shutdown or `SYSTEM STOP MERGES` query and cancellation might take long time, it's fixed. [#41699](https://github.com/ClickHouse/ClickHouse/pull/41699) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Fix wrong result of queries with `ORDER BY` or `GROUP BY` by columns from prefix of sorting key, wrapped into monotonic functions, with the "read in order" optimization enabled (settings `optimize_read_in_order` and `optimize_aggregation_in_order`). [#41701](https://github.com/ClickHouse/ClickHouse/pull/41701) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed "Part ... intersects part ..." error that might happen in extremely rare cases if a replica was restarted just after detaching some part as broken. [#41741](https://github.com/ClickHouse/ClickHouse/pull/41741) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Don't allow creating or altering merge tree tables with column name `_row_exists`, which is reserved for lightweight delete. Fixed [#41716](https://github.com/ClickHouse/ClickHouse/issues/41716). [#41763](https://github.com/ClickHouse/ClickHouse/pull/41763) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Fix a bug where CORS headers were missing in some HTTP responses. [#41792](https://github.com/ClickHouse/ClickHouse/pull/41792) ([Frank Chen](https://github.com/FrankChen021)). +* 22.9 might fail to start up a `ReplicatedMergeTree` table if that table was created by 20.3 or an older version and was never altered; it's fixed. Fixes [#41742](https://github.com/ClickHouse/ClickHouse/issues/41742). [#41796](https://github.com/ClickHouse/ClickHouse/pull/41796) ([Alexander Tokmakov](https://github.com/tavplubix)). +* When batch sending failed for some reason, it could not be recovered automatically; if not handled in time, failed batches accumulated and the printed error message grew longer and longer, which could block the HTTP thread. This is fixed. [#41813](https://github.com/ClickHouse/ClickHouse/pull/41813) ([zhongyuankai](https://github.com/zhongyuankai)). +* Fix compact parts with compressed marks setting. Fixes [#41783](https://github.com/ClickHouse/ClickHouse/issues/41783) and [#41746](https://github.com/ClickHouse/ClickHouse/issues/41746). [#41823](https://github.com/ClickHouse/ClickHouse/pull/41823) ([alesapin](https://github.com/alesapin)). +* Old versions of Replicated database don't have a special marker in [Zoo]Keeper. We need to check only whether the node contains some obscure data instead of the special marker. [#41875](https://github.com/ClickHouse/ClickHouse/pull/41875) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix possible exception in fs cache. [#41884](https://github.com/ClickHouse/ClickHouse/pull/41884) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix `use_environment_credentials` for s3 table function. [#41970](https://github.com/ClickHouse/ClickHouse/pull/41970) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed "Directory already exists and is not empty" error on detaching broken part that might prevent `ReplicatedMergeTree` table from starting replication. Fixes [#40957](https://github.com/ClickHouse/ClickHouse/issues/40957). [#41981](https://github.com/ClickHouse/ClickHouse/pull/41981) ([Alexander Tokmakov](https://github.com/tavplubix)). +* `toDateTime64` now returns the same output with negative integer and float arguments. 
[#42025](https://github.com/ClickHouse/ClickHouse/pull/42025) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix write into `azure_blob_storage`. Partially closes [#41754](https://github.com/ClickHouse/ClickHouse/issues/41754). [#42034](https://github.com/ClickHouse/ClickHouse/pull/42034) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix the `bzip2` decoding issue for specific `bzip2` files. [#42046](https://github.com/ClickHouse/ClickHouse/pull/42046) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix SQL function `toLastDayOfMonth` with setting "enable_extended_results_for_datetime_functions = 1" at the beginning of the extended range (January 1900); a short sketch follows after this list. - Fix SQL function "toRelativeWeekNum()" with setting "enable_extended_results_for_datetime_functions = 1" at the end of the extended range (December 2299). - Improve the performance of SQL functions "toISOYear()", "toFirstDayNumOfISOYearIndex()" and "toYearWeekOfNewyearMode()" by avoiding unnecessary index arithmetic. [#42084](https://github.com/ClickHouse/ClickHouse/pull/42084) ([Roman Vasin](https://github.com/rvasin)). +* The maximum number of fetches for each table was accidentally set to 8 while the pool size could be bigger. Now the maximum number of fetches per table is equal to the pool size. [#42090](https://github.com/ClickHouse/ClickHouse/pull/42090) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* A table might be shut down and a dictionary might be detached before checking whether it can be dropped without breaking dependencies; it's fixed. Fixes [#41982](https://github.com/ClickHouse/ClickHouse/issues/41982). [#42106](https://github.com/ClickHouse/ClickHouse/pull/42106) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix bad inefficiency of `remote_filesystem_read_method=read` with filesystem cache. Closes [#42125](https://github.com/ClickHouse/ClickHouse/issues/42125). [#42129](https://github.com/ClickHouse/ClickHouse/pull/42129) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible timeout exception for distributed queries with use_hedged_requests = 0. [#42130](https://github.com/ClickHouse/ClickHouse/pull/42130) ([Azat Khuzhin](https://github.com/azat)). +* Fixed a minor bug inside function `runningDifference` in case of using it with `Date32` type. Previously `Date` was used and it could cause logical errors like `Bad cast from type DB::ColumnVector to DB::ColumnVector'`. [#42143](https://github.com/ClickHouse/ClickHouse/pull/42143) ([Alfred Xu](https://github.com/sperlingxx)). +* Fix reusing of files > 4GB from base backup. [#42146](https://github.com/ClickHouse/ClickHouse/pull/42146) ([Azat Khuzhin](https://github.com/azat)). +* Fix DISTINCT in order failing with LOGICAL_ERROR if the first column in the sorting key contains a function. [#42186](https://github.com/ClickHouse/ClickHouse/pull/42186) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix read from `Buffer` tables with read in order desc. [#42236](https://github.com/ClickHouse/ClickHouse/pull/42236) ([Duc Canh Le](https://github.com/canhld94)). 
+* Fix a bug which prevented ClickHouse from starting when the `background_pool_size` setting is set in the default profile but `background_merges_mutations_concurrency_ratio` is not. [#42315](https://github.com/ClickHouse/ClickHouse/pull/42315) ([nvartolomei](https://github.com/nvartolomei)). +* `ALTER UPDATE` of an attached part (with columns different from table schema) could create an invalid `columns.txt` metadata on disk. Reading from such a part could fail with errors or return invalid data. Fixes [#42161](https://github.com/ClickHouse/ClickHouse/issues/42161). [#42319](https://github.com/ClickHouse/ClickHouse/pull/42319) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* The setting `additional_table_filters` was not applied to `Distributed` storage. Fixes [#41692](https://github.com/ClickHouse/ClickHouse/issues/41692). [#42322](https://github.com/ClickHouse/ClickHouse/pull/42322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a data race in query finish/cancel. This closes [#42346](https://github.com/ClickHouse/ClickHouse/issues/42346). [#42362](https://github.com/ClickHouse/ClickHouse/pull/42362) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* This reverts [#40217](https://github.com/ClickHouse/ClickHouse/issues/40217) which introduced a regression in date/time functions. [#42367](https://github.com/ClickHouse/ClickHouse/pull/42367) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix assert cast in JOIN on falsy condition. Closes [#42380](https://github.com/ClickHouse/ClickHouse/issues/42380). [#42407](https://github.com/ClickHouse/ClickHouse/pull/42407) ([Vladimir C](https://github.com/vdimir)). +* Fix buffer overflow in the processing of Decimal data types. This closes [#42451](https://github.com/ClickHouse/ClickHouse/issues/42451). [#42465](https://github.com/ClickHouse/ClickHouse/pull/42465) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `AggregateFunctionQuantile` now correctly works with UInt128 columns. Previously, the quantile state interpreted `UInt128` columns as `Int128` which could have led to incorrect results. [#42473](https://github.com/ClickHouse/ClickHouse/pull/42473) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix bad_cast assert during INSERT into `Annoy` indexes over non-Float32 columns. `Annoy` indexes are an experimental feature. [#42485](https://github.com/ClickHouse/ClickHouse/pull/42485) ([Robert Schulze](https://github.com/rschu1ze)). +* An arithmetic operator on Date or DateTime and a 128- or 256-bit integer was referencing uninitialized memory. [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix unexpected table loading error when partition key contains alias function names during server upgrade. [#36379](https://github.com/ClickHouse/ClickHouse/pull/36379) ([Amos Bird](https://github.com/amosbird)). 
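As a quick, hedged illustration of the random-distribution functions listed under "New Feature" above (`randUniform`, `randNormal`, `randBernoulli`, etc.): the snippet below assumes the argument conventions suggested by the function names (bounds for the uniform distribution, location/spread for the normal one, a probability for Bernoulli); consult the generated documentation for the exact signatures.

```sql
-- Sketch only: argument order/meaning assumed from the function names.
SELECT
    randUniform(0, 10)  AS uniform_0_10,   -- assumed: lower and upper bound
    randNormal(0, 1)    AS normal_0_1,     -- assumed: location and spread
    randBernoulli(0.75) AS bernoulli_p075  -- assumed: success probability
FROM numbers(3);
```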
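The sketch referenced from the `toLastDayOfMonth` bug-fix entry above: a minimal check near the start of the extended date range, assuming that with `enable_extended_results_for_datetime_functions = 1` the function accepts `Date32` values from January 1900 and, after the fix, handles them correctly.

```sql
-- Assumption: with the setting enabled, toLastDayOfMonth returns an extended-range
-- (Date32) result and no longer misbehaves at the very beginning of the range.
SET enable_extended_results_for_datetime_functions = 1;
SELECT toLastDayOfMonth(toDate32('1900-01-15'));  -- expected: 1900-01-31
```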
+ + ### ClickHouse release 22.9, 2022-09-22 #### Backward Incompatible Change diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 7cba7c7548d..a554992caf3 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -81,6 +81,7 @@ elseif (ARCH_AMD64) option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 0) option (ENABLE_AVX512_VBMI "Use AVX512_VBMI instruction on x86_64 (depends on ENABLE_AVX512)" 0) option (ENABLE_BMI "Use BMI instructions on x86_64" 0) + option (ENABLE_BMI2 "Use BMI2 instructions on x86_64 (depends on ENABLE_AVX2)" 0) option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for specific operations on x86_64" 0) option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0) @@ -96,6 +97,7 @@ elseif (ARCH_AMD64) SET(ENABLE_AVX512 0) SET(ENABLE_AVX512_VBMI 0) SET(ENABLE_BMI 0) + SET(ENABLE_BMI2 0) SET(ENABLE_AVX2_FOR_SPEC_OP 0) SET(ENABLE_AVX512_FOR_SPEC_OP 0) endif() @@ -243,6 +245,20 @@ elseif (ARCH_AMD64) set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") endif () + set (TEST_FLAG "-mbmi2") + set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") + check_cxx_source_compiles(" + #include + int main() { + auto a = _pdep_u64(0, 0); + (void)a; + return 0; + } + " HAVE_BMI2) + if (HAVE_BMI2 AND HAVE_AVX2 AND ENABLE_AVX2 AND ENABLE_BMI2) + set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") + endif () + # Limit avx2/avx512 flag for specific source build set (X86_INTRINSICS_FLAGS "") if (ENABLE_AVX2_FOR_SPEC_OP) diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index 73610545009..f0cef54b0b8 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -85,7 +85,7 @@ if (SANITIZE) # and they have a bunch of flags not halt the program if UIO happend and even to silence that warnings. # But for unknown reason that flags don't work with ClickHouse or we don't understand how to properly use them, # that's why we often receive reports about UIO. The simplest way to avoid this is just set this flag here. 
- set(UBSAN_FLAGS "${SAN_FLAGS} -fno-sanitize=unsigned-integer-overflow") + set(UBSAN_FLAGS "${UBSAN_FLAGS} -fno-sanitize=unsigned-integer-overflow") endif() if (COMPILER_CLANG) set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index c2b16ae6dd6..8ebd4ab55d3 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -165,6 +165,7 @@ add_contrib (sqlite-cmake sqlite-amalgamation) add_contrib (s2geometry-cmake s2geometry) add_contrib (c-ares-cmake c-ares) add_contrib (qpl-cmake qpl) +add_contrib (morton-nd-cmake morton-nd) add_contrib(annoy-cmake annoy) diff --git a/contrib/morton-nd b/contrib/morton-nd new file mode 160000 index 00000000000..3795491a4aa --- /dev/null +++ b/contrib/morton-nd @@ -0,0 +1 @@ +Subproject commit 3795491a4aa3cdc916c8583094683f0d68df5bc0 diff --git a/contrib/morton-nd-cmake/CMakeLists.txt b/contrib/morton-nd-cmake/CMakeLists.txt new file mode 100644 index 00000000000..4842781503f --- /dev/null +++ b/contrib/morton-nd-cmake/CMakeLists.txt @@ -0,0 +1,3 @@ +add_library(_morton_nd INTERFACE) +target_include_directories(_morton_nd SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/morton-nd/include/") +add_library(ch_contrib::morton_nd ALIAS _morton_nd) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 2954cd574d0..77afc3e924b 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -73,7 +73,7 @@ RUN apt-get install binutils-riscv64-linux-gnu # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH -ARG NFPM_VERSION=2.18.1 +ARG NFPM_VERSION=2.20.0 RUN arch=${TARGETARCH:-amd64} \ && curl -Lo /tmp/nfpm.deb "https://github.com/goreleaser/nfpm/releases/download/v${NFPM_VERSION}/nfpm_${arch}.deb" \ diff --git a/docker/packager/packager b/docker/packager/packager index b4aa4ebdd91..83629dc7408 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -208,6 +208,7 @@ def parse_env_variables( cxx = cc.replace("gcc", "g++").replace("clang", "clang++") if package_type == "deb": + # NOTE: This are the env for packages/build script result.append("MAKE_DEB=true") cmake_flags.append("-DENABLE_TESTS=0") cmake_flags.append("-DENABLE_UTILS=0") @@ -268,6 +269,7 @@ def parse_env_variables( result.append('DISTCC_HOSTS="localhost/`nproc`"') if additional_pkgs: + # NOTE: This are the env for packages/build script result.append("MAKE_APK=true") result.append("MAKE_RPM=true") result.append("MAKE_TGZ=true") diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 9d6cf22c817..de9125d565b 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -136,6 +136,7 @@ function clone_submodules contrib/wyhash contrib/hashidsxx contrib/c-ares + contrib/morton-nd ) git submodule sync diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 27c96acbae1..7058853b43e 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -271,10 +271,6 @@ clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_ || (echo -e 'Server failed to start (see application_errors.txt and clickhouse-server.clean.log)\tFAIL' >> /test_output/test_results.tsv \ && grep -a ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt) -echo "Get previous release tag" -previous_release_tag=$(clickhouse-client --query="SELECT version()" | get_previous_release_tag) -echo 
$previous_release_tag - stop [ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL" @@ -332,6 +328,10 @@ zgrep -Fa " received signal " /test_output/gdb.log > /dev/null \ echo -e "Backward compatibility check\n" +echo "Get previous release tag" +previous_release_tag=$(clickhouse-client --version | grep -o "[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*" | get_previous_release_tag) +echo $previous_release_tag + echo "Clone previous release repository" git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository @@ -480,6 +480,7 @@ else -e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \ -e "The set of parts restored in place of" \ -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \ + -e "Code: 269. DB::Exception: Destination table is myself" \ /var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \ && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 7f3f38bd8f5..a0ec86f7fbe 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -286,9 +286,7 @@ if __name__ == "__main__": # But right now it should work, since neither hung check, nor 00001_select_1 has GROUP BY. "--client-option", "max_untracked_memory=1Gi", - "--client-option", "max_memory_usage_for_user=0", - "--client-option", "memory_profiler_step=1Gi", # Use system database to avoid CREATE/DROP DATABASE queries "--database=system", diff --git a/docs/en/getting-started/example-datasets/uk-price-paid.md b/docs/en/getting-started/example-datasets/uk-price-paid.md index ef20c03883f..2a89bfda2e7 100644 --- a/docs/en/getting-started/example-datasets/uk-price-paid.md +++ b/docs/en/getting-started/example-datasets/uk-price-paid.md @@ -101,7 +101,7 @@ SELECT count() FROM uk_price_paid ``` -At the time this query was executed, the dataset had 27,450,499 rows. Let's see what the storage size is of the table in ClickHouse: +At the time this query was run, the dataset had 27,450,499 rows. Let's see what the storage size is of the table in ClickHouse: ```sql SELECT formatReadableSize(total_bytes) @@ -342,7 +342,7 @@ The result looks like: ## Let's Speed Up Queries Using Projections {#speedup-with-projections} -[Projections](../../sql-reference/statements/alter/projection.md) allow you to improve query speeds by storing pre-aggregated data in whatever format you want. In this example, we create a projection that keeps track of the average price, total price, and count of properties grouped by the year, district and town. At execution time, ClickHouse will use your projection if it thinks the projection can improve the performance fo the query (you don't have to do anything special to use the projection - ClickHouse decides for you when the projection will be useful). +[Projections](../../sql-reference/statements/alter/projection.md) allow you to improve query speeds by storing pre-aggregated data in whatever format you want. In this example, we create a projection that keeps track of the average price, total price, and count of properties grouped by the year, district and town. 
At query time, ClickHouse will use your projection if it thinks the projection can improve the performance of the query (you don't have to do anything special to use the projection - ClickHouse decides for you when the projection will be useful). ### Build a Projection {#build-projection} diff --git a/docs/en/operations/troubleshooting.md b/docs/en/operations/troubleshooting.md index 93bd56087a2..6a1ca3176ad 100644 --- a/docs/en/operations/troubleshooting.md +++ b/docs/en/operations/troubleshooting.md @@ -17,6 +17,33 @@ title: Troubleshooting - Check firewall settings. - If you cannot access the repository for any reason, download packages as described in the [install guide](../getting-started/install.md) article and install them manually using the `sudo dpkg -i ` command. You will also need the `tzdata` package. +### You Cannot Update Deb Packages from ClickHouse Repository with Apt-get {#you-cannot-update-deb-packages-from-clickhouse-repository-with-apt-get} + +- The issue may happen when the GPG key has changed. + +Please use the following commands to resolve the issue: + +```bash +sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 +sudo apt-get update +``` + +### You Get the Unsupported Architecture Warning with Apt-get {#you-get-the-unsupported-architecture-warning-with-apt-get} + +- The complete warning message is as follows: + +``` +N: Skipping acquire of configured file 'main/binary-i386/Packages' as repository 'https://packages.clickhouse.com/deb stable InRelease' doesn't support architecture 'i386' +``` + +To resolve the above issue, please use the following commands: + +```bash +sudo rm /var/lib/apt/lists/packages.clickhouse.com_* /var/lib/dpkg/arch +sudo apt-get clean +sudo apt-get autoclean +``` + ## Connecting to the Server {#troubleshooting-accepts-no-connections} Possible issues: diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index eb357df19db..4a6e46e1759 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -376,14 +376,6 @@ Result: └─────┘ ``` -## UUIDStringToNum(str) - -Accepts a string containing 36 characters in the format `123e4567-e89b-12d3-a456-426655440000`, and returns it as a set of bytes in a FixedString(16). - -## UUIDNumToString(str) - -Accepts a FixedString(16) value. Returns a string containing 36 characters in text format. - ## bitmaskToList(num) Accepts an integer. Returns a string containing the list of powers of two that total the source number when summed. They are comma-separated without spaces in text format, in ascending order. diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index b8f222c2e4e..43542367cd5 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -211,12 +211,19 @@ SELECT toUUIDOrZero('61f0c404-5cb3-11e7-907b-a6006ad3dba0T') AS uuid ## UUIDStringToNum -Accepts a string containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns it as a set of bytes in a [FixedString(16)](../../sql-reference/data-types/fixedstring.md). 
+Accepts `string` containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns a [FixedString(16)](../../sql-reference/data-types/fixedstring.md) as its binary representation, with its format optionally specified by `variant` (`Big-endian` by default). + +**Syntax** ``` sql -UUIDStringToNum(String) +UUIDStringToNum(string[, variant = 1]) ``` +**Arguments** + +- `string` — String of 36 characters or FixedString(36). [String](../../sql-reference/syntax.md#syntax-string-literal). +- `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. + **Returned value** FixedString(16) @@ -235,14 +242,33 @@ SELECT └──────────────────────────────────────┴──────────────────┘ ``` +``` sql +SELECT + '612f3c40-5d3b-217e-707b-6a546a3d7b29' AS uuid, + UUIDStringToNum(uuid, 2) AS bytes +``` + +``` text +┌─uuid─────────────────────────────────┬─bytes────────────┐ +│ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ @&1 - PKG_PATH=$(nfpm package --target "$OUTPUT_DIR" --config "$config" --packager deb | tee /dev/fd/9) - PKG_PATH=${PKG_PATH##*created package: } - exec 9>&- + # Preserve package path + exec 9>&1 + PKG_PATH=$(nfpm package --target "$OUTPUT_DIR" --config "$config" --packager deb | tee /dev/fd/9) + PKG_PATH=${PKG_PATH##*created package: } + exec 9>&- + fi if [ -n "$MAKE_APK" ]; then echo "Building apk package for $config" nfpm package --target "$OUTPUT_DIR" --config "$config" --packager apk fi + if [ -n "$MAKE_ARCHLINUX" ]; then + echo "Building archlinux package for $config" + nfpm package --target "$OUTPUT_DIR" --config "$config" --packager archlinux + fi if [ -n "$MAKE_RPM" ]; then echo "Building rpm package for $config" nfpm package --target "$OUTPUT_DIR" --config "$config" --packager rpm diff --git a/packages/clickhouse-keeper.yaml b/packages/clickhouse-keeper.yaml index 7803729c469..8f319c97b65 100644 --- a/packages/clickhouse-keeper.yaml +++ b/packages/clickhouse-keeper.yaml @@ -27,8 +27,8 @@ deb: Source: clickhouse contents: -- src: root/etc/clickhouse-keeper - dst: /etc/clickhouse-keeper +- src: root/etc/clickhouse-keeper/keeper_config.xml + dst: /etc/clickhouse-keeper/keeper_config.xml type: config - src: root/usr/bin/clickhouse-keeper dst: /usr/bin/clickhouse-keeper diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml index a94ad1e9169..b0778e6bf72 100644 --- a/packages/clickhouse-server.yaml +++ b/packages/clickhouse-server.yaml @@ -42,8 +42,11 @@ deb: Source: clickhouse contents: -- src: root/etc/clickhouse-server - dst: /etc/clickhouse-server +- src: root/etc/clickhouse-server/config.xml + dst: /etc/clickhouse-server/config.xml + type: config +- src: root/etc/clickhouse-server/users.xml + dst: /etc/clickhouse-server/users.xml type: config - src: clickhouse-server.init dst: /etc/init.d/clickhouse-server diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 070f86aaad2..e5fd4d6bf8d 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -8,9 +8,10 @@ #include #include #include +#include +#include #include #include -#include #include #include #include @@ -591,6 +592,18 @@ void LocalServer::processConfig() if (mmap_cache_size) global_context->setMMappedFileCache(mmap_cache_size); +#if USE_EMBEDDED_COMPILER + /// 128 MB + constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128; + size_t compiled_expression_cache_size = 
config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default); + + constexpr size_t compiled_expression_cache_elements_size_default = 10000; + size_t compiled_expression_cache_elements_size + = config().getUInt64("compiled_expression_cache_elements_size", compiled_expression_cache_elements_size_default); + + CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size); +#endif + /// Load global settings from default_profile and system_profile. global_context->setDefaultProfiles(config()); diff --git a/src/Analyzer/AggregationUtils.cpp b/src/Analyzer/AggregationUtils.cpp new file mode 100644 index 00000000000..a73df87f9c2 --- /dev/null +++ b/src/Analyzer/AggregationUtils.cpp @@ -0,0 +1,114 @@ +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_AGGREGATION; +} + +namespace +{ + +class CollectAggregateFunctionNodesVisitor : public ConstInDepthQueryTreeVisitor +{ +public: + explicit CollectAggregateFunctionNodesVisitor(QueryTreeNodes * aggregate_function_nodes_) + : aggregate_function_nodes(aggregate_function_nodes_) + {} + + explicit CollectAggregateFunctionNodesVisitor(String assert_no_aggregates_place_message_) + : assert_no_aggregates_place_message(std::move(assert_no_aggregates_place_message_)) + {} + + void visitImpl(const QueryTreeNodePtr & node) + { + auto * function_node = node->as(); + if (!function_node || !function_node->isAggregateFunction()) + return; + + if (!assert_no_aggregates_place_message.empty()) + throw Exception(ErrorCodes::ILLEGAL_AGGREGATION, + "Aggregate function {} is found {} in query", + function_node->formatASTForErrorMessage(), + assert_no_aggregates_place_message); + + if (aggregate_function_nodes) + aggregate_function_nodes->push_back(node); + } + + static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node) + { + return !(child_node->getNodeType() == QueryTreeNodeType::QUERY || child_node->getNodeType() == QueryTreeNodeType::UNION); + } + +private: + String assert_no_aggregates_place_message; + QueryTreeNodes * aggregate_function_nodes = nullptr; +}; + +} + +QueryTreeNodes collectAggregateFunctionNodes(const QueryTreeNodePtr & node) +{ + QueryTreeNodes result; + CollectAggregateFunctionNodesVisitor visitor(&result); + visitor.visit(node); + + return result; +} + +void collectAggregateFunctionNodes(const QueryTreeNodePtr & node, QueryTreeNodes & result) +{ + CollectAggregateFunctionNodesVisitor visitor(&result); + visitor.visit(node); +} + +void assertNoAggregateFunctionNodes(const QueryTreeNodePtr & node, const String & assert_no_aggregates_place_message) +{ + CollectAggregateFunctionNodesVisitor visitor(assert_no_aggregates_place_message); + visitor.visit(node); +} + +namespace +{ + +class ValidateGroupingFunctionNodesVisitor : public ConstInDepthQueryTreeVisitor +{ +public: + explicit ValidateGroupingFunctionNodesVisitor(String assert_no_grouping_function_place_message_) + : assert_no_grouping_function_place_message(std::move(assert_no_grouping_function_place_message_)) + {} + + void visitImpl(const QueryTreeNodePtr & node) + { + auto * function_node = node->as(); + if (function_node && function_node->getFunctionName() == "grouping") + throw Exception(ErrorCodes::ILLEGAL_AGGREGATION, + "GROUPING function {} is found {} in query", + function_node->formatASTForErrorMessage(), + assert_no_grouping_function_place_message); + } + + static bool needChildVisit(const QueryTreeNodePtr &, const 
QueryTreeNodePtr & child_node) + { + return !(child_node->getNodeType() == QueryTreeNodeType::QUERY || child_node->getNodeType() == QueryTreeNodeType::UNION); + } + +private: + String assert_no_grouping_function_place_message; +}; + +} + +void assertNoGroupingFunction(const QueryTreeNodePtr & node, const String & assert_no_grouping_function_place_message) +{ + ValidateGroupingFunctionNodesVisitor visitor(assert_no_grouping_function_place_message); + visitor.visit(node); +} + +} diff --git a/src/Analyzer/AggregationUtils.h b/src/Analyzer/AggregationUtils.h new file mode 100644 index 00000000000..c2e53e55c04 --- /dev/null +++ b/src/Analyzer/AggregationUtils.h @@ -0,0 +1,28 @@ +#pragma once + +#include + +namespace DB +{ + +/** Collect aggregate function nodes in node children. + * Do not visit subqueries. + */ +QueryTreeNodes collectAggregateFunctionNodes(const QueryTreeNodePtr & node); + +/** Collect aggregate function nodes in node children and add them into result. + * Do not visit subqueries. + */ +void collectAggregateFunctionNodes(const QueryTreeNodePtr & node, QueryTreeNodes & result); + +/** Assert that there are no aggregate function nodes in node children. + * Do not visit subqueries. + */ +void assertNoAggregateFunctionNodes(const QueryTreeNodePtr & node, const String & assert_no_aggregates_place_message); + +/** Assert that there are no GROUPING functions in node children. + * Do not visit subqueries. + */ +void assertNoGroupingFunction(const QueryTreeNodePtr & node, const String & assert_no_grouping_function_place_message); + +} diff --git a/src/Analyzer/ArrayJoinNode.cpp b/src/Analyzer/ArrayJoinNode.cpp new file mode 100644 index 00000000000..2157b5edf6f --- /dev/null +++ b/src/Analyzer/ArrayJoinNode.cpp @@ -0,0 +1,71 @@ +#include + +#include +#include +#include + +#include + +#include + +namespace DB +{ + +ArrayJoinNode::ArrayJoinNode(QueryTreeNodePtr table_expression_, QueryTreeNodePtr join_expressions_, bool is_left_) + : IQueryTreeNode(children_size) + , is_left(is_left_) +{ + children[table_expression_child_index] = std::move(table_expression_); + children[join_expressions_child_index] = std::move(join_expressions_); +} + +void ArrayJoinNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "ARRAY_JOIN id: " << format_state.getNodeId(this); + buffer << ", is_left: " << is_left; + + buffer << '\n' << std::string(indent + 2, ' ') << "TABLE EXPRESSION\n"; + getTableExpression()->dumpTreeImpl(buffer, format_state, indent + 4); + + buffer << '\n' << std::string(indent + 2, ' ') << "JOIN EXPRESSIONS\n"; + getJoinExpressionsNode()->dumpTreeImpl(buffer, format_state, indent + 4); +} + +bool ArrayJoinNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + return is_left == rhs_typed.is_left; +} + +void ArrayJoinNode::updateTreeHashImpl(HashState & state) const +{ + state.update(is_left); +} + +QueryTreeNodePtr ArrayJoinNode::cloneImpl() const +{ + return std::make_shared(getTableExpression(), getJoinExpressionsNode(), is_left); +} + +ASTPtr ArrayJoinNode::toASTImpl() const +{ + auto array_join_ast = std::make_shared(); + array_join_ast->kind = is_left ? 
ASTArrayJoin::Kind::Left : ASTArrayJoin::Kind::Inner; + + const auto & join_expression_list_node = getJoinExpressionsNode(); + array_join_ast->children.push_back(join_expression_list_node->toAST()); + array_join_ast->expression_list = array_join_ast->children.back(); + + ASTPtr tables_in_select_query_ast = std::make_shared(); + addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[table_expression_child_index]); + + auto array_join_query_element_ast = std::make_shared(); + array_join_query_element_ast->children.push_back(std::move(array_join_ast)); + array_join_query_element_ast->array_join = array_join_query_element_ast->children.back(); + + tables_in_select_query_ast->children.push_back(std::move(array_join_query_element_ast)); + + return tables_in_select_query_ast; +} + +} diff --git a/src/Analyzer/ArrayJoinNode.h b/src/Analyzer/ArrayJoinNode.h new file mode 100644 index 00000000000..50d53df465a --- /dev/null +++ b/src/Analyzer/ArrayJoinNode.h @@ -0,0 +1,113 @@ +#pragma once + +#include +#include +#include + +#include +#include + +#include +#include + +namespace DB +{ + +/** Array join node represents array join in query tree. + * + * In query tree array join expressions are represented by list query tree node. + * + * Example: SELECT id FROM test_table ARRAY JOIN [1, 2, 3] as a. + * + * Multiple expressions can be inside single array join. + * Example: SELECT id FROM test_table ARRAY JOIN [1, 2, 3] as a, [4, 5, 6] as b. + * Example: SELECT id FROM test_table ARRAY JOIN array_column_1 AS value_1, array_column_2 AS value_2. + * + * Multiple array joins can be inside JOIN TREE. + * Example: SELECT id FROM test_table ARRAY JOIN array_column_1 ARRAY JOIN array_column_2. + * + * Array join can be used inside JOIN TREE with ordinary JOINS. + * Example: SELECT t1.id FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 ON t1.id = t2.id ARRAY JOIN [1,2,3]; + * Example: SELECT t1.id FROM test_table_1 AS t1 ARRAY JOIN [1,2,3] INNER JOIN test_table_2 AS t2 ON t1.id = t2.id; + */ +class ArrayJoinNode; +using ArrayJoinNodePtr = std::shared_ptr; + +class ArrayJoinNode final : public IQueryTreeNode +{ +public: + /** Construct array join node with table expression. + * Example: SELECT id FROM test_table ARRAY JOIN [1, 2, 3] as a. + * test_table - table expression. + * join_expression_list - list of array join expressions. 
+ */ + ArrayJoinNode(QueryTreeNodePtr table_expression_, QueryTreeNodePtr join_expressions_, bool is_left_); + + /// Get table expression + const QueryTreeNodePtr & getTableExpression() const + { + return children[table_expression_child_index]; + } + + /// Get table expression + QueryTreeNodePtr & getTableExpression() + { + return children[table_expression_child_index]; + } + + /// Get join expressions + const ListNode & getJoinExpressions() const + { + return children[join_expressions_child_index]->as(); + } + + /// Get join expressions + ListNode & getJoinExpressions() + { + return children[join_expressions_child_index]->as(); + } + + /// Get join expressions node + const QueryTreeNodePtr & getJoinExpressionsNode() const + { + return children[join_expressions_child_index]; + } + + /// Get join expressions node + QueryTreeNodePtr & getJoinExpressionsNode() + { + return children[join_expressions_child_index]; + } + + /// Returns true if array join is left, false otherwise + bool isLeft() const + { + return is_left; + } + + QueryTreeNodeType getNodeType() const override + { + return QueryTreeNodeType::ARRAY_JOIN; + } + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + +protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(HashState & state) const override; + + QueryTreeNodePtr cloneImpl() const override; + + ASTPtr toASTImpl() const override; + +private: + bool is_left = false; + + static constexpr size_t table_expression_child_index = 0; + static constexpr size_t join_expressions_child_index = 1; + static constexpr size_t children_size = join_expressions_child_index + 1; +}; + +} + diff --git a/src/Analyzer/CMakeLists.txt b/src/Analyzer/CMakeLists.txt new file mode 100644 index 00000000000..766767b5c13 --- /dev/null +++ b/src/Analyzer/CMakeLists.txt @@ -0,0 +1,7 @@ +if (ENABLE_TESTS) + add_subdirectory(tests) +endif() + +if (ENABLE_EXAMPLES) + add_subdirectory(examples) +endif() diff --git a/src/Analyzer/ColumnNode.cpp b/src/Analyzer/ColumnNode.cpp new file mode 100644 index 00000000000..4d0d349dabb --- /dev/null +++ b/src/Analyzer/ColumnNode.cpp @@ -0,0 +1,97 @@ +#include + +#include + +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +ColumnNode::ColumnNode(NameAndTypePair column_, QueryTreeNodePtr expression_node_, QueryTreeNodeWeakPtr column_source_) + : IQueryTreeNode(children_size, weak_pointers_size) + , column(std::move(column_)) +{ + children[expression_child_index] = std::move(expression_node_); + getSourceWeakPointer() = std::move(column_source_); +} + +ColumnNode::ColumnNode(NameAndTypePair column_, QueryTreeNodeWeakPtr column_source_) + : ColumnNode(std::move(column_), nullptr /*expression_node*/, std::move(column_source_)) +{ +} + +QueryTreeNodePtr ColumnNode::getColumnSource() const +{ + auto lock = getSourceWeakPointer().lock(); + if (!lock) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Column {} {} query tree node does not have valid source node", + column.name, + column.type->getName()); + + return lock; +} + +QueryTreeNodePtr ColumnNode::getColumnSourceOrNull() const +{ + return getSourceWeakPointer().lock(); +} + +void ColumnNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "COLUMN id: " << state.getNodeId(this); + + if (hasAlias()) + buffer << ", alias: " << getAlias(); + + buffer << ", column_name: " << column.name 
<< ", result_type: " << column.type->getName(); + + auto column_source_ptr = getSourceWeakPointer().lock(); + if (column_source_ptr) + buffer << ", source_id: " << state.getNodeId(column_source_ptr.get()); + + const auto & expression = getExpression(); + + if (expression) + { + buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION\n"; + expression->dumpTreeImpl(buffer, state, indent + 4); + } +} + +bool ColumnNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + return column == rhs_typed.column; +} + +void ColumnNode::updateTreeHashImpl(HashState & hash_state) const +{ + hash_state.update(column.name.size()); + hash_state.update(column.name); + + const auto & column_type_name = column.type->getName(); + hash_state.update(column_type_name.size()); + hash_state.update(column_type_name); +} + +QueryTreeNodePtr ColumnNode::cloneImpl() const +{ + return std::make_shared(column, getColumnSource()); +} + +ASTPtr ColumnNode::toASTImpl() const +{ + return std::make_shared(column.name); +} + +} diff --git a/src/Analyzer/ColumnNode.h b/src/Analyzer/ColumnNode.h new file mode 100644 index 00000000000..fbd788ae6fd --- /dev/null +++ b/src/Analyzer/ColumnNode.h @@ -0,0 +1,156 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/** Column node represents column in query tree. + * Column node can have weak pointer to its column source. + * Column source can be table expression, lambda, subquery. + * + * For table ALIAS columns. Column node must contain expression. + * For ARRAY JOIN join expression column. Column node must contain expression. + * + * During query analysis pass identifier node is resolved into column. See IdentifierNode.h. + * + * Examples: + * SELECT id FROM test_table. id is identifier that must be resolved to column node during query analysis pass. + * SELECT lambda(x -> x + 1, [1,2,3]). x is identifier inside lambda that must be resolved to column node during query analysis pass. + * + * Column node is initialized with column name, type and column source weak pointer. + * In case of ALIAS column node is initialized with column name, type, alias expression and column source weak pointer. 
+ */ +class ColumnNode; +using ColumnNodePtr = std::shared_ptr; + +class ColumnNode final : public IQueryTreeNode +{ +public: + /// Construct column node with column name, type, column expression and column source weak pointer + ColumnNode(NameAndTypePair column_, QueryTreeNodePtr expression_node_, QueryTreeNodeWeakPtr column_source_); + + /// Construct column node with column name, type and column source weak pointer + ColumnNode(NameAndTypePair column_, QueryTreeNodeWeakPtr column_source_); + + /// Get column + const NameAndTypePair & getColumn() const + { + return column; + } + + /// Get column name + const String & getColumnName() const + { + return column.name; + } + + /// Get column type + const DataTypePtr & getColumnType() const + { + return column.type; + } + + /// Set column type + void setColumnType(DataTypePtr column_type) + { + column.type = std::move(column_type); + } + + /// Returns true if column node has expression, false otherwise + bool hasExpression() const + { + return children[expression_child_index] != nullptr; + } + + /// Get column node expression node + const QueryTreeNodePtr & getExpression() const + { + return children[expression_child_index]; + } + + /// Get column node expression node + QueryTreeNodePtr & getExpression() + { + return children[expression_child_index]; + } + + /// Get column node expression node, if there are no expression node exception is thrown + QueryTreeNodePtr & getExpressionOrThrow() + { + if (!children[expression_child_index]) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column expression is not initialized"); + + return children[expression_child_index]; + } + + /// Set column node expression node + void setExpression(QueryTreeNodePtr expression_value) + { + children[expression_child_index] = std::move(expression_value); + } + + /** Get column source. + * If column source is not valid logical exception is thrown. + */ + QueryTreeNodePtr getColumnSource() const; + + /** Get column source. + * If column source is not valid null is returned. 
+ */ + QueryTreeNodePtr getColumnSourceOrNull() const; + + QueryTreeNodeType getNodeType() const override + { + return QueryTreeNodeType::COLUMN; + } + + String getName() const override + { + return column.name; + } + + DataTypePtr getResultType() const override + { + return column.type; + } + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & state, size_t indent) const override; + +protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(HashState & hash_state) const override; + + QueryTreeNodePtr cloneImpl() const override; + + ASTPtr toASTImpl() const override; + +private: + const QueryTreeNodeWeakPtr & getSourceWeakPointer() const + { + return weak_pointers[source_weak_pointer_index]; + } + + QueryTreeNodeWeakPtr & getSourceWeakPointer() + { + return weak_pointers[source_weak_pointer_index]; + } + + NameAndTypePair column; + + static constexpr size_t expression_child_index = 0; + static constexpr size_t children_size = expression_child_index + 1; + + static constexpr size_t source_weak_pointer_index = 0; + static constexpr size_t weak_pointers_size = source_weak_pointer_index + 1; +}; + +} diff --git a/src/Analyzer/ColumnTransformers.cpp b/src/Analyzer/ColumnTransformers.cpp new file mode 100644 index 00000000000..27466ce5c27 --- /dev/null +++ b/src/Analyzer/ColumnTransformers.cpp @@ -0,0 +1,357 @@ +#include + +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +/// IColumnTransformerNode implementation + +const char * toString(ColumnTransfomerType type) +{ + switch (type) + { + case ColumnTransfomerType::APPLY: return "APPLY"; + case ColumnTransfomerType::EXCEPT: return "EXCEPT"; + case ColumnTransfomerType::REPLACE: return "REPLACE"; + } +} + +IColumnTransformerNode::IColumnTransformerNode(size_t children_size) + : IQueryTreeNode(children_size) +{} + +/// ApplyColumnTransformerNode implementation + +const char * toString(ApplyColumnTransformerType type) +{ + switch (type) + { + case ApplyColumnTransformerType::LAMBDA: return "LAMBDA"; + case ApplyColumnTransformerType::FUNCTION: return "FUNCTION"; + } +} + +ApplyColumnTransformerNode::ApplyColumnTransformerNode(QueryTreeNodePtr expression_node_) + : IColumnTransformerNode(children_size) +{ + if (expression_node_->getNodeType() == QueryTreeNodeType::LAMBDA) + apply_transformer_type = ApplyColumnTransformerType::LAMBDA; + else if (expression_node_->getNodeType() == QueryTreeNodeType::FUNCTION) + apply_transformer_type = ApplyColumnTransformerType::FUNCTION; + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Apply column transformer expression must be lambda or function. 
Actual {}", + expression_node_->getNodeTypeName()); + + children[expression_child_index] = std::move(expression_node_); +} + +void ApplyColumnTransformerNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "APPLY COLUMN TRANSFORMER id: " << format_state.getNodeId(this); + buffer << ", apply_transformer_type: " << toString(apply_transformer_type); + + buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION" << '\n'; + + const auto & expression_node = getExpressionNode(); + expression_node->dumpTreeImpl(buffer, format_state, indent + 4); +} + +bool ApplyColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + return apply_transformer_type == rhs_typed.apply_transformer_type; +} + +void ApplyColumnTransformerNode::updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const +{ + hash_state.update(static_cast(getTransformerType())); + hash_state.update(static_cast(getApplyTransformerType())); +} + +QueryTreeNodePtr ApplyColumnTransformerNode::cloneImpl() const +{ + return std::make_shared(getExpressionNode()); +} + +ASTPtr ApplyColumnTransformerNode::toASTImpl() const +{ + auto ast_apply_transformer = std::make_shared(); + const auto & expression_node = getExpressionNode(); + + if (apply_transformer_type == ApplyColumnTransformerType::FUNCTION) + { + auto & function_expression = expression_node->as(); + ast_apply_transformer->func_name = function_expression.getFunctionName(); + ast_apply_transformer->parameters = function_expression.getParametersNode()->toAST(); + } + else + { + auto & lambda_expression = expression_node->as(); + if (!lambda_expression.getArgumentNames().empty()) + ast_apply_transformer->lambda_arg = lambda_expression.getArgumentNames()[0]; + ast_apply_transformer->lambda = lambda_expression.toAST(); + } + + return ast_apply_transformer; +} + +/// ExceptColumnTransformerNode implementation + +ExceptColumnTransformerNode::ExceptColumnTransformerNode(Names except_column_names_, bool is_strict_) + : IColumnTransformerNode(children_size) + , except_transformer_type(ExceptColumnTransformerType::COLUMN_LIST) + , except_column_names(std::move(except_column_names_)) + , is_strict(is_strict_) +{ +} + +ExceptColumnTransformerNode::ExceptColumnTransformerNode(std::shared_ptr column_matcher_) + : IColumnTransformerNode(children_size) + , except_transformer_type(ExceptColumnTransformerType::REGEXP) + , column_matcher(std::move(column_matcher_)) +{ +} + +bool ExceptColumnTransformerNode::isColumnMatching(const std::string & column_name) const +{ + if (column_matcher) + return RE2::PartialMatch(column_name, *column_matcher); + + for (const auto & name : except_column_names) + if (column_name == name) + return true; + + return false; +} + +const char * toString(ExceptColumnTransformerType type) +{ + switch (type) + { + case ExceptColumnTransformerType::REGEXP: + return "REGEXP"; + case ExceptColumnTransformerType::COLUMN_LIST: + return "COLUMN_LIST"; + } +} + +void ExceptColumnTransformerNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "EXCEPT COLUMN TRANSFORMER id: " << format_state.getNodeId(this); + buffer << ", except_transformer_type: " << toString(except_transformer_type); + + if (column_matcher) + { + buffer << ", pattern: " << column_matcher->pattern(); + return; + } + else + { + buffer << ", identifiers: "; + + size_t except_column_names_size = 
except_column_names.size(); + for (size_t i = 0; i < except_column_names_size; ++i) + { + buffer << except_column_names[i]; + + if (i + 1 != except_column_names_size) + buffer << ", "; + } + } +} + +bool ExceptColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + if (except_transformer_type != rhs_typed.except_transformer_type || + is_strict != rhs_typed.is_strict || + except_column_names != rhs_typed.except_column_names) + return false; + + const auto & rhs_column_matcher = rhs_typed.column_matcher; + + if (!column_matcher && !rhs_column_matcher) + return true; + else if (column_matcher && !rhs_column_matcher) + return false; + else if (!column_matcher && rhs_column_matcher) + return false; + + return column_matcher->pattern() == rhs_column_matcher->pattern(); +} + +void ExceptColumnTransformerNode::updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const +{ + hash_state.update(static_cast(getTransformerType())); + hash_state.update(static_cast(getExceptTransformerType())); + + hash_state.update(except_column_names.size()); + + for (const auto & column_name : except_column_names) + { + hash_state.update(column_name.size()); + hash_state.update(column_name); + } + + if (column_matcher) + { + const auto & pattern = column_matcher->pattern(); + hash_state.update(pattern.size()); + hash_state.update(pattern); + } +} + +QueryTreeNodePtr ExceptColumnTransformerNode::cloneImpl() const +{ + if (except_transformer_type == ExceptColumnTransformerType::REGEXP) + return std::make_shared(column_matcher); + + return std::make_shared(except_column_names, is_strict); +} + +ASTPtr ExceptColumnTransformerNode::toASTImpl() const +{ + auto ast_except_transformer = std::make_shared(); + + if (column_matcher) + { + ast_except_transformer->setPattern(column_matcher->pattern()); + return ast_except_transformer; + } + + ast_except_transformer->children.reserve(except_column_names.size()); + for (const auto & name : except_column_names) + ast_except_transformer->children.push_back(std::make_shared(name)); + + return ast_except_transformer; +} + +/// ReplaceColumnTransformerNode implementation + +ReplaceColumnTransformerNode::ReplaceColumnTransformerNode(const std::vector & replacements_, bool is_strict_) + : IColumnTransformerNode(children_size) + , is_strict(is_strict_) +{ + children[replacements_child_index] = std::make_shared(); + + auto & replacement_expressions_nodes = getReplacements().getNodes(); + + std::unordered_set replacement_names_set; + + for (const auto & replacement : replacements_) + { + auto [_, inserted] = replacement_names_set.emplace(replacement.column_name); + + if (!inserted) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Expressions in column transformer replace should not contain same replacement {} more than once", + replacement.column_name); + + replacements_names.push_back(replacement.column_name); + replacement_expressions_nodes.push_back(replacement.expression_node); + } +} + +QueryTreeNodePtr ReplaceColumnTransformerNode::findReplacementExpression(const std::string & expression_name) +{ + auto it = std::find(replacements_names.begin(), replacements_names.end(), expression_name); + if (it == replacements_names.end()) + return {}; + + size_t replacement_index = it - replacements_names.begin(); + auto & replacement_expressions_nodes = getReplacements().getNodes(); + return replacement_expressions_nodes[replacement_index]; +} + +void ReplaceColumnTransformerNode::dumpTreeImpl(WriteBuffer & buffer, 
FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "REPLACE COLUMN TRANSFORMER id: " << format_state.getNodeId(this); + + const auto & replacements_nodes = getReplacements().getNodes(); + size_t replacements_size = replacements_nodes.size(); + buffer << '\n' << std::string(indent + 2, ' ') << "REPLACEMENTS " << replacements_size << '\n'; + + for (size_t i = 0; i < replacements_size; ++i) + { + const auto & replacement_name = replacements_names[i]; + buffer << std::string(indent + 4, ' ') << "REPLACEMENT NAME " << replacement_name; + buffer << " EXPRESSION" << '\n'; + const auto & expression_node = replacements_nodes[i]; + expression_node->dumpTreeImpl(buffer, format_state, indent + 6); + + if (i + 1 != replacements_size) + buffer << '\n'; + } +} + +bool ReplaceColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + return is_strict == rhs_typed.is_strict && replacements_names == rhs_typed.replacements_names; +} + +void ReplaceColumnTransformerNode::updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const +{ + hash_state.update(static_cast(getTransformerType())); + + const auto & replacement_expressions_nodes = getReplacements().getNodes(); + size_t replacements_size = replacement_expressions_nodes.size(); + hash_state.update(replacements_size); + + for (size_t i = 0; i < replacements_size; ++i) + { + const auto & replacement_name = replacements_names[i]; + hash_state.update(replacement_name.size()); + hash_state.update(replacement_name); + } +} + +QueryTreeNodePtr ReplaceColumnTransformerNode::cloneImpl() const +{ + auto result_replace_transformer = std::make_shared(std::vector{}, false); + + result_replace_transformer->is_strict = is_strict; + result_replace_transformer->replacements_names = replacements_names; + + return result_replace_transformer; +} + +ASTPtr ReplaceColumnTransformerNode::toASTImpl() const +{ + auto ast_replace_transformer = std::make_shared(); + + const auto & replacement_expressions_nodes = getReplacements().getNodes(); + size_t replacements_size = replacement_expressions_nodes.size(); + + ast_replace_transformer->children.reserve(replacements_size); + + for (size_t i = 0; i < replacements_size; ++i) + { + auto replacement_ast = std::make_shared(); + replacement_ast->name = replacements_names[i]; + replacement_ast->expr = replacement_expressions_nodes[i]->toAST(); + ast_replace_transformer->children.push_back(replacement_ast); + } + + return ast_replace_transformer; +} + +} diff --git a/src/Analyzer/ColumnTransformers.h b/src/Analyzer/ColumnTransformers.h new file mode 100644 index 00000000000..e96e606d923 --- /dev/null +++ b/src/Analyzer/ColumnTransformers.h @@ -0,0 +1,316 @@ +#pragma once + +#include + +#include +#include +#include + +namespace DB +{ + +/** Transformers are query tree nodes that handle additional logic that you can apply after MatcherQueryTreeNode is resolved. + * Check MatcherQueryTreeNode.h before reading this documentation. + * + * They main purpose is to apply some logic for expressions after matcher is resolved. + * There are 3 types of transformers: + * + * 1. APPLY transformer: + * APPLY transformer transform matched expression using lambda or function into another expression. + * It has 2 syntax variants: + * 1. lambda variant: SELECT matcher APPLY (x -> expr(x)). + * 2. function variant: SELECT matcher APPLY function_name(optional_parameters). + * + * 2. EXCEPT transformer: + * EXCEPT transformer discard some columns. 
+ * It has 2 syntax variants: + * 1. regexp variant: SELECT matcher EXCEPT ('regexp'). + * 2. column names list variant: SELECT matcher EXCEPT (column_name_1, ...). + * + * 3. REPLACE transformer: + * REPLACE transformer applies similar transformation as APPLY transformer, but only for expressions + * that match replacement expression name. + * + * Example: + * CREATE TABLE test_table (id UInt64) ENGINE=TinyLog; + * SELECT * REPLACE (id + 1 AS id) FROM test_table. + * This query is transformed into SELECT id + 1 FROM test_table. + * It is important that AS id is not alias, it is replacement name. id + 1 is replacement expression. + * + * REPLACE transformer cannot contain multiple replacements with same name. + * + * REPLACE transformer expression does not necessary include replacement column name. + * Example: + * SELECT * REPLACE (1 AS id) FROM test_table. + * + * REPLACE transformer expression does not throw exception if there are no columns to apply replacement. + * Example: + * SELECT * REPLACE (1 AS unknown_column) FROM test_table; + * + * REPLACE transform can contain multiple replacements. + * Example: + * SELECT * REPLACE (1 AS id, 2 AS value). + * + * Matchers can be combined together and chained. + * Example: + * SELECT * EXCEPT (id) APPLY (x -> toString(x)) APPLY (x -> length(x)) FROM test_table. + */ + +/// Column transformer type +enum class ColumnTransfomerType +{ + APPLY, + EXCEPT, + REPLACE +}; + +/// Get column transformer type name +const char * toString(ColumnTransfomerType type); + +class IColumnTransformerNode; +using ColumnTransformerNodePtr = std::shared_ptr; +using ColumnTransformersNodes = std::vector; + +/// IColumnTransformer base interface. +class IColumnTransformerNode : public IQueryTreeNode +{ +public: + /// Get transformer type + virtual ColumnTransfomerType getTransformerType() const = 0; + + /// Get transformer type name + const char * getTransformerTypeName() const + { + return toString(getTransformerType()); + } + + QueryTreeNodeType getNodeType() const final + { + return QueryTreeNodeType::TRANSFORMER; + } + +protected: + /// Construct column transformer node and resize children to children size + explicit IColumnTransformerNode(size_t children_size); +}; + +enum class ApplyColumnTransformerType +{ + LAMBDA, + FUNCTION +}; + +/// Get apply column transformer type name +const char * toString(ApplyColumnTransformerType type); + +class ApplyColumnTransformerNode; +using ApplyColumnTransformerNodePtr = std::shared_ptr; + +/// Apply column transformer +class ApplyColumnTransformerNode final : public IColumnTransformerNode +{ +public: + /** Initialize apply column transformer with expression node. + * Expression node must be lambda or function otherwise exception is thrown. 
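+ * Example: for SELECT * APPLY (x -> toString(x)) expression node is lambda x -> toString(x).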
+ */ + explicit ApplyColumnTransformerNode(QueryTreeNodePtr expression_node_); + + /// Get apply transformer type + ApplyColumnTransformerType getApplyTransformerType() const + { + return apply_transformer_type; + } + + /// Get apply transformer expression node + const QueryTreeNodePtr & getExpressionNode() const + { + return children[expression_child_index]; + } + + ColumnTransfomerType getTransformerType() const override + { + return ColumnTransfomerType::APPLY; + } + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + +protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const override; + + QueryTreeNodePtr cloneImpl() const override; + + ASTPtr toASTImpl() const override; + +private: + ApplyColumnTransformerType apply_transformer_type = ApplyColumnTransformerType::LAMBDA; + + static constexpr size_t expression_child_index = 0; + static constexpr size_t children_size = expression_child_index + 1; +}; + +/// Except column transformer type +enum class ExceptColumnTransformerType +{ + REGEXP, + COLUMN_LIST, +}; + +const char * toString(ExceptColumnTransformerType type); + +class ExceptColumnTransformerNode; +using ExceptColumnTransformerNodePtr = std::shared_ptr; + +/** Except column transformer. + * Strict EXCEPT column transformer must use all column names during matched nodes transformation. + * + * Example: + * CREATE TABLE test_table (id UInt64, value String) ENGINE=TinyLog; + * SELECT * EXCEPT STRICT (id, value1) FROM test_table; + * Such query will throw exception because column with name `value1` was not matched by strict EXCEPT transformer. + * + * Strict is valid only for EXCEPT COLUMN_LIST transformer. + */ +class ExceptColumnTransformerNode final : public IColumnTransformerNode +{ +public: + /// Initialize except column transformer with column names + explicit ExceptColumnTransformerNode(Names except_column_names_, bool is_strict_); + + /// Initialize except column transformer with regexp column matcher + explicit ExceptColumnTransformerNode(std::shared_ptr column_matcher_); + + /// Get except transformer type + ExceptColumnTransformerType getExceptTransformerType() const + { + return except_transformer_type; + } + + /** Returns true if except column transformer is strict, false otherwise. + * Valid only for EXCEPT COLUMN_LIST transformer. + */ + bool isStrict() const + { + return is_strict; + } + + /// Returns true if except transformer match column name, false otherwise. + bool isColumnMatching(const std::string & column_name) const; + + /** Get except column names. + * Valid only for column list except transformer. 
+ */ + const Names & getExceptColumnNames() const + { + return except_column_names; + } + + ColumnTransfomerType getTransformerType() const override + { + return ColumnTransfomerType::EXCEPT; + } + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + +protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const override; + + QueryTreeNodePtr cloneImpl() const override; + + ASTPtr toASTImpl() const override; + +private: + ExceptColumnTransformerType except_transformer_type; + Names except_column_names; + std::shared_ptr column_matcher; + bool is_strict = false; + + static constexpr size_t children_size = 0; +}; + +class ReplaceColumnTransformerNode; +using ReplaceColumnTransformerNodePtr = std::shared_ptr; + +/** Replace column transformer. + * Strict replace column transformer must use all replacements during matched nodes transformation. + * + * Example: + * CREATE TABLE test_table (id UInt64, value String) ENGINE=TinyLog; + * SELECT * REPLACE STRICT (1 AS id, 2 AS value_1) FROM test_table; + * Such query will throw exception because column with name `value1` was not matched by strict REPLACE transformer. + */ +class ReplaceColumnTransformerNode final : public IColumnTransformerNode +{ +public: + /// Replacement is column name and replace expression + struct Replacement + { + std::string column_name; + QueryTreeNodePtr expression_node; + }; + + /// Initialize replace column transformer with replacements + explicit ReplaceColumnTransformerNode(const std::vector & replacements_, bool is_strict); + + ColumnTransfomerType getTransformerType() const override + { + return ColumnTransfomerType::REPLACE; + } + + /// Get replacements + const ListNode & getReplacements() const + { + return children[replacements_child_index]->as(); + } + + /// Get replacements node + const QueryTreeNodePtr & getReplacementsNode() const + { + return children[replacements_child_index]; + } + + /// Get replacements names + const Names & getReplacementsNames() const + { + return replacements_names; + } + + /// Returns true if replace column transformer is strict, false otherwise + bool isStrict() const + { + return is_strict; + } + + /** Returns replacement expression if replacement is registered for expression name, null otherwise. + * Returned replacement expression must be cloned by caller. 
+ */ + QueryTreeNodePtr findReplacementExpression(const std::string & expression_name); + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + +protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const override; + + QueryTreeNodePtr cloneImpl() const override; + + ASTPtr toASTImpl() const override; + +private: + ListNode & getReplacements() + { + return children[replacements_child_index]->as(); + } + + Names replacements_names; + bool is_strict = false; + + static constexpr size_t replacements_child_index = 0; + static constexpr size_t children_size = replacements_child_index + 1; +}; + +} diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp new file mode 100644 index 00000000000..b7de2acb5a4 --- /dev/null +++ b/src/Analyzer/ConstantNode.cpp @@ -0,0 +1,71 @@ +#include + +#include +#include + +#include +#include +#include + +#include + +#include + +#include + +namespace DB +{ + +ConstantNode::ConstantNode(ConstantValuePtr constant_value_) + : IQueryTreeNode(children_size) + , constant_value(std::move(constant_value_)) + , value_string(applyVisitor(FieldVisitorToString(), constant_value->getValue())) +{ +} + +ConstantNode::ConstantNode(Field value_, DataTypePtr value_data_type_) + : ConstantNode(std::make_shared(convertFieldToTypeOrThrow(value_, *value_data_type_), value_data_type_)) +{} + +ConstantNode::ConstantNode(Field value_) + : ConstantNode(value_, applyVisitor(FieldToDataType(), value_)) +{} + +void ConstantNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "CONSTANT id: " << format_state.getNodeId(this); + + if (hasAlias()) + buffer << ", alias: " << getAlias(); + + buffer << ", constant_value: " << constant_value->getValue().dump(); + buffer << ", constant_value_type: " << constant_value->getType()->getName(); +} + +bool ConstantNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + return *constant_value == *rhs_typed.constant_value && value_string == rhs_typed.value_string; +} + +void ConstantNode::updateTreeHashImpl(HashState & hash_state) const +{ + auto type_name = constant_value->getType()->getName(); + hash_state.update(type_name.size()); + hash_state.update(type_name); + + hash_state.update(value_string.size()); + hash_state.update(value_string); +} + +QueryTreeNodePtr ConstantNode::cloneImpl() const +{ + return std::make_shared(constant_value); +} + +ASTPtr ConstantNode::toASTImpl() const +{ + return std::make_shared(constant_value->getValue()); +} + +} diff --git a/src/Analyzer/ConstantNode.h b/src/Analyzer/ConstantNode.h new file mode 100644 index 00000000000..29e8cd25532 --- /dev/null +++ b/src/Analyzer/ConstantNode.h @@ -0,0 +1,82 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +/** Constant node represents constant value in query tree. + * Constant value must be representable by Field. + * Examples: 1, 'constant_string', [1,2,3]. + */ +class ConstantNode; +using ConstantNodePtr = std::shared_ptr; + +class ConstantNode final : public IQueryTreeNode +{ +public: + /// Construct constant query tree node from constant value + explicit ConstantNode(ConstantValuePtr constant_value_); + + /** Construct constant query tree node from field and data type. + * + * Throws exception if value cannot be converted to value data type. 
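+ * Illustrative example: ConstantNode(Field(1), std::make_shared<DataTypeUInt64>()) represents constant 1 with type UInt64.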
+ */ + explicit ConstantNode(Field value_, DataTypePtr value_data_type_); + + /// Construct constant query tree node from field, data type will be derived from field value + explicit ConstantNode(Field value_); + + /// Get constant value + const Field & getValue() const + { + return constant_value->getValue(); + } + + /// Get constant value string representation + const String & getValueStringRepresentation() const + { + return value_string; + } + + ConstantValuePtr getConstantValueOrNull() const override + { + return constant_value; + } + + QueryTreeNodeType getNodeType() const override + { + return QueryTreeNodeType::CONSTANT; + } + + String getName() const override + { + return value_string; + } + + DataTypePtr getResultType() const override + { + return constant_value->getType(); + } + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + +protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(HashState & hash_state) const override; + + QueryTreeNodePtr cloneImpl() const override; + + ASTPtr toASTImpl() const override; + +private: + ConstantValuePtr constant_value; + String value_string; + + static constexpr size_t children_size = 0; +}; + +} diff --git a/src/Analyzer/ConstantValue.h b/src/Analyzer/ConstantValue.h new file mode 100644 index 00000000000..a9e2ffd9e65 --- /dev/null +++ b/src/Analyzer/ConstantValue.h @@ -0,0 +1,47 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/** Immutable constant value representation during analysis stage. + * Some query nodes can be represented by constant (scalar subqueries, functions with constant arguments). + */ +class ConstantValue; +using ConstantValuePtr = std::shared_ptr; + +class ConstantValue +{ +public: + ConstantValue(Field value_, DataTypePtr data_type_) + : value(std::move(value_)) + , data_type(std::move(data_type_)) + {} + + const Field & getValue() const + { + return value; + } + + const DataTypePtr & getType() const + { + return data_type; + } +private: + Field value; + DataTypePtr data_type; +}; + +inline bool operator==(const ConstantValue & lhs, const ConstantValue & rhs) +{ + return lhs.getValue() == rhs.getValue() && lhs.getType()->equals(*rhs.getType()); +} + +inline bool operator!=(const ConstantValue & lhs, const ConstantValue & rhs) +{ + return !(lhs == rhs); +} + +} diff --git a/src/Analyzer/FunctionNode.cpp b/src/Analyzer/FunctionNode.cpp new file mode 100644 index 00000000000..7468141b3d5 --- /dev/null +++ b/src/Analyzer/FunctionNode.cpp @@ -0,0 +1,215 @@ +#include + +#include +#include + +#include +#include + +#include + +#include + +#include + +#include + +namespace DB +{ + +FunctionNode::FunctionNode(String function_name_) + : IQueryTreeNode(children_size) + , function_name(function_name_) +{ + children[parameters_child_index] = std::make_shared(); + children[arguments_child_index] = std::make_shared(); +} + +void FunctionNode::resolveAsFunction(FunctionOverloadResolverPtr function_value, DataTypePtr result_type_value) +{ + aggregate_function = nullptr; + function = std::move(function_value); + result_type = std::move(result_type_value); + function_name = function->getName(); +} + +void FunctionNode::resolveAsAggregateFunction(AggregateFunctionPtr aggregate_function_value, DataTypePtr result_type_value) +{ + function = nullptr; + aggregate_function = std::move(aggregate_function_value); + result_type = std::move(result_type_value); + function_name = aggregate_function->getName(); +} + +void 
FunctionNode::resolveAsWindowFunction(AggregateFunctionPtr window_function_value, DataTypePtr result_type_value) +{ + resolveAsAggregateFunction(window_function_value, result_type_value); +} + +void FunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "FUNCTION id: " << format_state.getNodeId(this); + + if (hasAlias()) + buffer << ", alias: " << getAlias(); + + buffer << ", function_name: " << function_name; + + std::string function_type = "ordinary"; + if (isAggregateFunction()) + function_type = "aggregate"; + else if (isWindowFunction()) + function_type = "window"; + + buffer << ", function_type: " << function_type; + + if (result_type) + buffer << ", result_type: " + result_type->getName(); + + if (constant_value) + { + buffer << ", constant_value: " << constant_value->getValue().dump(); + buffer << ", constant_value_type: " << constant_value->getType()->getName(); + } + + const auto & parameters = getParameters(); + if (!parameters.getNodes().empty()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "PARAMETERS\n"; + parameters.dumpTreeImpl(buffer, format_state, indent + 4); + } + + const auto & arguments = getArguments(); + if (!arguments.getNodes().empty()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "ARGUMENTS\n"; + arguments.dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (hasWindow()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "WINDOW\n"; + getWindowNode()->dumpTreeImpl(buffer, format_state, indent + 4); + } +} + +String FunctionNode::getName() const +{ + String name = function_name; + + const auto & parameters = getParameters(); + const auto & parameters_nodes = parameters.getNodes(); + if (!parameters_nodes.empty()) + { + name += '('; + name += parameters.getName(); + name += ')'; + } + + const auto & arguments = getArguments(); + name += '('; + name += arguments.getName(); + name += ')'; + + return name; +} + +bool FunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + if (function_name != rhs_typed.function_name || + isAggregateFunction() != rhs_typed.isAggregateFunction() || + isOrdinaryFunction() != rhs_typed.isOrdinaryFunction() || + isWindowFunction() != rhs_typed.isWindowFunction()) + return false; + + if (result_type && rhs_typed.result_type && !result_type->equals(*rhs_typed.getResultType())) + return false; + else if (result_type && !rhs_typed.result_type) + return false; + else if (!result_type && rhs_typed.result_type) + return false; + + if (constant_value && rhs_typed.constant_value && *constant_value != *rhs_typed.constant_value) + return false; + else if (constant_value && !rhs_typed.constant_value) + return false; + else if (!constant_value && rhs_typed.constant_value) + return false; + + return true; +} + +void FunctionNode::updateTreeHashImpl(HashState & hash_state) const +{ + hash_state.update(function_name.size()); + hash_state.update(function_name); + hash_state.update(isOrdinaryFunction()); + hash_state.update(isAggregateFunction()); + hash_state.update(isWindowFunction()); + + if (result_type) + { + auto result_type_name = result_type->getName(); + hash_state.update(result_type_name.size()); + hash_state.update(result_type_name); + } + + if (constant_value) + { + auto constant_dump = applyVisitor(FieldVisitorToString(), constant_value->getValue()); + hash_state.update(constant_dump.size()); + hash_state.update(constant_dump); + + auto constant_value_type_name = 
constant_value->getType()->getName(); + hash_state.update(constant_value_type_name.size()); + hash_state.update(constant_value_type_name); + } +} + +QueryTreeNodePtr FunctionNode::cloneImpl() const +{ + auto result_function = std::make_shared(function_name); + + /** This is valid for clone method to reuse same function pointers + * because ordinary functions or aggregate functions must be stateless. + */ + result_function->function = function; + result_function->aggregate_function = aggregate_function; + result_function->result_type = result_type; + result_function->constant_value = constant_value; + + return result_function; +} + +ASTPtr FunctionNode::toASTImpl() const +{ + auto function_ast = std::make_shared(); + + function_ast->name = function_name; + function_ast->is_window_function = isWindowFunction(); + + const auto & parameters = getParameters(); + if (!parameters.getNodes().empty()) + { + function_ast->children.push_back(parameters.toAST()); + function_ast->parameters = function_ast->children.back(); + } + + const auto & arguments = getArguments(); + function_ast->children.push_back(arguments.toAST()); + function_ast->arguments = function_ast->children.back(); + + auto window_node = getWindowNode(); + if (window_node) + { + if (auto * identifier_node = window_node->as()) + function_ast->window_name = identifier_node->getIdentifier().getFullName(); + else + function_ast->window_definition = window_node->toAST(); + } + + return function_ast; +} + +} diff --git a/src/Analyzer/FunctionNode.h b/src/Analyzer/FunctionNode.h new file mode 100644 index 00000000000..18b4c6d445c --- /dev/null +++ b/src/Analyzer/FunctionNode.h @@ -0,0 +1,232 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class IFunctionOverloadResolver; +using FunctionOverloadResolverPtr = std::shared_ptr; + +class IAggregateFunction; +using AggregateFunctionPtr = std::shared_ptr; + +/** Function node represents function in query tree. + * Function syntax: function_name(parameter_1, ...)(argument_1, ...). + * If function does not have parameters its syntax is function_name(argument_1, ...). + * If function does not have arguments its syntax is function_name(). + * + * In query tree function parameters and arguments are represented by ListNode. + * + * Function can be: + * 1. Aggregate function. Example: quantile(0.5)(x), sum(x). + * 2. Non aggregate function. Example: plus(x, x). + * 3. Window function. Example: sum(x) OVER (PARTITION BY expr ORDER BY expr). + * + * Initially function node is initialized with function name. + * For window function client must initialize function window node. + * + * During query analysis pass function must be resolved using `resolveAsFunction`, `resolveAsAggregateFunction`, `resolveAsWindowFunction` methods. + * Resolved function is function that has result type and is initialized with concrete aggregate or non aggregate function. + */ +class FunctionNode; +using FunctionNodePtr = std::shared_ptr; + +class FunctionNode final : public IQueryTreeNode +{ +public: + /** Initialize function node with function name. + * Later during query analysis pass function must be resolved. 
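+ * Example: FunctionNode("plus") creates unresolved function node with empty parameters and arguments lists.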
+ */ + explicit FunctionNode(String function_name_); + + /// Get function name + const String & getFunctionName() const + { + return function_name; + } + + /// Get parameters + const ListNode & getParameters() const + { + return children[parameters_child_index]->as(); + } + + /// Get parameters + ListNode & getParameters() + { + return children[parameters_child_index]->as(); + } + + /// Get parameters node + const QueryTreeNodePtr & getParametersNode() const + { + return children[parameters_child_index]; + } + + /// Get parameters node + QueryTreeNodePtr & getParametersNode() + { + return children[parameters_child_index]; + } + + /// Get arguments + const ListNode & getArguments() const + { + return children[arguments_child_index]->as(); + } + + /// Get arguments + ListNode & getArguments() + { + return children[arguments_child_index]->as(); + } + + /// Get arguments node + const QueryTreeNodePtr & getArgumentsNode() const + { + return children[arguments_child_index]; + } + + /// Get arguments node + QueryTreeNodePtr & getArgumentsNode() + { + return children[arguments_child_index]; + } + + /// Returns true if function node has window, false otherwise + bool hasWindow() const + { + return children[window_child_index] != nullptr; + } + + /** Get window node. + * Valid only for window function node. + * Result window node can be identifier node or window node. + * 1. It can be identifier node if window function is defined as expr OVER window_name. + * 2. It can be window node if window function is defined as expr OVER (window_name ...). + */ + const QueryTreeNodePtr & getWindowNode() const + { + return children[window_child_index]; + } + + /** Get window node. + * Valid only for window function node. + */ + QueryTreeNodePtr & getWindowNode() + { + return children[window_child_index]; + } + + /** Get non aggregate function. + * If function is not resolved nullptr returned. + */ + const FunctionOverloadResolverPtr & getFunction() const + { + return function; + } + + /** Get aggregate function. + * If function is not resolved nullptr returned. + * If function is resolved as non aggregate function nullptr returned. + */ + const AggregateFunctionPtr & getAggregateFunction() const + { + return aggregate_function; + } + + /// Is function node resolved + bool isResolved() const + { + return result_type != nullptr && (function != nullptr || aggregate_function != nullptr); + } + + /// Is function node window function + bool isWindowFunction() const + { + return getWindowNode() != nullptr; + } + + /// Is function node aggregate function + bool isAggregateFunction() const + { + return aggregate_function != nullptr && !isWindowFunction(); + } + + /// Is function node ordinary function + bool isOrdinaryFunction() const + { + return function != nullptr; + } + + /** Resolve function node as non aggregate function. + * It is important that function name is updated with resolved function name. + * Main motivation for this is query tree optimizations. + * Assume we have `multiIf` function with single condition, it can be converted to `if` function. + * Function name must be updated accordingly. + */ + void resolveAsFunction(FunctionOverloadResolverPtr function_value, DataTypePtr result_type_value); + + /** Resolve function node as aggregate function. + * It is important that function name is updated with resolved function name. + * Main motivation for this is query tree optimizations. 
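+ * Example: `countDistinct` can be resolved into `uniqExact` aggregate function depending on the count_distinct_implementation setting, and function name must be updated accordingly.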
+ */ + void resolveAsAggregateFunction(AggregateFunctionPtr aggregate_function_value, DataTypePtr result_type_value); + + /** Resolve function node as window function. + * It is important that function name is updated with resolved function name. + * Main motivation for this is query tree optimizations. + */ + void resolveAsWindowFunction(AggregateFunctionPtr window_function_value, DataTypePtr result_type_value); + + /// Perform constant folding for function node + void performConstantFolding(ConstantValuePtr constant_folded_value) + { + constant_value = std::move(constant_folded_value); + } + + ConstantValuePtr getConstantValueOrNull() const override + { + return constant_value; + } + + QueryTreeNodeType getNodeType() const override + { + return QueryTreeNodeType::FUNCTION; + } + + DataTypePtr getResultType() const override + { + return result_type; + } + + String getName() const override; + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + +protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(HashState & hash_state) const override; + + QueryTreeNodePtr cloneImpl() const override; + + ASTPtr toASTImpl() const override; + +private: + String function_name; + FunctionOverloadResolverPtr function; + AggregateFunctionPtr aggregate_function; + DataTypePtr result_type; + ConstantValuePtr constant_value; + + static constexpr size_t parameters_child_index = 0; + static constexpr size_t arguments_child_index = 1; + static constexpr size_t window_child_index = 2; + static constexpr size_t children_size = window_child_index + 1; +}; + +} diff --git a/src/Analyzer/IQueryTreeNode.cpp b/src/Analyzer/IQueryTreeNode.cpp new file mode 100644 index 00000000000..ea2412eadb2 --- /dev/null +++ b/src/Analyzer/IQueryTreeNode.cpp @@ -0,0 +1,332 @@ +#include + +#include + +#include + +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; +} + +const char * toString(QueryTreeNodeType type) +{ + switch (type) + { + case QueryTreeNodeType::IDENTIFIER: return "IDENTIFIER"; + case QueryTreeNodeType::MATCHER: return "MATCHER"; + case QueryTreeNodeType::TRANSFORMER: return "TRANSFORMER"; + case QueryTreeNodeType::LIST: return "LIST"; + case QueryTreeNodeType::CONSTANT: return "CONSTANT"; + case QueryTreeNodeType::FUNCTION: return "FUNCTION"; + case QueryTreeNodeType::COLUMN: return "COLUMN"; + case QueryTreeNodeType::LAMBDA: return "LAMBDA"; + case QueryTreeNodeType::SORT: return "SORT"; + case QueryTreeNodeType::INTERPOLATE: return "INTERPOLATE"; + case QueryTreeNodeType::WINDOW: return "WINDOW"; + case QueryTreeNodeType::TABLE: return "TABLE"; + case QueryTreeNodeType::TABLE_FUNCTION: return "TABLE_FUNCTION"; + case QueryTreeNodeType::QUERY: return "QUERY"; + case QueryTreeNodeType::ARRAY_JOIN: return "ARRAY_JOIN"; + case QueryTreeNodeType::JOIN: return "JOIN"; + case QueryTreeNodeType::UNION: return "UNION"; + } +} + +IQueryTreeNode::IQueryTreeNode(size_t children_size, size_t weak_pointers_size) +{ + children.resize(children_size); + weak_pointers.resize(weak_pointers_size); +} + +IQueryTreeNode::IQueryTreeNode(size_t children_size) +{ + children.resize(children_size); +} + +namespace +{ + +using NodePair = std::pair; + +struct NodePairHash +{ + size_t operator()(const NodePair & node_pair) const + { + auto hash = std::hash(); + + size_t result = 0; + boost::hash_combine(result, hash(node_pair.first)); + boost::hash_combine(result, 
hash(node_pair.second)); + + return result; + } +}; + +} + +bool IQueryTreeNode::isEqual(const IQueryTreeNode & rhs) const +{ + std::vector nodes_to_process; + std::unordered_set equals_pairs; + + nodes_to_process.emplace_back(this, &rhs); + + while (!nodes_to_process.empty()) + { + auto nodes_to_compare = nodes_to_process.back(); + nodes_to_process.pop_back(); + + const auto * lhs_node_to_compare = nodes_to_compare.first; + const auto * rhs_node_to_compare = nodes_to_compare.second; + + if (equals_pairs.contains(std::make_pair(lhs_node_to_compare, rhs_node_to_compare))) + continue; + + assert(lhs_node_to_compare); + assert(rhs_node_to_compare); + + if (lhs_node_to_compare->getNodeType() != rhs_node_to_compare->getNodeType() || + lhs_node_to_compare->alias != rhs_node_to_compare->alias || + !lhs_node_to_compare->isEqualImpl(*rhs_node_to_compare)) + { + return false; + } + + const auto & lhs_children = lhs_node_to_compare->children; + const auto & rhs_children = rhs_node_to_compare->children; + + size_t lhs_children_size = lhs_children.size(); + if (lhs_children_size != rhs_children.size()) + return false; + + for (size_t i = 0; i < lhs_children_size; ++i) + { + const auto & lhs_child = lhs_children[i]; + const auto & rhs_child = rhs_children[i]; + + if (!lhs_child && !rhs_child) + continue; + else if (lhs_child && !rhs_child) + return false; + else if (!lhs_child && rhs_child) + return false; + + nodes_to_process.emplace_back(lhs_child.get(), rhs_child.get()); + } + + const auto & lhs_weak_pointers = lhs_node_to_compare->weak_pointers; + const auto & rhs_weak_pointers = rhs_node_to_compare->weak_pointers; + + size_t lhs_weak_pointers_size = lhs_weak_pointers.size(); + + if (lhs_weak_pointers_size != rhs_weak_pointers.size()) + return false; + + for (size_t i = 0; i < lhs_weak_pointers_size; ++i) + { + auto lhs_strong_pointer = lhs_weak_pointers[i].lock(); + auto rhs_strong_pointer = rhs_weak_pointers[i].lock(); + + if (!lhs_strong_pointer && !rhs_strong_pointer) + continue; + else if (lhs_strong_pointer && !rhs_strong_pointer) + return false; + else if (!lhs_strong_pointer && rhs_strong_pointer) + return false; + + nodes_to_process.emplace_back(lhs_strong_pointer.get(), rhs_strong_pointer.get()); + } + + equals_pairs.emplace(lhs_node_to_compare, rhs_node_to_compare); + } + + return true; +} + +IQueryTreeNode::Hash IQueryTreeNode::getTreeHash() const +{ + HashState hash_state; + + std::unordered_map node_to_identifier; + + std::vector nodes_to_process; + nodes_to_process.push_back(this); + + while (!nodes_to_process.empty()) + { + const auto * node_to_process = nodes_to_process.back(); + nodes_to_process.pop_back(); + + auto node_identifier_it = node_to_identifier.find(node_to_process); + if (node_identifier_it != node_to_identifier.end()) + { + hash_state.update(node_identifier_it->second); + continue; + } + + node_to_identifier.emplace(node_to_process, node_to_identifier.size()); + + hash_state.update(static_cast(node_to_process->getNodeType())); + if (!node_to_process->alias.empty()) + { + hash_state.update(node_to_process->alias.size()); + hash_state.update(node_to_process->alias); + } + + node_to_process->updateTreeHashImpl(hash_state); + + hash_state.update(node_to_process->children.size()); + + for (const auto & node_to_process_child : node_to_process->children) + { + if (!node_to_process_child) + continue; + + nodes_to_process.push_back(node_to_process_child.get()); + } + + hash_state.update(node_to_process->weak_pointers.size()); + + for (const auto & weak_pointer : 
node_to_process->weak_pointers) + { + auto strong_pointer = weak_pointer.lock(); + if (!strong_pointer) + continue; + + nodes_to_process.push_back(strong_pointer.get()); + } + } + + Hash result; + hash_state.get128(result); + + return result; +} + +QueryTreeNodePtr IQueryTreeNode::clone() const +{ + /** Clone tree with this node as root. + * + * Algorithm + * For each node we clone state and also create mapping old pointer to new pointer. + * For each cloned node we update weak pointers array. + * + * After that we can update pointer in weak pointers array using old pointer to new pointer mapping. + */ + std::unordered_map old_pointer_to_new_pointer; + std::vector weak_pointers_to_update_after_clone; + + QueryTreeNodePtr result_cloned_node_place; + + std::vector> nodes_to_clone; + nodes_to_clone.emplace_back(this, &result_cloned_node_place); + + while (!nodes_to_clone.empty()) + { + const auto [node_to_clone, place_for_cloned_node] = nodes_to_clone.back(); + nodes_to_clone.pop_back(); + + auto node_clone = node_to_clone->cloneImpl(); + *place_for_cloned_node = node_clone; + + node_clone->setAlias(node_to_clone->alias); + node_clone->setOriginalAST(node_to_clone->original_ast); + node_clone->children = node_to_clone->children; + node_clone->weak_pointers = node_to_clone->weak_pointers; + + old_pointer_to_new_pointer.emplace(node_to_clone, node_clone); + + for (auto & child : node_clone->children) + { + if (!child) + continue; + + nodes_to_clone.emplace_back(child.get(), &child); + } + + for (auto & weak_pointer : node_clone->weak_pointers) + { + weak_pointers_to_update_after_clone.push_back(&weak_pointer); + } + } + + /** Update weak pointers to new pointers if they were changed during clone. + * To do this we check old pointer to new pointer map, if weak pointer + * strong pointer exists as old pointer in map, reinitialize weak pointer with new pointer. + */ + for (auto & weak_pointer_ptr : weak_pointers_to_update_after_clone) + { + assert(weak_pointer_ptr); + auto strong_pointer = weak_pointer_ptr->lock(); + auto it = old_pointer_to_new_pointer.find(strong_pointer.get()); + + /** If node had weak pointer to some other node and this node is not part of cloned subtree do not update weak pointer. + * It will continue to point to previous location and it is expected. + * + * Example: SELECT id FROM test_table; + * During analysis `id` is resolved as column node and `test_table` is column source. + * If we clone `id` column, result column node weak source pointer will point to the same `test_table` column source. 
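+ * If the column source is part of the cloned subtree (for example the whole query is cloned), the weak pointer is remapped to the cloned source.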
+ */ + if (it == old_pointer_to_new_pointer.end()) + continue; + + *weak_pointer_ptr = it->second; + } + + return result_cloned_node_place; +} + +ASTPtr IQueryTreeNode::toAST() const +{ + auto converted_node = toASTImpl(); + + if (auto * ast_with_alias = dynamic_cast(converted_node.get())) + converted_node->setAlias(alias); + + return converted_node; +} + +String IQueryTreeNode::formatOriginalASTForErrorMessage() const +{ + if (!original_ast) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Original AST was not set"); + + return original_ast->formatForErrorMessage(); +} + +String IQueryTreeNode::formatConvertedASTForErrorMessage() const +{ + return toAST()->formatForErrorMessage(); +} + +String IQueryTreeNode::dumpTree() const +{ + WriteBufferFromOwnString buffer; + dumpTree(buffer); + + return buffer.str(); +} + +size_t IQueryTreeNode::FormatState::getNodeId(const IQueryTreeNode * node) +{ + auto [it, _] = node_to_id.emplace(node, node_to_id.size()); + return it->second; +} + +void IQueryTreeNode::dumpTree(WriteBuffer & buffer) const +{ + FormatState state; + dumpTreeImpl(buffer, state, 0); +} + +} diff --git a/src/Analyzer/IQueryTreeNode.h b/src/Analyzer/IQueryTreeNode.h new file mode 100644 index 00000000000..2ba96d27575 --- /dev/null +++ b/src/Analyzer/IQueryTreeNode.h @@ -0,0 +1,282 @@ +#pragma once + +#include +#include +#include + +#include + +#include + +#include + +#include +#include + +class SipHash; + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; + extern const int LOGICAL_ERROR; +} + +class WriteBuffer; + +/// Query tree node type +enum class QueryTreeNodeType +{ + IDENTIFIER, + MATCHER, + TRANSFORMER, + LIST, + CONSTANT, + FUNCTION, + COLUMN, + LAMBDA, + SORT, + INTERPOLATE, + WINDOW, + TABLE, + TABLE_FUNCTION, + QUERY, + ARRAY_JOIN, + JOIN, + UNION +}; + +/// Convert query tree node type to string +const char * toString(QueryTreeNodeType type); + +/** Query tree is semantical representation of query. + * Query tree node represent node in query tree. + * IQueryTreeNode is base class for all query tree nodes. + * + * Important property of query tree is that each query tree node can contain weak pointers to other + * query tree nodes. Keeping weak pointer to other query tree nodes can be useful for example for column + * to keep weak pointer to column source, column source can be table, lambda, subquery and preserving of + * such information can significantly simplify query planning. + * + * Another important property of query tree it must be convertible to AST without losing information. + */ +class IQueryTreeNode; +using QueryTreeNodePtr = std::shared_ptr; +using QueryTreeNodes = std::vector; +using QueryTreeNodeWeakPtr = std::weak_ptr; +using QueryTreeWeakNodes = std::vector; + +class IQueryTreeNode : public TypePromotion +{ +public: + virtual ~IQueryTreeNode() = default; + + /// Get query tree node type + virtual QueryTreeNodeType getNodeType() const = 0; + + /// Get query tree node type name + const char * getNodeTypeName() const + { + return toString(getNodeType()); + } + + /** Get name of query tree node that can be used as part of expression. + * TODO: Projection name, expression name must be refactored in better interface. + */ + virtual String getName() const + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Method getName is not supported for {} query node", getNodeTypeName()); + } + + /** Get result type of query tree node that can be used as part of expression. + * If node does not support this method exception is thrown. 
+ * TODO: Maybe this can be a part of ExpressionQueryTreeNode. + */ + virtual DataTypePtr getResultType() const + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Method getResultType is not supported for {} query node", getNodeTypeName()); + } + + /// Returns true if node has constant value + bool hasConstantValue() const + { + return getConstantValueOrNull() != nullptr; + } + + /** Returns constant value with type if node has constant value, and can be replaced with it. + * Examples: scalar subquery, function with constant arguments. + */ + virtual const ConstantValue & getConstantValue() const + { + auto constant_value = getConstantValueOrNull(); + if (!constant_value) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Node does not have constant value"); + + return *constant_value; + } + + /// Returns constant value with type if node has constant value or null otherwise + virtual ConstantValuePtr getConstantValueOrNull() const + { + return {}; + } + + /** Is tree equal to other tree with node root. + * + * Aliases of query tree nodes are compared during isEqual call. + * Original ASTs of query tree nodes are not compared during isEqual call. + */ + bool isEqual(const IQueryTreeNode & rhs) const; + + using Hash = std::pair; + using HashState = SipHash; + + /** Get tree hash identifying current tree + * + * Alias of query tree node is part of query tree hash. + * Original AST is not part of query tree hash. + */ + Hash getTreeHash() const; + + /// Get a deep copy of the query tree + QueryTreeNodePtr clone() const; + + /// Returns true if node has alias, false otherwise + bool hasAlias() const + { + return !alias.empty(); + } + + /// Get node alias + const String & getAlias() const + { + return alias; + } + + /// Set node alias + void setAlias(String alias_value) + { + alias = std::move(alias_value); + } + + /// Remove node alias + void removeAlias() + { + alias = {}; + } + + /// Returns true if query tree node has original AST, false otherwise + bool hasOriginalAST() const + { + return original_ast != nullptr; + } + + /// Get query tree node original AST + const ASTPtr & getOriginalAST() const + { + return original_ast; + } + + /** Set query tree node original AST. + * This AST will not be modified later. + */ + void setOriginalAST(ASTPtr original_ast_value) + { + original_ast = std::move(original_ast_value); + } + + /** If query tree has original AST format it for error message. + * Otherwise exception is thrown. + */ + String formatOriginalASTForErrorMessage() const; + + /// Convert query tree to AST + ASTPtr toAST() const; + + /// Convert query tree to AST and then format it for error message. + String formatConvertedASTForErrorMessage() const; + + /** Format AST for error message. + * If original AST exists use `formatOriginalASTForErrorMessage`. + * Otherwise use `formatConvertedASTForErrorMessage`. + */ + String formatASTForErrorMessage() const + { + if (original_ast) + return formatOriginalASTForErrorMessage(); + + return formatConvertedASTForErrorMessage(); + } + + /// Dump query tree to string + String dumpTree() const; + + /// Dump query tree to buffer + void dumpTree(WriteBuffer & buffer) const; + + class FormatState + { + public: + size_t getNodeId(const IQueryTreeNode * node); + + private: + std::unordered_map node_to_id; + }; + + /** Dump query tree to buffer starting with indent. + * + * Node must also dump its children. 
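A minimal sketch (hypothetical helper function, assuming the Analyzer headers added in this diff) of how the comparison, hashing, cloning and AST-conversion contract above plays out:

```cpp
#include <Analyzer/IdentifierNode.h>

using namespace DB;

void compareAndCloneSketch()
{
    /// Two identifier nodes that differ only in alias: aliases participate in
    /// isEqual() and getTreeHash(), so the nodes compare as different.
    auto lhs = std::make_shared<IdentifierNode>(Identifier("test_table.id"));
    auto rhs = std::make_shared<IdentifierNode>(Identifier("test_table.id"));
    rhs->setAlias("row_id");

    bool equal = lhs->isEqual(*rhs);       /// false, aliases differ
    auto lhs_hash = lhs->getTreeHash();    /// differs from rhs->getTreeHash()

    /// clone() is documented as a deep copy, so the copy compares equal to the original.
    auto copy = rhs->clone();
    bool copy_equal = copy->isEqual(*rhs); /// true

    /// toAST() converts back to AST and propagates the alias to ASTWithAlias nodes.
    auto ast = copy->toAST();
}
```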
+ */ + virtual void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const = 0; + + /// Get query tree node children + QueryTreeNodes & getChildren() + { + return children; + } + + /// Get query tree node children + const QueryTreeNodes & getChildren() const + { + return children; + } + +protected: + /** Construct query tree node. + * Resize children to children size. + * Resize weak pointers to weak pointers size. + */ + explicit IQueryTreeNode(size_t children_size, size_t weak_pointers_size); + + /// Construct query tree node and resize children to children size + explicit IQueryTreeNode(size_t children_size); + + /** Subclass must compare its internal state with rhs node internal state and do not compare children or weak pointers to other + * query tree nodes. + */ + virtual bool isEqualImpl(const IQueryTreeNode & rhs) const = 0; + + /** Subclass must update tree hash with its internal state and do not update tree hash for children or weak pointers to other + * query tree nodes. + */ + virtual void updateTreeHashImpl(HashState & hash_state) const = 0; + + /** Subclass must clone its internal state and do not clone children or weak pointers to other + * query tree nodes. + */ + virtual QueryTreeNodePtr cloneImpl() const = 0; + + /// Subclass must convert its internal state and its children to AST + virtual ASTPtr toASTImpl() const = 0; + + QueryTreeNodes children; + QueryTreeWeakNodes weak_pointers; + +private: + String alias; + ASTPtr original_ast; +}; + +} diff --git a/src/Analyzer/IQueryTreePass.h b/src/Analyzer/IQueryTreePass.h new file mode 100644 index 00000000000..39b3d743ed3 --- /dev/null +++ b/src/Analyzer/IQueryTreePass.h @@ -0,0 +1,38 @@ +#pragma once + +#include + +#include + + +namespace DB +{ + +/** After query tree is build it can be later processed by query tree passes. + * This is abstract base class for all query tree passes. + * + * Query tree pass can make query tree modifications, after each pass query tree must be valid. + * Query tree pass must be isolated and perform only necessary query tree modifications for doing its job. + * Dependencies between passes must be avoided. + */ +class IQueryTreePass; +using QueryTreePassPtr = std::shared_ptr; +using QueryTreePasses = std::vector; + +class IQueryTreePass +{ +public: + virtual ~IQueryTreePass() = default; + + /// Get query tree pass name + virtual String getName() = 0; + + /// Get query tree pass description + virtual String getDescription() = 0; + + /// Run pass over query tree + virtual void run(QueryTreeNodePtr query_tree_node, ContextPtr context) = 0; + +}; + +} diff --git a/src/Analyzer/Identifier.h b/src/Analyzer/Identifier.h new file mode 100644 index 00000000000..2252ce2854f --- /dev/null +++ b/src/Analyzer/Identifier.h @@ -0,0 +1,412 @@ +#pragma once + +#include +#include + +#include +#include + +#include +#include + + +namespace DB +{ + +/** Identifier consists from identifier parts. + * Each identifier part is arbitrary long sequence of digits, underscores, lowercase and uppercase letters. + * Example: a, a.b, a.b.c. + */ +class Identifier +{ +public: + Identifier() = default; + + /// Create Identifier from parts + explicit Identifier(const std::vector & parts_) + : parts(parts_) + , full_name(boost::algorithm::join(parts, ".")) + { + } + + /// Create Identifier from parts + explicit Identifier(std::vector && parts_) + : parts(std::move(parts_)) + , full_name(boost::algorithm::join(parts, ".")) + { + } + + /// Create Identifier from full name, full name is split with '.' 
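The pass interface above only requires a name, a description and `run()`; a no-op skeleton (hypothetical class name, shown only to illustrate the interface) could look like:

```cpp
#include <Analyzer/IQueryTreePass.h>

namespace DB
{

/// Hypothetical example pass: leaves the query tree unchanged.
class NoopQueryTreePass final : public IQueryTreePass
{
public:
    String getName() override { return "NoopPass"; }

    String getDescription() override { return "Example pass that does not modify the query tree"; }

    void run(QueryTreeNodePtr /*query_tree_node*/, ContextPtr /*context*/) override
    {
        /// A real pass would traverse and rewrite the tree here,
        /// keeping it valid after every modification.
    }
};

}
```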
as separator. + explicit Identifier(const std::string & full_name_) + : full_name(full_name_) + { + boost::split(parts, full_name, [](char c) { return c == '.'; }); + } + + /// Create Identifier from full name, full name is split with '.' as separator. + explicit Identifier(std::string && full_name_) + : full_name(std::move(full_name_)) + { + boost::split(parts, full_name, [](char c) { return c == '.'; }); + } + + const std::string & getFullName() const + { + return full_name; + } + + const std::vector & getParts() const + { + return parts; + } + + size_t getPartsSize() const + { + return parts.size(); + } + + bool empty() const + { + return parts.empty(); + } + + bool isEmpty() const + { + return parts.empty(); + } + + bool isShort() const + { + return parts.size() == 1; + } + + bool isCompound() const + { + return parts.size() > 1; + } + + const std::string & at(size_t index) const + { + if (index >= parts.size()) + throw std::out_of_range("identifier access part is out of range"); + + return parts[index]; + } + + const std::string & operator[](size_t index) const + { + return parts[index]; + } + + const std::string & front() const + { + return parts.front(); + } + + const std::string & back() const + { + return parts.back(); + } + + /// Returns true, if identifier starts with part, false otherwise + bool startsWith(const std::string_view & part) + { + return !parts.empty() && parts[0] == part; + } + + /// Returns true, if identifier ends with part, false otherwise + bool endsWith(const std::string_view & part) + { + return !parts.empty() && parts.back() == part; + } + + using const_iterator = std::vector::const_iterator; + + const_iterator begin() const + { + return parts.begin(); + } + + const_iterator end() const + { + return parts.end(); + } + + void popFirst(size_t parts_to_remove_size) + { + assert(parts_to_remove_size <= parts.size()); + + size_t parts_size = parts.size(); + std::vector result_parts; + result_parts.reserve(parts_size - parts_to_remove_size); + + for (size_t i = parts_to_remove_size; i < parts_size; ++i) + result_parts.push_back(std::move(parts[i])); + + parts = std::move(result_parts); + full_name = boost::algorithm::join(parts, "."); + } + + void popFirst() + { + return popFirst(1); + } + + void popLast(size_t parts_to_remove_size) + { + assert(parts_to_remove_size <= parts.size()); + + for (size_t i = 0; i < parts_to_remove_size; ++i) + { + size_t last_part_size = parts.back().size(); + parts.pop_back(); + bool is_not_last = !parts.empty(); + full_name.resize(full_name.size() - (last_part_size + static_cast(is_not_last))); + } + } + + void popLast() + { + return popLast(1); + } + + void pop_back() /// NOLINT + { + popLast(); + } + + void push_back(std::string && part) /// NOLINT + { + parts.push_back(std::move(part)); + full_name += '.'; + full_name += parts.back(); + } + + void push_back(const std::string & part) /// NOLINT + { + parts.push_back(part); + full_name += '.'; + full_name += parts.back(); + } + + template + void emplace_back(Args&&... 
args) /// NOLINT + { + parts.emplace_back(std::forward(args)...); + full_name += '.'; + full_name += parts.back(); + } +private: + std::vector parts; + std::string full_name; +}; + +inline bool operator==(const Identifier & lhs, const Identifier & rhs) +{ + return lhs.getFullName() == rhs.getFullName(); +} + +inline bool operator!=(const Identifier & lhs, const Identifier & rhs) +{ + return !(lhs == rhs); +} + +inline std::ostream & operator<<(std::ostream & stream, const Identifier & identifier) +{ + stream << identifier.getFullName(); + return stream; +} + +using Identifiers = std::vector; + +/// View for Identifier +class IdentifierView +{ +public: + IdentifierView() = default; + + IdentifierView(const Identifier & identifier) /// NOLINT + : full_name_view(identifier.getFullName()) + , parts_start_it(identifier.begin()) + , parts_end_it(identifier.end()) + {} + + std::string_view getFullName() const + { + return full_name_view; + } + + size_t getPartsSize() const + { + return parts_end_it - parts_start_it; + } + + bool empty() const + { + return parts_start_it == parts_end_it; + } + + bool isEmpty() const + { + return parts_start_it == parts_end_it; + } + + bool isShort() const + { + return getPartsSize() == 1; + } + + bool isCompound() const + { + return getPartsSize() > 1; + } + + std::string_view at(size_t index) const + { + if (index >= getPartsSize()) + throw std::out_of_range("identifier access part is out of range"); + + return *(parts_start_it + index); + } + + std::string_view operator[](size_t index) const + { + return *(parts_start_it + index); + } + + std::string_view front() const + { + return *parts_start_it; + } + + std::string_view back() const + { + return *(parts_end_it - 1); + } + + bool startsWith(std::string_view part) const + { + return !isEmpty() && *parts_start_it == part; + } + + bool endsWith(std::string_view part) const + { + return !isEmpty() && *(parts_end_it - 1) == part; + } + + void popFirst(size_t parts_to_remove_size) + { + assert(parts_to_remove_size <= getPartsSize()); + + for (size_t i = 0; i < parts_to_remove_size; ++i) + { + size_t part_size = parts_start_it->size(); + ++parts_start_it; + bool is_not_last = parts_start_it != parts_end_it; + full_name_view.remove_prefix(part_size + is_not_last); + } + } + + void popFirst() + { + popFirst(1); + } + + void popLast(size_t parts_to_remove_size) + { + assert(parts_to_remove_size <= getPartsSize()); + + for (size_t i = 0; i < parts_to_remove_size; ++i) + { + size_t last_part_size = (parts_end_it - 1)->size(); + --parts_end_it; + bool is_not_last = parts_start_it != parts_end_it; + full_name_view.remove_suffix(last_part_size + is_not_last); + } + } + + void popLast() + { + popLast(1); + } + + using const_iterator = Identifier::const_iterator; + + const_iterator begin() const + { + return parts_start_it; + } + + const_iterator end() const + { + return parts_end_it; + } +private: + std::string_view full_name_view; + const_iterator parts_start_it; + const_iterator parts_end_it; +}; + +inline bool operator==(const IdentifierView & lhs, const IdentifierView & rhs) +{ + return lhs.getFullName() == rhs.getFullName(); +} + +inline bool operator!=(const IdentifierView & lhs, const IdentifierView & rhs) +{ + return !(lhs == rhs); +} + +inline std::ostream & operator<<(std::ostream & stream, const IdentifierView & identifier_view) +{ + stream << identifier_view.getFullName(); + return stream; +} + +} + +/// See https://fmt.dev/latest/api.html#formatting-user-defined-types + +template <> +struct fmt::formatter +{ + 
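A usage sketch for the Identifier and IdentifierView API above (hypothetical function name):

```cpp
#include <Analyzer/Identifier.h>

using namespace DB;

void identifierSketch()
{
    Identifier identifier("database.table.column");

    bool compound = identifier.isCompound();  /// true, three parts
    const auto & first = identifier.front();  /// "database"

    identifier.popFirst();                    /// "table.column"
    identifier.push_back("nested");           /// "table.column.nested"

    /// IdentifierView is a non-owning window over an Identifier:
    /// popFirst/popLast only move iterators and shrink the string_view.
    IdentifierView view(identifier);
    view.popLast();                                     /// view is "table.column", identifier is unchanged
    bool starts_with_table = view.startsWith("table");  /// true
}
```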
constexpr static auto parse(format_parse_context & ctx) + { + const auto * it = ctx.begin(); + const auto * end = ctx.end(); + + /// Only support {}. + if (it != end && *it != '}') + throw format_error("invalid format"); + + return it; + } + + template + auto format(const DB::Identifier & identifier, FormatContext & ctx) + { + return format_to(ctx.out(), "{}", identifier.getFullName()); + } +}; + +template <> +struct fmt::formatter +{ + constexpr static auto parse(format_parse_context & ctx) + { + const auto * it = ctx.begin(); + const auto * end = ctx.end(); + + /// Only support {}. + if (it != end && *it != '}') + throw format_error("invalid format"); + + return it; + } + + template + auto format(const DB::IdentifierView & identifier_view, FormatContext & ctx) + { + return format_to(ctx.out(), "{}", identifier_view.getFullName()); + } +}; diff --git a/src/Analyzer/IdentifierNode.cpp b/src/Analyzer/IdentifierNode.cpp new file mode 100644 index 00000000000..4efc7f515ea --- /dev/null +++ b/src/Analyzer/IdentifierNode.cpp @@ -0,0 +1,75 @@ +#include + +#include + +#include +#include + +#include + +namespace DB +{ + +IdentifierNode::IdentifierNode(Identifier identifier_) + : IQueryTreeNode(children_size) + , identifier(std::move(identifier_)) +{} + +IdentifierNode::IdentifierNode(Identifier identifier_, TableExpressionModifiers table_expression_modifiers_) + : IQueryTreeNode(children_size) + , identifier(std::move(identifier_)) + , table_expression_modifiers(std::move(table_expression_modifiers_)) +{} + +void IdentifierNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "IDENTIFIER id: " << format_state.getNodeId(this); + + if (hasAlias()) + buffer << ", alias: " << getAlias(); + + buffer << ", identifier: " << identifier.getFullName(); + + if (table_expression_modifiers) + { + buffer << ", "; + table_expression_modifiers->dump(buffer); + } +} + +bool IdentifierNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + + if (table_expression_modifiers && rhs_typed.table_expression_modifiers && table_expression_modifiers != rhs_typed.table_expression_modifiers) + return false; + else if (table_expression_modifiers && !rhs_typed.table_expression_modifiers) + return false; + else if (!table_expression_modifiers && rhs_typed.table_expression_modifiers) + return false; + + return identifier == rhs_typed.identifier; +} + +void IdentifierNode::updateTreeHashImpl(HashState & state) const +{ + const auto & identifier_name = identifier.getFullName(); + state.update(identifier_name.size()); + state.update(identifier_name); + + if (table_expression_modifiers) + table_expression_modifiers->updateTreeHash(state); +} + +QueryTreeNodePtr IdentifierNode::cloneImpl() const +{ + return std::make_shared(identifier); +} + +ASTPtr IdentifierNode::toASTImpl() const +{ + auto identifier_parts = identifier.getParts(); + return std::make_shared(std::move(identifier_parts)); +} + +} diff --git a/src/Analyzer/IdentifierNode.h b/src/Analyzer/IdentifierNode.h new file mode 100644 index 00000000000..7a2351f0ece --- /dev/null +++ b/src/Analyzer/IdentifierNode.h @@ -0,0 +1,76 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +/** Identifier node represents identifier in query tree. + * Example: SELECT a FROM test_table. + * a - is identifier. + * test_table - is identifier. + * + * Identifier resolution must be done during query analysis pass. 
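The fmt::formatter specializations above make identifiers usable directly in formatted messages; a small sketch:

```cpp
#include <Analyzer/Identifier.h>
#include <fmt/format.h>

void formatIdentifierSketch()
{
    DB::Identifier identifier("test_table.value");

    /// Only the default "{}" spec is supported by the formatters above.
    auto message = fmt::format("Unknown identifier {}", identifier);
    /// message == "Unknown identifier test_table.value"
}
```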
+ */ +class IdentifierNode final : public IQueryTreeNode +{ +public: + /// Construct identifier node with identifier + explicit IdentifierNode(Identifier identifier_); + + /** Construct identifier node with identifier and table expression modifiers + * when identifier node is part of JOIN TREE. + * + * Example: SELECT * FROM test_table SAMPLE 0.1 OFFSET 0.1 FINAL + */ + explicit IdentifierNode(Identifier identifier_, TableExpressionModifiers table_expression_modifiers_); + + /// Get identifier + const Identifier & getIdentifier() const + { + return identifier; + } + + /// Return true if identifier node has table expression modifiers, false otherwise + bool hasTableExpressionModifiers() const + { + return table_expression_modifiers.has_value(); + } + + /// Get table expression modifiers + const std::optional & getTableExpressionModifiers() const + { + return table_expression_modifiers; + } + + QueryTreeNodeType getNodeType() const override + { + return QueryTreeNodeType::IDENTIFIER; + } + + String getName() const override + { + return identifier.getFullName(); + } + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + +protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(HashState & state) const override; + + QueryTreeNodePtr cloneImpl() const override; + + ASTPtr toASTImpl() const override; + +private: + Identifier identifier; + std::optional table_expression_modifiers; + + static constexpr size_t children_size = 0; +}; + +} diff --git a/src/Analyzer/InDepthQueryTreeVisitor.h b/src/Analyzer/InDepthQueryTreeVisitor.h new file mode 100644 index 00000000000..96972024d87 --- /dev/null +++ b/src/Analyzer/InDepthQueryTreeVisitor.h @@ -0,0 +1,87 @@ +#pragma once + +#include + +#include + + +namespace DB +{ + +/** Visitor that traverse query tree in depth. + * Derived class must implement `visitImpl` method. + * Additionally subclass can control if child need to be visited using `needChildVisit` method, by + * default all node children are visited. + * By default visitor traverse tree from top to bottom, if bottom to top traverse is required subclass + * can override `shouldTraverseTopToBottom` method. 
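A sketch of constructing an identifier node and dumping it (hypothetical function name):

```cpp
#include <Analyzer/IdentifierNode.h>
#include <iostream>

using namespace DB;

void identifierNodeSketch()
{
    auto node = std::make_shared<IdentifierNode>(Identifier("test_table"));
    node->setAlias("t");

    std::cout << node->getNodeTypeName() << '\n';  /// IDENTIFIER
    std::cout << node->getName() << '\n';          /// test_table

    /// Prints: IDENTIFIER id: 0, alias: t, identifier: test_table
    std::cout << node->dumpTree() << '\n';
}
```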
+ * + * Usage example: + * class FunctionsVisitor : public InDepthQueryTreeVisitor + * { + * void visitImpl(VisitQueryTreeNodeType & query_tree_node) + * { + * if (query_tree_node->getNodeType() == QueryTreeNodeType::FUNCTION) + * processFunctionNode(query_tree_node); + * } + * } + */ +template +class InDepthQueryTreeVisitor +{ +public: + using VisitQueryTreeNodeType = std::conditional_t; + + /// Return true if visitor should traverse tree top to bottom, false otherwise + bool shouldTraverseTopToBottom() const + { + return true; + } + + /// Return true if visitor should visit child, false otherwise + bool needChildVisit(VisitQueryTreeNodeType & parent [[maybe_unused]], VisitQueryTreeNodeType & child [[maybe_unused]]) + { + return true; + } + + void visit(VisitQueryTreeNodeType & query_tree_node) + { + bool traverse_top_to_bottom = getDerived().shouldTraverseTopToBottom(); + if (!traverse_top_to_bottom) + visitChildren(query_tree_node); + + getDerived().visitImpl(query_tree_node); + + if (traverse_top_to_bottom) + visitChildren(query_tree_node); + } + +private: + Derived & getDerived() + { + return *static_cast(this); + } + + const Derived & getDerived() const + { + return *static_cast(this); + } + + void visitChildren(VisitQueryTreeNodeType & expression) + { + for (auto & child : expression->getChildren()) + { + if (!child) + continue; + + bool need_visit_child = getDerived().needChildVisit(expression, child); + + if (need_visit_child) + visit(child); + } + } +}; + +template +using ConstInDepthQueryTreeVisitor = InDepthQueryTreeVisitor; + +} diff --git a/src/Analyzer/InterpolateNode.cpp b/src/Analyzer/InterpolateNode.cpp new file mode 100644 index 00000000000..dcc14d6b6d5 --- /dev/null +++ b/src/Analyzer/InterpolateNode.cpp @@ -0,0 +1,66 @@ +#include + +#include + +#include +#include + +#include + +namespace DB +{ + +InterpolateNode::InterpolateNode(QueryTreeNodePtr expression_, QueryTreeNodePtr interpolate_expression_) + : IQueryTreeNode(children_size) +{ + children[expression_child_index] = std::move(expression_); + children[interpolate_expression_child_index] = std::move(interpolate_expression_); +} + +String InterpolateNode::getName() const +{ + String result = getExpression()->getName(); + result += " AS "; + result += getInterpolateExpression()->getName(); + + return result; +} + +void InterpolateNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "INTERPOLATE id: " << format_state.getNodeId(this); + + buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION\n"; + getExpression()->dumpTreeImpl(buffer, format_state, indent + 4); + + buffer << '\n' << std::string(indent + 2, ' ') << "INTERPOLATE_EXPRESSION\n"; + getInterpolateExpression()->dumpTreeImpl(buffer, format_state, indent + 4); +} + +bool InterpolateNode::isEqualImpl(const IQueryTreeNode &) const +{ + /// No state in interpolate node + return true; +} + +void InterpolateNode::updateTreeHashImpl(HashState &) const +{ + /// No state in interpolate node +} + +QueryTreeNodePtr InterpolateNode::cloneImpl() const +{ + return std::make_shared(nullptr /*expression*/, nullptr /*interpolate_expression*/); +} + +ASTPtr InterpolateNode::toASTImpl() const +{ + auto result = std::make_shared(); + result->column = getExpression()->toAST()->getColumnName(); + result->children.push_back(getInterpolateExpression()->toAST()); + result->expr = result->children.back(); + + return result; +} + +} diff --git a/src/Analyzer/InterpolateNode.h 
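Building on the usage example in the comment above, a concrete visitor sketch (hypothetical class; assumes the default non-const template arguments of InDepthQueryTreeVisitor):

```cpp
#include <Analyzer/InDepthQueryTreeVisitor.h>

namespace DB
{

/// Hypothetical visitor: counts FUNCTION nodes, but does not descend into lambdas.
class CountFunctionsVisitor : public InDepthQueryTreeVisitor<CountFunctionsVisitor>
{
public:
    bool needChildVisit(QueryTreeNodePtr & /*parent*/, QueryTreeNodePtr & child)
    {
        return child->getNodeType() != QueryTreeNodeType::LAMBDA;
    }

    void visitImpl(QueryTreeNodePtr & node)
    {
        if (node->getNodeType() == QueryTreeNodeType::FUNCTION)
            ++functions_count;
    }

    size_t functions_count = 0;
};

}

/// Usage: CountFunctionsVisitor visitor; visitor.visit(query_tree_node);
```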
b/src/Analyzer/InterpolateNode.h new file mode 100644 index 00000000000..5bc8eded0bb --- /dev/null +++ b/src/Analyzer/InterpolateNode.h @@ -0,0 +1,72 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/** Interpolate node represents expression interpolation in INTERPOLATE section that is part of ORDER BY section in query tree. + * + * Example: SELECT * FROM test_table ORDER BY id WITH FILL INTERPOLATE (value AS value + 1); + * value - expression to interpolate. + * value + 1 - interpolate expression. + */ +class InterpolateNode; +using InterpolateNodePtr = std::shared_ptr; + +class InterpolateNode final : public IQueryTreeNode +{ +public: + /// Initialize interpolate node with expression and interpolate expression + explicit InterpolateNode(QueryTreeNodePtr expression_, QueryTreeNodePtr interpolate_expression_); + + /// Get expression to interpolate + const QueryTreeNodePtr & getExpression() const + { + return children[expression_child_index]; + } + + /// Get expression to interpolate + QueryTreeNodePtr & getExpression() + { + return children[expression_child_index]; + } + + /// Get interpolate expression + const QueryTreeNodePtr & getInterpolateExpression() const + { + return children[interpolate_expression_child_index]; + } + + /// Get interpolate expression + QueryTreeNodePtr & getInterpolateExpression() + { + return children[interpolate_expression_child_index]; + } + + QueryTreeNodeType getNodeType() const override + { + return QueryTreeNodeType::INTERPOLATE; + } + + String getName() const override; + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + +protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(HashState & hash_state) const override; + + QueryTreeNodePtr cloneImpl() const override; + + ASTPtr toASTImpl() const override; + +private: + static constexpr size_t expression_child_index = 0; + static constexpr size_t interpolate_expression_child_index = 1; + static constexpr size_t children_size = interpolate_expression_child_index + 1; +}; + +} diff --git a/src/Analyzer/JoinNode.cpp b/src/Analyzer/JoinNode.cpp new file mode 100644 index 00000000000..28a0c4ad7e0 --- /dev/null +++ b/src/Analyzer/JoinNode.cpp @@ -0,0 +1,116 @@ +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +namespace DB +{ + +JoinNode::JoinNode(QueryTreeNodePtr left_table_expression_, + QueryTreeNodePtr right_table_expression_, + QueryTreeNodePtr join_expression_, + JoinLocality locality_, + JoinStrictness strictness_, + JoinKind kind_) + : IQueryTreeNode(children_size) + , locality(locality_) + , strictness(strictness_) + , kind(kind_) +{ + children[left_table_expression_child_index] = std::move(left_table_expression_); + children[right_table_expression_child_index] = std::move(right_table_expression_); + children[join_expression_child_index] = std::move(join_expression_); +} + +ASTPtr JoinNode::toASTTableJoin() const +{ + auto join_ast = std::make_shared(); + join_ast->locality = locality; + join_ast->strictness = strictness; + join_ast->kind = kind; + + if (children[join_expression_child_index]) + { + auto join_expression_ast = children[join_expression_child_index]->toAST(); + + if (children[join_expression_child_index]->getNodeType() == QueryTreeNodeType::LIST) + join_ast->using_expression_list = std::move(join_expression_ast); + else + join_ast->on_expression = std::move(join_expression_ast); + } + + return join_ast; +} + +void 
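A sketch of building an interpolate node from still-unresolved identifiers (hypothetical function name; a real interpolate expression would usually be a FunctionNode such as `value + 1`):

```cpp
#include <Analyzer/InterpolateNode.h>
#include <Analyzer/IdentifierNode.h>

using namespace DB;

void interpolateSketch()
{
    /// Models `INTERPOLATE (value AS upper_bound)` before the analysis pass.
    auto expression = std::make_shared<IdentifierNode>(Identifier("value"));
    auto interpolate_expression = std::make_shared<IdentifierNode>(Identifier("upper_bound"));

    auto interpolate_node = std::make_shared<InterpolateNode>(expression, interpolate_expression);
    auto name = interpolate_node->getName(); /// "value AS upper_bound"
}
```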
JoinNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "JOIN id: " << format_state.getNodeId(this); + + if (locality != JoinLocality::Unspecified) + buffer << ", locality: " << toString(locality); + + if (strictness != JoinStrictness::Unspecified) + buffer << ", strictness: " << toString(strictness); + + buffer << ", kind: " << toString(kind); + + buffer << '\n' << std::string(indent + 2, ' ') << "LEFT TABLE EXPRESSION\n"; + getLeftTableExpression()->dumpTreeImpl(buffer, format_state, indent + 4); + + buffer << '\n' << std::string(indent + 2, ' ') << "RIGHT TABLE EXPRESSION\n"; + getRightTableExpression()->dumpTreeImpl(buffer, format_state, indent + 4); + + if (getJoinExpression()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "JOIN EXPRESSION\n"; + getJoinExpression()->dumpTreeImpl(buffer, format_state, indent + 4); + } +} + +bool JoinNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + return locality == rhs_typed.locality && strictness == rhs_typed.strictness && kind == rhs_typed.kind; +} + +void JoinNode::updateTreeHashImpl(HashState & state) const +{ + state.update(locality); + state.update(strictness); + state.update(kind); +} + +QueryTreeNodePtr JoinNode::cloneImpl() const +{ + return std::make_shared(getLeftTableExpression(), getRightTableExpression(), getJoinExpression(), locality, strictness, kind); +} + +ASTPtr JoinNode::toASTImpl() const +{ + ASTPtr tables_in_select_query_ast = std::make_shared(); + + addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[left_table_expression_child_index]); + + size_t join_table_index = tables_in_select_query_ast->children.size(); + + auto join_ast = toASTTableJoin(); + + addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[right_table_expression_child_index]); + + auto & table_element = tables_in_select_query_ast->children.at(join_table_index)->as(); + table_element.children.push_back(std::move(join_ast)); + table_element.table_join = table_element.children.back(); + + return tables_in_select_query_ast; +} + +} diff --git a/src/Analyzer/JoinNode.h b/src/Analyzer/JoinNode.h new file mode 100644 index 00000000000..15ba11a0122 --- /dev/null +++ b/src/Analyzer/JoinNode.h @@ -0,0 +1,152 @@ +#pragma once + +#include + +#include +#include +#include + +#include +#include + +#include + +namespace DB +{ + +/** Join node represents join in query tree. + * + * For JOIN without join expression, JOIN expression is null. + * Example: SELECT id FROM test_table_1 AS t1, test_table_2 AS t2; + * + * For JOIN with USING, JOIN expression contains list of identifier nodes. These nodes must be resolved + * during query analysis pass. + * Example: SELECT id FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 USING (id); + * + * For JOIN with ON, JOIN expression contains single expression. + * Example: SELECT id FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 ON t1.id = t2.id; + */ +class JoinNode; +using JoinNodePtr = std::shared_ptr; + +class JoinNode final : public IQueryTreeNode +{ +public: + /** Construct join node with left table expression, right table expression and join expression. + * Example: SELECT id FROM test_table_1 INNER JOIN test_table_2 ON expression. + * + * test_table_1 - left table expression. + * test_table_2 - right table expression. + * expression - join expression. 
+ */ + JoinNode(QueryTreeNodePtr left_table_expression_, + QueryTreeNodePtr right_table_expression_, + QueryTreeNodePtr join_expression_, + JoinLocality locality_, + JoinStrictness strictness_, + JoinKind kind_); + + /// Get left table expression + const QueryTreeNodePtr & getLeftTableExpression() const + { + return children[left_table_expression_child_index]; + } + + /// Get left table expression + QueryTreeNodePtr & getLeftTableExpression() + { + return children[left_table_expression_child_index]; + } + + /// Get right table expression + const QueryTreeNodePtr & getRightTableExpression() const + { + return children[right_table_expression_child_index]; + } + + /// Get right table expression + QueryTreeNodePtr & getRightTableExpression() + { + return children[right_table_expression_child_index]; + } + + /// Returns true if join has join expression, false otherwise + bool hasJoinExpression() const + { + return children[join_expression_child_index] != nullptr; + } + + /// Get join expression + const QueryTreeNodePtr & getJoinExpression() const + { + return children[join_expression_child_index]; + } + + /// Get join expression + QueryTreeNodePtr & getJoinExpression() + { + return children[join_expression_child_index]; + } + + /// Returns true if join has USING join expression, false otherwise + bool isUsingJoinExpression() const + { + return hasJoinExpression() && getJoinExpression()->getNodeType() == QueryTreeNodeType::LIST; + } + + /// Returns true if join has ON join expression, false otherwise + bool isOnJoinExpression() const + { + return hasJoinExpression() && getJoinExpression()->getNodeType() != QueryTreeNodeType::LIST; + } + + /// Get join locality + JoinLocality getLocality() const + { + return locality; + } + + /// Get join strictness + JoinStrictness getStrictness() const + { + return strictness; + } + + /// Get join kind + JoinKind getKind() const + { + return kind; + } + + /// Convert join node to ASTTableJoin + ASTPtr toASTTableJoin() const; + + QueryTreeNodeType getNodeType() const override + { + return QueryTreeNodeType::JOIN; + } + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + +protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(HashState & state) const override; + + QueryTreeNodePtr cloneImpl() const override; + + ASTPtr toASTImpl() const override; + +private: + JoinLocality locality = JoinLocality::Unspecified; + JoinStrictness strictness = JoinStrictness::Unspecified; + JoinKind kind = JoinKind::Inner; + + static constexpr size_t left_table_expression_child_index = 0; + static constexpr size_t right_table_expression_child_index = 1; + static constexpr size_t join_expression_child_index = 2; + static constexpr size_t children_size = join_expression_child_index + 1; +}; + +} + diff --git a/src/Analyzer/LambdaNode.cpp b/src/Analyzer/LambdaNode.cpp new file mode 100644 index 00000000000..ccc43e75644 --- /dev/null +++ b/src/Analyzer/LambdaNode.cpp @@ -0,0 +1,93 @@ +#include + +#include + +#include +#include +#include + +namespace DB +{ + +LambdaNode::LambdaNode(Names argument_names_, QueryTreeNodePtr expression_) + : IQueryTreeNode(children_size) + , argument_names(std::move(argument_names_)) +{ + auto arguments_list_node = std::make_shared(); + auto & nodes = arguments_list_node->getNodes(); + + size_t argument_names_size = argument_names.size(); + nodes.reserve(argument_names_size); + + for (size_t i = 0; i < argument_names_size; ++i) + 
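A sketch of building a join node for `test_table_1 AS t1 INNER JOIN test_table_2 AS t2 USING (id)` with unresolved identifiers (hypothetical function name):

```cpp
#include <Analyzer/JoinNode.h>
#include <Analyzer/IdentifierNode.h>
#include <Analyzer/ListNode.h>

using namespace DB;

QueryTreeNodePtr makeJoinSketch()
{
    auto left = std::make_shared<IdentifierNode>(Identifier("test_table_1"));
    left->setAlias("t1");

    auto right = std::make_shared<IdentifierNode>(Identifier("test_table_2"));
    right->setAlias("t2");

    /// USING is represented as a list of identifier nodes.
    auto using_list = std::make_shared<ListNode>(QueryTreeNodes{std::make_shared<IdentifierNode>(Identifier("id"))});

    auto join = std::make_shared<JoinNode>(left, right, using_list,
        JoinLocality::Unspecified, JoinStrictness::Unspecified, JoinKind::Inner);

    /// The join expression is a LIST node, so this is a USING join.
    bool is_using = join->isUsingJoinExpression(); /// true
    return join;
}
```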
nodes.push_back(std::make_shared(Identifier{argument_names[i]})); + + children[arguments_child_index] = std::move(arguments_list_node); + children[expression_child_index] = std::move(expression_); +} + +void LambdaNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "LAMBDA id: " << format_state.getNodeId(this); + + if (hasAlias()) + buffer << ", alias: " << getAlias(); + + const auto & arguments = getArguments(); + if (!arguments.getNodes().empty()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "ARGUMENTS " << '\n'; + getArguments().dumpTreeImpl(buffer, format_state, indent + 4); + } + + buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION " << '\n'; + getExpression()->dumpTreeImpl(buffer, format_state, indent + 4); +} + +String LambdaNode::getName() const +{ + return "lambda(" + children[arguments_child_index]->getName() + ") -> " + children[expression_child_index]->getName(); +} + +bool LambdaNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + return argument_names == rhs_typed.argument_names; +} + +void LambdaNode::updateTreeHashImpl(HashState & state) const +{ + state.update(argument_names.size()); + for (const auto & argument_name : argument_names) + { + state.update(argument_name.size()); + state.update(argument_name); + } +} + +QueryTreeNodePtr LambdaNode::cloneImpl() const +{ + return std::make_shared(argument_names, getExpression()); +} + +ASTPtr LambdaNode::toASTImpl() const +{ + auto lambda_function_arguments_ast = std::make_shared(); + + auto tuple_function = std::make_shared(); + tuple_function->name = "tuple"; + tuple_function->children.push_back(children[arguments_child_index]->toAST()); + tuple_function->arguments = tuple_function->children.back(); + + lambda_function_arguments_ast->children.push_back(std::move(tuple_function)); + lambda_function_arguments_ast->children.push_back(children[expression_child_index]->toAST()); + + auto lambda_function_ast = std::make_shared(); + lambda_function_ast->name = "lambda"; + lambda_function_ast->children.push_back(std::move(lambda_function_arguments_ast)); + lambda_function_ast->arguments = lambda_function_ast->children.back(); + + return lambda_function_ast; +} + +} diff --git a/src/Analyzer/LambdaNode.h b/src/Analyzer/LambdaNode.h new file mode 100644 index 00000000000..6061e854ab0 --- /dev/null +++ b/src/Analyzer/LambdaNode.h @@ -0,0 +1,118 @@ +#pragma once + +#include +#include +#include + +#include + +namespace DB +{ + +/** Lambda node represents lambda expression in query tree. + * + * Lambda consist of argument names and lambda expression body. + * Lambda expression body does not necessary use lambda arguments. Example: SELECT arrayMap(x -> 1, [1, 2, 3]) + * + * Initially lambda is initialized with argument names and lambda body expression. + * + * Lambda expression result type can depend on arguments types. + * Example: WITH (x -> x) as lambda SELECT lambda(1), lambda('string_value'). + * + * During query analysis pass lambdas must be resolved. + * Lambda resolve must set concrete lambda arguments and resolve lambda expression body. + * In query tree lambda arguments are represented by ListNode. + * If client modified lambda arguments array its size must be equal to initial lambda argument names array. 
+ * + * Examples: + * WITH (x -> x + 1) as lambda SELECT lambda(1); + * SELECT arrayMap(x -> x + 1, [1,2,3]); + */ +class LambdaNode; +using LambdaNodePtr = std::shared_ptr; + +class LambdaNode final : public IQueryTreeNode +{ +public: + /// Initialize lambda with argument names and lambda body expression + explicit LambdaNode(Names argument_names_, QueryTreeNodePtr expression_); + + /// Get argument names + const Names & getArgumentNames() const + { + return argument_names; + } + + /// Get arguments + const ListNode & getArguments() const + { + return children[arguments_child_index]->as(); + } + + /// Get arguments + ListNode & getArguments() + { + return children[arguments_child_index]->as(); + } + + /// Get arguments node + const QueryTreeNodePtr & getArgumentsNode() const + { + return children[arguments_child_index]; + } + + /// Get arguments node + QueryTreeNodePtr & getArgumentsNode() + { + return children[arguments_child_index]; + } + + /// Get expression + const QueryTreeNodePtr & getExpression() const + { + return children[expression_child_index]; + } + + /// Get expression + QueryTreeNodePtr & getExpression() + { + return children[expression_child_index]; + } + + QueryTreeNodeType getNodeType() const override + { + return QueryTreeNodeType::LAMBDA; + } + + String getName() const override; + + DataTypePtr getResultType() const override + { + return getExpression()->getResultType(); + } + + ConstantValuePtr getConstantValueOrNull() const override + { + return getExpression()->getConstantValueOrNull(); + } + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + +protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(HashState & state) const override; + + QueryTreeNodePtr cloneImpl() const override; + + ASTPtr toASTImpl() const override; + +private: + Names argument_names; + + static constexpr size_t arguments_child_index = 0; + static constexpr size_t expression_child_index = 1; + static constexpr size_t children_size = expression_child_index + 1; +}; + +} diff --git a/src/Analyzer/ListNode.cpp b/src/Analyzer/ListNode.cpp new file mode 100644 index 00000000000..b98e29deecd --- /dev/null +++ b/src/Analyzer/ListNode.cpp @@ -0,0 +1,88 @@ +#include + +#include + +#include +#include +#include + +#include + +namespace DB +{ + +ListNode::ListNode() + : IQueryTreeNode(0 /*children_size*/) +{} + +ListNode::ListNode(QueryTreeNodes nodes) + : IQueryTreeNode(0 /*children_size*/) +{ + children = std::move(nodes); +} + +void ListNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "LIST id: " << format_state.getNodeId(this); + + size_t children_size = children.size(); + buffer << ", nodes: " << children_size << '\n'; + + for (size_t i = 0; i < children_size; ++i) + { + const auto & node = children[i]; + node->dumpTreeImpl(buffer, format_state, indent + 2); + + if (i + 1 != children_size) + buffer << '\n'; + } +} + +String ListNode::getName() const +{ + if (children.empty()) + return ""; + + std::string result; + for (const auto & node : children) + { + result += node->getName(); + result += ", "; + } + + result.pop_back(); + result.pop_back(); + + return result; +} + +bool ListNode::isEqualImpl(const IQueryTreeNode &) const +{ + /// No state + return true; +} + +void ListNode::updateTreeHashImpl(HashState &) const +{ + /// No state +} + +QueryTreeNodePtr ListNode::cloneImpl() const +{ + return std::make_shared(); +} + 
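A sketch of an unresolved lambda node for `x -> x` (hypothetical function name; a body such as `x + 1` would be a FunctionNode, an identifier keeps the sketch self-contained):

```cpp
#include <Analyzer/LambdaNode.h>
#include <Analyzer/IdentifierNode.h>

using namespace DB;

void lambdaSketch()
{
    auto body = std::make_shared<IdentifierNode>(Identifier("x"));
    auto lambda = std::make_shared<LambdaNode>(Names{"x"}, body);

    auto name = lambda->getName();                             /// "lambda(x) -> x"
    const auto & argument_names = lambda->getArgumentNames();  /// {"x"}
}
```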
+ASTPtr ListNode::toASTImpl() const +{ + auto expression_list_ast = std::make_shared(); + + size_t children_size = children.size(); + expression_list_ast->children.resize(children_size); + + for (size_t i = 0; i < children_size; ++i) + expression_list_ast->children[i] = children[i]->toAST(); + + return expression_list_ast; +} + +} diff --git a/src/Analyzer/ListNode.h b/src/Analyzer/ListNode.h new file mode 100644 index 00000000000..7d941731b25 --- /dev/null +++ b/src/Analyzer/ListNode.h @@ -0,0 +1,56 @@ +#pragma once + +#include + +namespace DB +{ + +/** List node represents list of query tree nodes in query tree. + * + * Example: SELECT column_1, 1, 'constant_value' FROM table. + * column_1, 1, 'constant_value' is list query tree node. + */ +class ListNode; +using ListNodePtr = std::shared_ptr; + +class ListNode final : public IQueryTreeNode +{ +public: + /// Initialize list node with empty nodes + ListNode(); + + /// Initialize list node with nodes + explicit ListNode(QueryTreeNodes nodes); + + /// Get list nodes + const QueryTreeNodes & getNodes() const + { + return children; + } + + /// Get list nodes + QueryTreeNodes & getNodes() + { + return children; + } + + QueryTreeNodeType getNodeType() const override + { + return QueryTreeNodeType::LIST; + } + + String getName() const override; + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + +protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(HashState &) const override; + + QueryTreeNodePtr cloneImpl() const override; + + ASTPtr toASTImpl() const override; +}; + +} diff --git a/src/Analyzer/MatcherNode.cpp b/src/Analyzer/MatcherNode.cpp new file mode 100644 index 00000000000..0bed9110d25 --- /dev/null +++ b/src/Analyzer/MatcherNode.cpp @@ -0,0 +1,329 @@ +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +const char * toString(MatcherNodeType matcher_node_type) +{ + switch (matcher_node_type) + { + case MatcherNodeType::ASTERISK: + return "ASTERISK"; + case MatcherNodeType::COLUMNS_LIST: + return "COLUMNS_LIST"; + case MatcherNodeType::COLUMNS_REGEXP: + return "COLUMNS_REGEXP"; + } +} + +MatcherNode::MatcherNode(ColumnTransformersNodes column_transformers_) + : MatcherNode(MatcherNodeType::ASTERISK, + {} /*qualified_identifier*/, + {} /*columns_identifiers*/, + {} /*columns_matcher*/, + std::move(column_transformers_) /*column_transformers*/) +{ +} + +MatcherNode::MatcherNode(Identifier qualified_identifier_, ColumnTransformersNodes column_transformers_) + : MatcherNode(MatcherNodeType::ASTERISK, + std::move(qualified_identifier_), + {} /*columns_identifiers*/, + {} /*columns_matcher*/, + std::move(column_transformers_)) +{ +} + +MatcherNode::MatcherNode(std::shared_ptr columns_matcher_, ColumnTransformersNodes column_transformers_) + : MatcherNode(MatcherNodeType::COLUMNS_REGEXP, + {} /*qualified_identifier*/, + {} /*columns_identifiers*/, + std::move(columns_matcher_), + std::move(column_transformers_)) +{ +} + +MatcherNode::MatcherNode(Identifier qualified_identifier_, std::shared_ptr columns_matcher_, ColumnTransformersNodes column_transformers_) + : MatcherNode(MatcherNodeType::COLUMNS_REGEXP, + std::move(qualified_identifier_), + {} /*columns_identifiers*/, + std::move(columns_matcher_), + std::move(column_transformers_)) +{ +} + +MatcherNode::MatcherNode(Identifiers columns_identifiers_, ColumnTransformersNodes column_transformers_) + : 
MatcherNode(MatcherNodeType::COLUMNS_LIST, + {} /*qualified_identifier*/, + std::move(columns_identifiers_), + {} /*columns_matcher*/, + std::move(column_transformers_)) +{ +} + +MatcherNode::MatcherNode(Identifier qualified_identifier_, Identifiers columns_identifiers_, ColumnTransformersNodes column_transformers_) + : MatcherNode(MatcherNodeType::COLUMNS_LIST, + std::move(qualified_identifier_), + std::move(columns_identifiers_), + {} /*columns_matcher*/, + std::move(column_transformers_)) +{ +} + +MatcherNode::MatcherNode(MatcherNodeType matcher_type_, + Identifier qualified_identifier_, + Identifiers columns_identifiers_, + std::shared_ptr columns_matcher_, + ColumnTransformersNodes column_transformers_) + : IQueryTreeNode(children_size) + , matcher_type(matcher_type_) + , qualified_identifier(qualified_identifier_) + , columns_identifiers(columns_identifiers_) + , columns_matcher(columns_matcher_) +{ + auto column_transformers_list_node = std::make_shared(); + + auto & column_transformers_nodes = column_transformers_list_node->getNodes(); + column_transformers_nodes.reserve(column_transformers_.size()); + + for (auto && column_transformer : column_transformers_) + column_transformers_nodes.emplace_back(std::move(column_transformer)); + + children[column_transformers_child_index] = std::move(column_transformers_list_node); + + columns_identifiers_set.reserve(columns_identifiers.size()); + + for (auto & column_identifier : columns_identifiers) + columns_identifiers_set.insert(column_identifier.getFullName()); +} + +bool MatcherNode::isMatchingColumn(const std::string & column_name) +{ + if (matcher_type == MatcherNodeType::ASTERISK) + return true; + + if (columns_matcher) + return RE2::PartialMatch(column_name, *columns_matcher); + + return columns_identifiers_set.contains(column_name); +} + +void MatcherNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "MATCHER id: " << format_state.getNodeId(this); + + buffer << ", matcher_type: " << toString(matcher_type); + + if (!qualified_identifier.empty()) + buffer << ", qualified_identifier: " << qualified_identifier.getFullName(); + + if (columns_matcher) + { + buffer << ", columns_pattern: " << columns_matcher->pattern(); + } + else if (matcher_type == MatcherNodeType::COLUMNS_LIST) + { + buffer << ", " << fmt::format("column_identifiers: {}", fmt::join(columns_identifiers, ", ")); + } + + const auto & column_transformers_list = getColumnTransformers(); + if (!column_transformers_list.getNodes().empty()) + { + buffer << '\n'; + column_transformers_list.dumpTreeImpl(buffer, format_state, indent + 2); + } +} + +String MatcherNode::getName() const +{ + WriteBufferFromOwnString buffer; + + if (!qualified_identifier.empty()) + buffer << qualified_identifier.getFullName() << '.'; + + if (matcher_type == MatcherNodeType::ASTERISK) + { + buffer << '*'; + } + else + { + buffer << "COLUMNS("; + + if (columns_matcher) + { + buffer << ' ' << columns_matcher->pattern(); + } + else if (matcher_type == MatcherNodeType::COLUMNS_LIST) + { + size_t columns_identifiers_size = columns_identifiers.size(); + for (size_t i = 0; i < columns_identifiers_size; ++i) + { + buffer << columns_identifiers[i].getFullName(); + + if (i + 1 != columns_identifiers_size) + buffer << ", "; + } + } + } + + buffer << ')'; + + const auto & column_transformers = getColumnTransformers().getNodes(); + size_t column_transformers_size = column_transformers.size(); + + for (size_t i = 0; i < 
column_transformers_size; ++i) + { + const auto & column_transformer = column_transformers[i]; + buffer << column_transformer->getName(); + + if (i + 1 != column_transformers_size) + buffer << ' '; + } + + return buffer.str(); +} + +bool MatcherNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + if (matcher_type != rhs_typed.matcher_type || + qualified_identifier != rhs_typed.qualified_identifier || + columns_identifiers != rhs_typed.columns_identifiers || + columns_identifiers_set != rhs_typed.columns_identifiers_set) + return false; + + const auto & rhs_columns_matcher = rhs_typed.columns_matcher; + + if (!columns_matcher && !rhs_columns_matcher) + return true; + else if (columns_matcher && !rhs_columns_matcher) + return false; + else if (!columns_matcher && rhs_columns_matcher) + return false; + + return columns_matcher->pattern() == rhs_columns_matcher->pattern(); +} + +void MatcherNode::updateTreeHashImpl(HashState & hash_state) const +{ + hash_state.update(static_cast(matcher_type)); + + const auto & qualified_identifier_full_name = qualified_identifier.getFullName(); + hash_state.update(qualified_identifier_full_name.size()); + hash_state.update(qualified_identifier_full_name); + + for (const auto & identifier : columns_identifiers) + { + const auto & identifier_full_name = identifier.getFullName(); + hash_state.update(identifier_full_name.size()); + hash_state.update(identifier_full_name); + } + + if (columns_matcher) + { + const auto & columns_matcher_pattern = columns_matcher->pattern(); + hash_state.update(columns_matcher_pattern.size()); + hash_state.update(columns_matcher_pattern); + } +} + +QueryTreeNodePtr MatcherNode::cloneImpl() const +{ + MatcherNodePtr matcher_node = std::make_shared(); + + matcher_node->matcher_type = matcher_type; + matcher_node->qualified_identifier = qualified_identifier; + matcher_node->columns_identifiers = columns_identifiers; + matcher_node->columns_matcher = columns_matcher; + matcher_node->columns_identifiers_set = columns_identifiers_set; + + return matcher_node; +} + +ASTPtr MatcherNode::toASTImpl() const +{ + ASTPtr result; + + if (matcher_type == MatcherNodeType::ASTERISK) + { + if (qualified_identifier.empty()) + { + result = std::make_shared(); + } + else + { + auto qualified_asterisk = std::make_shared(); + + auto identifier_parts = qualified_identifier.getParts(); + qualified_asterisk->children.push_back(std::make_shared(std::move(identifier_parts))); + + result = qualified_asterisk; + } + } + else if (columns_matcher) + { + if (qualified_identifier.empty()) + { + auto regexp_matcher = std::make_shared(); + regexp_matcher->setPattern(columns_matcher->pattern()); + result = regexp_matcher; + } + else + { + auto regexp_matcher = std::make_shared(); + regexp_matcher->setPattern(columns_matcher->pattern()); + + auto identifier_parts = qualified_identifier.getParts(); + regexp_matcher->children.push_back(std::make_shared(std::move(identifier_parts))); + + result = regexp_matcher; + } + } + else + { + auto column_list = std::make_shared(); + column_list->children.reserve(columns_identifiers.size()); + + for (const auto & identifier : columns_identifiers) + { + auto identifier_parts = identifier.getParts(); + column_list->children.push_back(std::make_shared(std::move(identifier_parts))); + } + + if (qualified_identifier.empty()) + { + auto columns_list_matcher = std::make_shared(); + columns_list_matcher->column_list = std::move(column_list); + result = columns_list_matcher; + } + else + { + 
auto columns_list_matcher = std::make_shared(); + columns_list_matcher->column_list = std::move(column_list); + + auto identifier_parts = qualified_identifier.getParts(); + columns_list_matcher->children.push_back(std::make_shared(std::move(identifier_parts))); + + result = columns_list_matcher; + } + } + + for (const auto & child : children) + result->children.push_back(child->toAST()); + + return result; +} + +} diff --git a/src/Analyzer/MatcherNode.h b/src/Analyzer/MatcherNode.h new file mode 100644 index 00000000000..3635d038549 --- /dev/null +++ b/src/Analyzer/MatcherNode.h @@ -0,0 +1,172 @@ +#pragma once + +#include + +#include +#include +#include +#include + + +namespace DB +{ + +/** Matcher query tree node. + * Matcher can be unqualified with identifier and qualified with identifier. + * It can be asterisk or COLUMNS('regexp') or COLUMNS(column_name_1, ...). + * In result we have 6 possible options: + * Unqualified + * 1. * + * 2. COLUMNS('regexp') + * 3. COLUMNS(column_name_1, ...) + * + * Qualified: + * 1. identifier.* + * 2. identifier.COLUMNS('regexp') + * 3. identifier.COLUMNS(column_name_1, ...) + * + * Matcher must be resolved during query analysis pass. + * + * Matchers can be applied to compound expressions. + * Example: SELECT compound_column AS a, a.* FROM test_table. + * Example: SELECT compound_column.* FROM test_table. + * + * Example: SELECT * FROM test_table; + * Example: SELECT test_table.* FROM test_table. + * Example: SELECT a.* FROM test_table AS a. + * + * Additionally each matcher can contain transformers, check ColumnTransformers.h. + * In query tree matchers column transformers are represended as ListNode. + */ +enum class MatcherNodeType +{ + ASTERISK, + COLUMNS_REGEXP, + COLUMNS_LIST +}; + +const char * toString(MatcherNodeType matcher_node_type); + +class MatcherNode; +using MatcherNodePtr = std::shared_ptr; + +class MatcherNode final : public IQueryTreeNode +{ +public: + /// Variant unqualified asterisk + explicit MatcherNode(ColumnTransformersNodes column_transformers_ = {}); + + /// Variant qualified asterisk + explicit MatcherNode(Identifier qualified_identifier_, ColumnTransformersNodes column_transformers_ = {}); + + /// Variant unqualified COLUMNS('regexp') + explicit MatcherNode(std::shared_ptr columns_matcher_, ColumnTransformersNodes column_transformers_ = {}); + + /// Variant qualified COLUMNS('regexp') + explicit MatcherNode(Identifier qualified_identifier_, std::shared_ptr columns_matcher_, ColumnTransformersNodes column_transformers_ = {}); + + /// Variant unqualified COLUMNS(column_name_1, ...) + explicit MatcherNode(Identifiers columns_identifiers_, ColumnTransformersNodes column_transformers_ = {}); + + /// Variant qualified COLUMNS(column_name_1, ...) 
+ explicit MatcherNode(Identifier qualified_identifier_, Identifiers columns_identifiers_, ColumnTransformersNodes column_transformers_ = {}); + + /// Get matcher type + MatcherNodeType getMatcherType() const + { + return matcher_type; + } + + /// Returns true if matcher is asterisk matcher, false otherwise + bool isAsteriskMatcher() const + { + return matcher_type == MatcherNodeType::ASTERISK; + } + + /// Returns true if matcher is columns regexp or columns list matcher, false otherwise + bool isColumnsMatcher() const + { + return matcher_type == MatcherNodeType::COLUMNS_REGEXP || matcher_type == MatcherNodeType::COLUMNS_LIST; + } + + /// Returns true if matcher is qualified, false otherwise + bool isQualified() const + { + return !qualified_identifier.empty(); + } + + /// Returns true if matcher is not qualified, false otherwise + bool isUnqualified() const + { + return qualified_identifier.empty(); + } + + /// Get qualified identifier + const Identifier & getQualifiedIdentifier() const + { + return qualified_identifier; + } + + /// Get columns matcher. Valid only if this matcher has type COLUMNS_REGEXP. + const std::shared_ptr & getColumnsMatcher() const + { + return columns_matcher; + } + + /// Get columns identifiers. Valid only if this matcher has type COLUMNS_LIST. + const Identifiers & getColumnsIdentifiers() const + { + return columns_identifiers; + } + + /// Get column transformers + const ListNode & getColumnTransformers() const + { + return children[column_transformers_child_index]->as(); + } + + /// Get column transformers + const QueryTreeNodePtr & getColumnTransformersNode() const + { + return children[column_transformers_child_index]; + } + + /// Returns true if matcher match column name, false otherwise + bool isMatchingColumn(const std::string & column_name); + + QueryTreeNodeType getNodeType() const override + { + return QueryTreeNodeType::MATCHER; + } + + String getName() const override; + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + +protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(HashState & hash_state) const override; + + QueryTreeNodePtr cloneImpl() const override; + + ASTPtr toASTImpl() const override; + +private: + explicit MatcherNode(MatcherNodeType matcher_type_, + Identifier qualified_identifier_, + Identifiers columns_identifiers_, + std::shared_ptr columns_matcher_, + ColumnTransformersNodes column_transformers_); + + MatcherNodeType matcher_type; + Identifier qualified_identifier; + Identifiers columns_identifiers; + std::shared_ptr columns_matcher; + std::unordered_set columns_identifiers_set; + + static constexpr size_t column_transformers_child_index = 0; + static constexpr size_t children_size = column_transformers_child_index + 1; +}; + +} diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp new file mode 100644 index 00000000000..dcf386b2988 --- /dev/null +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -0,0 +1,170 @@ +#include + +#include +#include + +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_TYPE_OF_FIELD; +} + +namespace +{ + +Field zeroField(const Field & value) +{ + switch (value.getType()) + { + case Field::Types::UInt64: return static_cast(0); + case Field::Types::Int64: return static_cast(0); + case Field::Types::Float64: return static_cast(0); + case 
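A sketch of the asterisk and columns-list matcher variants above and of column matching (hypothetical function name):

```cpp
#include <Analyzer/MatcherNode.h>

using namespace DB;

void matcherSketch()
{
    /// `t1.*` - qualified asterisk matcher, matches every column.
    auto asterisk = std::make_shared<MatcherNode>(Identifier("t1"));
    bool matches_any = asterisk->isMatchingColumn("value"); /// true

    /// `COLUMNS(id, value)` - unqualified columns list matcher.
    auto columns_list = std::make_shared<MatcherNode>(Identifiers{Identifier("id"), Identifier("value")});
    bool matches_id = columns_list->isMatchingColumn("id");     /// true
    bool matches_name = columns_list->isMatchingColumn("name"); /// false
}
```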
Field::Types::UInt128: return static_cast(0); + case Field::Types::Int128: return static_cast(0); + case Field::Types::UInt256: return static_cast(0); + case Field::Types::Int256: return static_cast(0); + default: + break; + } + + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Unexpected literal type in function"); +} + +/** Rewrites: sum([multiply|divide]) -> [multiply|divide](sum) + * [min|max|avg]([multiply|divide|plus|minus]) -> [multiply|divide|plus|minus]([min|max|avg]) + * + * TODO: Support `groupBitAnd`, `groupBitOr`, `groupBitXor` functions. + * TODO: Support rewrite `f((2 * n) * n)` into '2 * f(n * n)'. + */ +class AggregateFunctionsArithmericOperationsVisitor : public InDepthQueryTreeVisitor +{ +public: + /// Traverse tree bottom to top + static bool shouldTraverseTopToBottom() + { + return false; + } + + static void visitImpl(QueryTreeNodePtr & node) + { + auto * aggregate_function_node = node->as(); + if (!aggregate_function_node || !aggregate_function_node->isAggregateFunction()) + return; + + static std::unordered_map> supported_functions + = {{"sum", {"multiply", "divide"}}, + {"min", {"multiply", "divide", "plus", "minus"}}, + {"max", {"multiply", "divide", "plus", "minus"}}, + {"avg", {"multiply", "divide", "plus", "minus"}}}; + + auto & aggregate_function_arguments_nodes = aggregate_function_node->getArguments().getNodes(); + if (aggregate_function_arguments_nodes.size() != 1) + return; + + auto * inner_function_node = aggregate_function_arguments_nodes[0]->as(); + if (!inner_function_node) + return; + + auto & inner_function_arguments_nodes = inner_function_node->getArguments().getNodes(); + if (inner_function_arguments_nodes.size() != 2) + return; + + /// Aggregate functions[sum|min|max|avg] is case-insensitive, so we use lower cases name + auto lower_function_name = Poco::toLower(aggregate_function_node->getFunctionName()); + + auto supported_function_it = supported_functions.find(lower_function_name); + if (supported_function_it == supported_functions.end()) + return; + + const auto & inner_function_name = inner_function_node->getFunctionName(); + + if (!supported_function_it->second.contains(inner_function_name)) + return; + + auto left_argument_constant_value = inner_function_arguments_nodes[0]->getConstantValueOrNull(); + auto right_argument_constant_value = inner_function_arguments_nodes[1]->getConstantValueOrNull(); + + /** If we extract negative constant, aggregate function name must be updated. 
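+ * Multiplying or dividing by a negative constant reverses the ordering of values,
+ * so `min` has to become `max` (and vice versa) when such a constant is pulled out.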
+ * + * Example: SELECT min(-1 * id); + * Result: SELECT -1 * max(id); + */ + std::string function_name_if_constant_is_negative; + if (inner_function_name == "multiply" || inner_function_name == "divide") + { + if (lower_function_name == "min") + function_name_if_constant_is_negative = "max"; + else if (lower_function_name == "max") + function_name_if_constant_is_negative = "min"; + } + + if (left_argument_constant_value && !right_argument_constant_value) + { + /// Do not rewrite `sum(1/n)` with `sum(1) * div(1/n)` because of lose accuracy + if (inner_function_name == "divide") + return; + + /// Rewrite `aggregate_function(inner_function(constant, argument))` into `inner_function(constant, aggregate_function(argument))` + const auto & left_argument_constant_value_literal = left_argument_constant_value->getValue(); + if (!function_name_if_constant_is_negative.empty() && + left_argument_constant_value_literal < zeroField(left_argument_constant_value_literal)) + { + resolveAggregateFunctionNode(*aggregate_function_node, function_name_if_constant_is_negative); + } + + auto inner_function = aggregate_function_arguments_nodes[0]; + auto inner_function_right_argument = std::move(inner_function_arguments_nodes[1]); + aggregate_function_arguments_nodes = {inner_function_right_argument}; + inner_function_arguments_nodes[1] = node; + node = std::move(inner_function); + } + else if (right_argument_constant_value) + { + /// Rewrite `aggregate_function(inner_function(argument, constant))` into `inner_function(aggregate_function(argument), constant)` + const auto & right_argument_constant_value_literal = right_argument_constant_value->getValue(); + if (!function_name_if_constant_is_negative.empty() && + right_argument_constant_value_literal < zeroField(right_argument_constant_value_literal)) + { + resolveAggregateFunctionNode(*aggregate_function_node, function_name_if_constant_is_negative); + } + + auto inner_function = aggregate_function_arguments_nodes[0]; + auto inner_function_left_argument = std::move(inner_function_arguments_nodes[0]); + aggregate_function_arguments_nodes = {inner_function_left_argument}; + inner_function_arguments_nodes[0] = node; + node = std::move(inner_function); + } + } + +private: + static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const String & aggregate_function_name) + { + auto function_result_type = function_node.getResultType(); + auto function_aggregate_function = function_node.getAggregateFunction(); + + AggregateFunctionProperties properties; + auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, + function_aggregate_function->getArgumentTypes(), + function_aggregate_function->getParameters(), + properties); + + function_node.resolveAsAggregateFunction(std::move(aggregate_function), std::move(function_result_type)); + } +}; + +} + +void AggregateFunctionsArithmericOperationsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr) +{ + AggregateFunctionsArithmericOperationsVisitor visitor; + visitor.visit(query_tree_node); +} + +} diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.h b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.h new file mode 100644 index 00000000000..a89d2f87ad9 --- /dev/null +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.h @@ -0,0 +1,24 @@ +#pragma once + +#include + +namespace DB +{ + +/** Extract arithmeric operations from aggregate functions. 
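+ * The extracted operation is then evaluated once on the aggregated value
+ * instead of once per input row.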
+ * + * Example: SELECT sum(a * 2); + * Result: SELECT sum(a) * 2; + */ +class AggregateFunctionsArithmericOperationsPass final : public IQueryTreePass +{ +public: + String getName() override { return "AggregateFunctionsArithmericOperations"; } + + String getDescription() override { return "Extract arithmeric operations from aggregate functions."; } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + +}; + +} diff --git a/src/Analyzer/Passes/CountDistinctPass.cpp b/src/Analyzer/Passes/CountDistinctPass.cpp new file mode 100644 index 00000000000..2b55efa3552 --- /dev/null +++ b/src/Analyzer/Passes/CountDistinctPass.cpp @@ -0,0 +1,85 @@ +#include + +#include +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace +{ + +class CountDistinctVisitor : public InDepthQueryTreeVisitor +{ +public: + static void visitImpl(QueryTreeNodePtr & node) + { + auto * query_node = node->as(); + + /// Check that query has only SELECT clause + if (!query_node || (query_node->hasWith() || query_node->hasPrewhere() || query_node->hasWhere() || query_node->hasGroupBy() || + query_node->hasHaving() || query_node->hasWindow() || query_node->hasOrderBy() || query_node->hasLimitByLimit() || query_node->hasLimitByOffset() || + query_node->hasLimitBy() || query_node->hasLimit() || query_node->hasOffset())) + return; + + /// Check that query has only single table expression + auto join_tree_node_type = query_node->getJoinTree()->getNodeType(); + if (join_tree_node_type == QueryTreeNodeType::JOIN || join_tree_node_type == QueryTreeNodeType::ARRAY_JOIN) + return; + + /// Check that query has only single node in projection + auto & projection_nodes = query_node->getProjection().getNodes(); + if (projection_nodes.size() != 1) + return; + + /// Check that query single projection node is `countDistinct` function + auto & projection_node = projection_nodes[0]; + auto * function_node = projection_node->as(); + if (!function_node) + return; + + auto lower_function_name = Poco::toLower(function_node->getFunctionName()); + if (lower_function_name != "countdistinct" && lower_function_name != "uniqexact") + return; + + /// Check that `countDistinct` function has single COLUMN argument + auto & count_distinct_arguments_nodes = function_node->getArguments().getNodes(); + if (count_distinct_arguments_nodes.size() != 1 && count_distinct_arguments_nodes[0]->getNodeType() != QueryTreeNodeType::COLUMN) + return; + + auto & count_distinct_argument_column = count_distinct_arguments_nodes[0]; + auto & count_distinct_argument_column_typed = count_distinct_argument_column->as(); + + /// Build subquery SELECT count_distinct_argument_column FROM table_expression GROUP BY count_distinct_argument_column + auto subquery = std::make_shared(); + subquery->getJoinTree() = query_node->getJoinTree(); + subquery->getProjection().getNodes().push_back(count_distinct_argument_column); + subquery->getGroupBy().getNodes().push_back(count_distinct_argument_column); + subquery->resolveProjectionColumns({count_distinct_argument_column_typed.getColumn()}); + + /// Put subquery into JOIN TREE of initial query + query_node->getJoinTree() = std::move(subquery); + + /// Replace `countDistinct` of initial query into `count` + auto result_type = function_node->getResultType(); + AggregateFunctionProperties properties; + auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties); + function_node->resolveAsAggregateFunction(std::move(aggregate_function), std::move(result_type)); + 
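+ /// `count` takes no arguments here: the distinct values are already produced by the
+ /// GROUP BY subquery built above, so the original `countDistinct` argument is dropped.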
function_node->getArguments().getNodes().clear(); + } +}; + +} + +void CountDistinctPass::run(QueryTreeNodePtr query_tree_node, ContextPtr) +{ + CountDistinctVisitor visitor; + visitor.visit(query_tree_node); +} + +} diff --git a/src/Analyzer/Passes/CountDistinctPass.h b/src/Analyzer/Passes/CountDistinctPass.h new file mode 100644 index 00000000000..cac5033c98f --- /dev/null +++ b/src/Analyzer/Passes/CountDistinctPass.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +namespace DB +{ + +/** Optimize single `countDistinct` into `count` over subquery. + * + * Example: SELECT countDistinct(column) FROM table; + * Result: SELECT count() FROM (SELECT column FROM table GROUP BY column); + */ +class CountDistinctPass final : public IQueryTreePass +{ +public: + String getName() override { return "CountDistinct"; } + + String getDescription() override + { + return "Optimize single countDistinct into count over subquery"; + } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + +}; + +} diff --git a/src/Analyzer/Passes/CustomizeFunctionsPass.cpp b/src/Analyzer/Passes/CustomizeFunctionsPass.cpp new file mode 100644 index 00000000000..629ab411a55 --- /dev/null +++ b/src/Analyzer/Passes/CustomizeFunctionsPass.cpp @@ -0,0 +1,175 @@ +#include + +#include +#include + +#include + +#include + +#include +#include + +namespace DB +{ + +namespace +{ + +class CustomizeFunctionsVisitor : public InDepthQueryTreeVisitor +{ +public: + explicit CustomizeFunctionsVisitor(ContextPtr & context_) + : context(context_) + {} + + void visitImpl(QueryTreeNodePtr & node) const + { + auto * function_node = node->as(); + if (!function_node) + return; + + const auto & settings = context->getSettingsRef(); + + /// After successful function replacement function name and function name lowercase must be recalculated + auto function_name = function_node->getFunctionName(); + auto function_name_lowercase = Poco::toLower(function_name); + + if (function_node->isAggregateFunction() || function_node->isWindowFunction()) + { + auto count_distinct_implementation_function_name = String(settings.count_distinct_implementation); + + /// Replace countDistinct with countDistinct implementation + if (function_name_lowercase == "countdistinct") + { + resolveAggregateOrWindowFunctionNode(*function_node, count_distinct_implementation_function_name); + function_name = function_node->getFunctionName(); + function_name_lowercase = Poco::toLower(function_name); + } + + /// Replace countIfDistinct with countDistinctIf implementation + if (function_name_lowercase == "countifdistinct") + { + resolveAggregateOrWindowFunctionNode(*function_node, count_distinct_implementation_function_name + "If"); + function_name = function_node->getFunctionName(); + function_name_lowercase = Poco::toLower(function_name); + } + + /// Replace aggregateFunctionIfDistinct into aggregateFunctionDistinctIf to make execution more optimal + if (function_name_lowercase.ends_with("ifdistinct")) + { + size_t prefix_length = function_name_lowercase.size() - strlen("ifdistinct"); + auto updated_function_name = function_name_lowercase.substr(0, prefix_length) + "DistinctIf"; + resolveAggregateOrWindowFunctionNode(*function_node, updated_function_name); + function_name = function_node->getFunctionName(); + function_name_lowercase = Poco::toLower(function_name); + } + + /// Rewrite all aggregate functions to add -OrNull suffix to them + if (settings.aggregate_functions_null_for_empty && !function_name.ends_with("OrNull")) + { + auto function_properies = 
AggregateFunctionFactory::instance().tryGetProperties(function_name); + if (function_properies && !function_properies->returns_default_when_only_null) + { + auto updated_function_name = function_name + "OrNull"; + resolveAggregateOrWindowFunctionNode(*function_node, updated_function_name); + function_name = function_node->getFunctionName(); + function_name_lowercase = Poco::toLower(function_name); + } + } + + /** Move -OrNull suffix ahead, this should execute after add -OrNull suffix. + * Used to rewrite aggregate functions with -OrNull suffix in some cases. + * Example: sumIfOrNull. + * Result: sumOrNullIf. + */ + if (function_name.ends_with("OrNull")) + { + auto function_properies = AggregateFunctionFactory::instance().tryGetProperties(function_name); + if (function_properies && !function_properies->returns_default_when_only_null) + { + size_t function_name_size = function_name.size(); + + static constexpr std::array suffixes_to_replace = {"MergeState", "Merge", "State", "If"}; + for (const auto & suffix : suffixes_to_replace) + { + auto suffix_string_value = String(suffix); + auto suffix_to_check = suffix_string_value + "OrNull"; + + if (!function_name.ends_with(suffix_to_check)) + continue; + + auto updated_function_name = function_name.substr(0, function_name_size - suffix_to_check.size()) + "OrNull" + suffix_string_value; + resolveAggregateOrWindowFunctionNode(*function_node, updated_function_name); + function_name = function_node->getFunctionName(); + function_name_lowercase = Poco::toLower(function_name); + break; + } + } + } + + return; + } + + if (settings.transform_null_in) + { + auto function_result_type = function_node->getResultType(); + + static constexpr std::array, 4> in_function_to_replace_null_in_function_map = + {{ + {"in", "nullIn"}, + {"notin", "notNullIn"}, + {"globalin", "globalNullIn"}, + {"globalnotin", "globalNotNullIn"}, + }}; + + for (const auto & [in_function_name, in_function_name_to_replace] : in_function_to_replace_null_in_function_map) + { + if (function_name_lowercase == in_function_name) + { + resolveOrdinaryFunctionNode(*function_node, String(in_function_name_to_replace)); + function_name = function_node->getFunctionName(); + function_name_lowercase = Poco::toLower(function_name); + break; + } + } + } + } + + static inline void resolveAggregateOrWindowFunctionNode(FunctionNode & function_node, const String & aggregate_function_name) + { + auto function_result_type = function_node.getResultType(); + auto function_aggregate_function = function_node.getAggregateFunction(); + + AggregateFunctionProperties properties; + auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, + function_aggregate_function->getArgumentTypes(), + function_aggregate_function->getParameters(), + properties); + + if (function_node.isAggregateFunction()) + function_node.resolveAsAggregateFunction(std::move(aggregate_function), std::move(function_result_type)); + else if (function_node.isWindowFunction()) + function_node.resolveAsWindowFunction(std::move(aggregate_function), std::move(function_result_type)); + } + + inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const + { + auto function_result_type = function_node.getResultType(); + auto function = FunctionFactory::instance().get(function_name, context); + function_node.resolveAsFunction(function, std::move(function_result_type)); + } + +private: + ContextPtr & context; +}; + +} + +void CustomizeFunctionsPass::run(QueryTreeNodePtr 
query_tree_node, ContextPtr context) +{ + CustomizeFunctionsVisitor visitor(context); + visitor.visit(query_tree_node); +} + +} diff --git a/src/Analyzer/Passes/CustomizeFunctionsPass.h b/src/Analyzer/Passes/CustomizeFunctionsPass.h new file mode 100644 index 00000000000..7145099ca4c --- /dev/null +++ b/src/Analyzer/Passes/CustomizeFunctionsPass.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace DB +{ + +/** Customize aggregate functions and `in` functions implementations. + * + * Example: SELECT countDistinct(); + * Result: SELECT countDistinctImplementation(); + * Function countDistinctImplementation is taken from settings.count_distinct_implementation. + */ +class CustomizeFunctionsPass final : public IQueryTreePass +{ +public: + String getName() override { return "CustomizeFunctions"; } + + String getDescription() override { return "Customize implementation of aggregate functions, and in functions."; } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + +}; + +} diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp new file mode 100644 index 00000000000..41cc7bf18b1 --- /dev/null +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -0,0 +1,211 @@ +#include + +#include +#include +#include + +#include + +#include + +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace +{ + +class FunctionToSubcolumnsVisitor : public InDepthQueryTreeVisitor +{ +public: + explicit FunctionToSubcolumnsVisitor(ContextPtr & context_) + : context(context_) + {} + + void visitImpl(QueryTreeNodePtr & node) const + { + auto * function_node = node->as(); + if (!function_node) + return; + + auto & function_arguments_nodes = function_node->getArguments().getNodes(); + size_t function_arguments_nodes_size = function_arguments_nodes.size(); + + if (function_arguments_nodes.empty() || function_arguments_nodes_size > 2) + return; + + auto * first_argument_column_node = function_arguments_nodes.front()->as(); + + if (!first_argument_column_node) + return; + + auto column_source = first_argument_column_node->getColumnSource(); + auto * table_node = column_source->as(); + + if (!table_node) + return; + + const auto & storage = table_node->getStorage(); + if (!storage->supportsSubcolumns()) + return; + + auto column = first_argument_column_node->getColumn(); + WhichDataType column_type(column.type); + + const auto & function_name = function_node->getFunctionName(); + + if (function_arguments_nodes_size == 1) + { + if (column_type.isArray()) + { + if (function_name == "length") + { + /// Replace `length(array_argument)` with `array_argument.size0` + column.name += ".size0"; + + node = std::make_shared(column, column_source); + } + else if (function_name == "empty") + { + /// Replace `empty(array_argument)` with `equals(array_argument.size0, 0)` + column.name += ".size0"; + column.type = std::make_shared(); + + resolveOrdinaryFunctionNode(*function_node, "equals"); + + function_arguments_nodes.clear(); + function_arguments_nodes.push_back(std::make_shared(column, column_source)); + function_arguments_nodes.push_back(std::make_shared(static_cast(0))); + } + else if (function_name == "notEmpty") + { + /// Replace `notEmpty(array_argument)` with `notEquals(array_argument.size0, 0)` + column.name += ".size0"; + column.type = std::make_shared(); + + resolveOrdinaryFunctionNode(*function_node, "notEquals"); + + function_arguments_nodes.clear(); + 
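+ /// Rebuild the argument list as (array_argument.size0, 0), mirroring the `empty` branch above.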
function_arguments_nodes.push_back(std::make_shared(column, column_source)); + function_arguments_nodes.push_back(std::make_shared(static_cast(0))); + } + } + else if (column_type.isNullable()) + { + if (function_name == "isNull") + { + /// Replace `isNull(nullable_argument)` with `nullable_argument.null` + column.name += ".null"; + + node = std::make_shared(column, column_source); + } + else if (function_name == "isNotNull") + { + /// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)` + column.name += ".null"; + column.type = std::make_shared(); + + resolveOrdinaryFunctionNode(*function_node, "not"); + + function_arguments_nodes = {std::make_shared(column, column_source)}; + } + } + else if (column_type.isMap()) + { + if (function_name == "mapKeys") + { + /// Replace `mapKeys(map_argument)` with `map_argument.keys` + column.name += ".keys"; + column.type = function_node->getResultType(); + + node = std::make_shared(column, column_source); + } + else if (function_name == "mapValues") + { + /// Replace `mapValues(map_argument)` with `map_argument.values` + column.name += ".values"; + column.type = function_node->getResultType(); + + node = std::make_shared(column, column_source); + } + } + } + else + { + auto second_argument_constant_value = function_arguments_nodes[1]->getConstantValueOrNull(); + + if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_value) + { + /** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` + * with `tuple_argument.column_name`. + */ + const auto & tuple_element_constant_value = second_argument_constant_value->getValue(); + const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType(); + + const auto & data_type_tuple = assert_cast(*column.type); + + String subcolumn_name; + + if (tuple_element_constant_value_type == Field::Types::String) + { + subcolumn_name = tuple_element_constant_value.get(); + } + else if (tuple_element_constant_value_type == Field::Types::UInt64) + { + auto tuple_column_index = tuple_element_constant_value.get(); + subcolumn_name = data_type_tuple.getNameByPosition(tuple_column_index); + } + else + { + return; + } + + column.name += '.'; + column.name += subcolumn_name; + column.type = function_node->getResultType(); + + node = std::make_shared(column, column_source); + } + else if (function_name == "mapContains" && column_type.isMap()) + { + const auto & data_type_map = assert_cast(*column.type); + + /// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)` + column.name += ".keys"; + column.type = data_type_map.getKeyType(); + + auto has_function_argument = std::make_shared(column, column_source); + resolveOrdinaryFunctionNode(*function_node, "has"); + + function_arguments_nodes[0] = std::move(has_function_argument); + } + } + } + +private: + inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const + { + auto function_result_type = function_node.getResultType(); + auto function = FunctionFactory::instance().get(function_name, context); + function_node.resolveAsFunction(function, std::move(function_result_type)); + } + + ContextPtr & context; +}; + +} + +void FunctionToSubcolumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +{ + FunctionToSubcolumnsVisitor visitor(context); + visitor.visit(query_tree_node); +} + +} diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.h 
b/src/Analyzer/Passes/FunctionToSubcolumnsPass.h new file mode 100644 index 00000000000..e31c39a8ff3 --- /dev/null +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.h @@ -0,0 +1,31 @@ +#pragma once + +#include + +namespace DB +{ + +/** Transform functions to subcolumns. + * It can help to reduce amount of read data. + * + * Example: SELECT tupleElement(column, subcolumn) FROM test_table; + * Result: SELECT column.subcolumn FROM test_table; + * + * Example: SELECT length(array_column) FROM test_table; + * Result: SELECT array_column.size0 FROM test_table; + * + * Example: SELECT nullable_column IS NULL FROM test_table; + * Result: SELECT nullable_column.null FROM test_table; + */ +class FunctionToSubcolumnsPass final : public IQueryTreePass +{ +public: + String getName() override { return "FunctionToSubcolumns"; } + + String getDescription() override { return "Rewrite function to subcolumns, for example tupleElement(column, subcolumn) into column.subcolumn"; } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + +}; + +} diff --git a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp new file mode 100644 index 00000000000..f400b11765e --- /dev/null +++ b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp @@ -0,0 +1,75 @@ +#include + +#include + +#include +#include +#include + +namespace DB +{ + +namespace +{ + +class IfChainToMultiIfPassVisitor : public InDepthQueryTreeVisitor +{ +public: + explicit IfChainToMultiIfPassVisitor(FunctionOverloadResolverPtr multi_if_function_ptr_) + : multi_if_function_ptr(std::move(multi_if_function_ptr_)) + {} + + void visitImpl(QueryTreeNodePtr & node) + { + auto * function_node = node->as(); + if (!function_node || function_node->getFunctionName() != "if" || function_node->getArguments().getNodes().size() != 3) + return; + + std::vector multi_if_arguments; + + auto & function_node_arguments = function_node->getArguments().getNodes(); + multi_if_arguments.insert(multi_if_arguments.end(), function_node_arguments.begin(), function_node_arguments.end()); + + QueryTreeNodePtr if_chain_node = multi_if_arguments.back(); + + while (true) + { + /// Check if last `multiIf` argument is `if` function + auto * if_chain_function_node = if_chain_node->as(); + if (!if_chain_function_node || if_chain_function_node->getFunctionName() != "if" || if_chain_function_node->getArguments().getNodes().size() != 3) + break; + + /// Replace last `multiIf` argument with `if` function arguments + + multi_if_arguments.pop_back(); + + auto & if_chain_function_node_arguments = if_chain_function_node->getArguments().getNodes(); + multi_if_arguments.insert(multi_if_arguments.end(), if_chain_function_node_arguments.begin(), if_chain_function_node_arguments.end()); + + /// Use last `multiIf` argument for next check + if_chain_node = multi_if_arguments.back(); + } + + /// Do not replace `if` with 3 arguments to `multiIf` + if (multi_if_arguments.size() <= 3) + return; + + auto multi_if_function = std::make_shared("multiIf"); + multi_if_function->resolveAsFunction(multi_if_function_ptr, std::make_shared()); + multi_if_function->getArguments().getNodes() = std::move(multi_if_arguments); + node = std::move(multi_if_function); + } + +private: + FunctionOverloadResolverPtr multi_if_function_ptr; +}; + +} + +void IfChainToMultiIfPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +{ + IfChainToMultiIfPassVisitor visitor(FunctionFactory::instance().get("multiIf", context)); + visitor.visit(query_tree_node); +} + +} diff --git 
a/src/Analyzer/Passes/IfChainToMultiIfPass.h b/src/Analyzer/Passes/IfChainToMultiIfPass.h new file mode 100644 index 00000000000..43f3fb8831d --- /dev/null +++ b/src/Analyzer/Passes/IfChainToMultiIfPass.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace DB +{ + +/** Convert `if` chain into single `multiIf`. + * Replace if(cond_1, then_1_value, if(cond_2, ...)) chains into multiIf(cond_1, then_1_value, cond_2, ...). + * + * Example: SELECT if(cond_1, then_1_value, if(cond_2, then_2_value, else_value)); + * Result: SELECT multiIf(cond_1, then_1_value, cond_2, then_2_value, else_value); + */ +class IfChainToMultiIfPass final : public IQueryTreePass +{ +public: + String getName() override { return "IfChainToMultiIf"; } + + String getDescription() override { return "Optimize if chain to multiIf"; } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + +}; + +} diff --git a/src/Analyzer/Passes/IfConstantConditionPass.cpp b/src/Analyzer/Passes/IfConstantConditionPass.cpp new file mode 100644 index 00000000000..1da1f5bd471 --- /dev/null +++ b/src/Analyzer/Passes/IfConstantConditionPass.cpp @@ -0,0 +1,56 @@ +#include + +#include +#include +#include + +namespace DB +{ + +namespace +{ + +class IfConstantConditionVisitor : public InDepthQueryTreeVisitor +{ +public: + static void visitImpl(QueryTreeNodePtr & node) + { + auto * function_node = node->as(); + if (!function_node || (function_node->getFunctionName() != "if" && function_node->getFunctionName() != "multiIf")) + return; + + if (function_node->getArguments().getNodes().size() != 3) + return; + + auto & first_argument = function_node->getArguments().getNodes()[0]; + auto first_argument_constant_value = first_argument->getConstantValueOrNull(); + if (!first_argument_constant_value) + return; + + const auto & condition_value = first_argument_constant_value->getValue(); + + bool condition_boolean_value = false; + + if (condition_value.getType() == Field::Types::Int64) + condition_boolean_value = static_cast(condition_value.safeGet()); + else if (condition_value.getType() == Field::Types::UInt64) + condition_boolean_value = static_cast(condition_value.safeGet()); + else + return; + + if (condition_boolean_value) + node = function_node->getArguments().getNodes()[1]; + else + node = function_node->getArguments().getNodes()[2]; + } +}; + +} + +void IfConstantConditionPass::run(QueryTreeNodePtr query_tree_node, ContextPtr) +{ + IfConstantConditionVisitor visitor; + visitor.visit(query_tree_node); +} + +} diff --git a/src/Analyzer/Passes/IfConstantConditionPass.h b/src/Analyzer/Passes/IfConstantConditionPass.h new file mode 100644 index 00000000000..7817e67aa5e --- /dev/null +++ b/src/Analyzer/Passes/IfConstantConditionPass.h @@ -0,0 +1,28 @@ +#pragma once + +#include + +namespace DB +{ + +/** Convert `if` with constant condition or `multiIf` with single constant condition into true condition argument value + * or false condition argument value. 
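+ * The same rewrite applies to `multiIf` with a single constant condition.
+ * Example: SELECT multiIf(0, a, b);
+ * Result: SELECT b;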
+ * + * Example: SELECT if(1, true_value, false_value); + * Result: SELECT true_value; + * + * Example: SELECT if(0, true_value, false_value); + * Result: SELECT false_value; + */ +class IfConstantConditionPass final : public IQueryTreePass +{ +public: + String getName() override { return "IfConstantCondition"; } + + String getDescription() override { return "Optimize if, multiIf for constant condition."; } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + +}; + +} diff --git a/src/Analyzer/Passes/MultiIfToIfPass.cpp b/src/Analyzer/Passes/MultiIfToIfPass.cpp new file mode 100644 index 00000000000..6d2ebac33e6 --- /dev/null +++ b/src/Analyzer/Passes/MultiIfToIfPass.cpp @@ -0,0 +1,45 @@ +#include + +#include +#include +#include + +namespace DB +{ + +namespace +{ + +class MultiIfToIfVisitor : public InDepthQueryTreeVisitor +{ +public: + explicit MultiIfToIfVisitor(FunctionOverloadResolverPtr if_function_ptr_) + : if_function_ptr(if_function_ptr_) + {} + + void visitImpl(QueryTreeNodePtr & node) + { + auto * function_node = node->as(); + if (!function_node || function_node->getFunctionName() != "multiIf") + return; + + if (function_node->getArguments().getNodes().size() != 3) + return; + + auto result_type = function_node->getResultType(); + function_node->resolveAsFunction(if_function_ptr, std::move(result_type)); + } + +private: + FunctionOverloadResolverPtr if_function_ptr; +}; + +} + +void MultiIfToIfPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +{ + MultiIfToIfVisitor visitor(FunctionFactory::instance().get("if", context)); + visitor.visit(query_tree_node); +} + +} diff --git a/src/Analyzer/Passes/MultiIfToIfPass.h b/src/Analyzer/Passes/MultiIfToIfPass.h new file mode 100644 index 00000000000..2213f3713ed --- /dev/null +++ b/src/Analyzer/Passes/MultiIfToIfPass.h @@ -0,0 +1,24 @@ +#pragma once + +#include + +namespace DB +{ + +/** Convert `multiIf` with single condition into `if`. 
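+ * A `multiIf` with more than one condition (more than three arguments) is left unchanged.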
+ * + * Example: SELECT multiIf(x, 1, 0); + * Result: SELECT if(x, 1, 0); + */ +class MultiIfToIfPass final : public IQueryTreePass +{ +public: + String getName() override { return "MultiIfToIf"; } + + String getDescription() override { return "Optimize multiIf with single condition to if."; } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + +}; + +} diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp new file mode 100644 index 00000000000..8c92ecc3900 --- /dev/null +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -0,0 +1,58 @@ +#include + +#include +#include + +#include +#include + +namespace DB +{ + +namespace +{ + +class NormalizeCountVariantsVisitor : public InDepthQueryTreeVisitor +{ +public: + static void visitImpl(QueryTreeNodePtr & node) + { + auto * function_node = node->as(); + if (!function_node || !function_node->isAggregateFunction() || (function_node->getFunctionName() != "count" && function_node->getFunctionName() != "sum")) + return; + + if (function_node->getArguments().getNodes().size() != 1) + return; + + auto & first_argument = function_node->getArguments().getNodes()[0]; + auto first_argument_constant_value = first_argument->getConstantValueOrNull(); + if (!first_argument_constant_value) + return; + + const auto & first_argument_constant_literal = first_argument_constant_value->getValue(); + + if (function_node->getFunctionName() == "count" && !first_argument_constant_literal.isNull()) + { + function_node->getArguments().getNodes().clear(); + } + else if (function_node->getFunctionName() == "sum" && first_argument_constant_literal.getType() == Field::Types::UInt64 && + first_argument_constant_literal.get() == 1) + { + auto result_type = function_node->getResultType(); + AggregateFunctionProperties properties; + auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties); + function_node->resolveAsAggregateFunction(std::move(aggregate_function), std::move(result_type)); + function_node->getArguments().getNodes().clear(); + } + } +}; + +} + +void NormalizeCountVariantsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr) +{ + NormalizeCountVariantsVisitor visitor; + visitor.visit(query_tree_node); +} + +} diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.h b/src/Analyzer/Passes/NormalizeCountVariantsPass.h new file mode 100644 index 00000000000..78a114f4a85 --- /dev/null +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +namespace DB +{ + +/** Remove single literal argument from `count`. Convert `sum` with single `1` literal argument into `count`. 
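+ * A NULL literal argument is deliberately kept as is, because count(NULL) is 0 rather than the row count.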
+ * + * Example: SELECT count(1); + * Result: SELECT count(); + * + * Example: SELECT sum(1); + * Result: SELECT count(); + */ +class NormalizeCountVariantsPass final : public IQueryTreePass +{ +public: + String getName() override { return "NormalizeCountVariants"; } + + String getDescription() override { return "Optimize count(literal), sum(1) into count()."; } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + +}; + +} diff --git a/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp b/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp new file mode 100644 index 00000000000..0232d8958ff --- /dev/null +++ b/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp @@ -0,0 +1,97 @@ +#include + +#include +#include +#include + +namespace DB +{ + +namespace +{ + +struct QueryTreeNodeHash +{ + size_t operator()(const IQueryTreeNode * node) const + { + return node->getTreeHash().first; + } +}; + +struct QueryTreeNodeEqualTo +{ + size_t operator()(const IQueryTreeNode * lhs_node, const IQueryTreeNode * rhs_node) const + { + return lhs_node->isEqual(*rhs_node); + } +}; + +using QueryTreeNodeSet = std::unordered_set; + +class OrderByLimitByDuplicateEliminationVisitor : public InDepthQueryTreeVisitor +{ +public: + void visitImpl(QueryTreeNodePtr & node) + { + auto * query_node = node->as(); + if (!query_node) + return; + + if (query_node->hasOrderBy()) + { + QueryTreeNodes result_nodes; + + auto & query_order_by_nodes = query_node->getOrderBy().getNodes(); + + for (auto & sort_node : query_order_by_nodes) + { + auto & sort_node_typed = sort_node->as(); + + /// Skip elements with WITH FILL + if (sort_node_typed.withFill()) + { + result_nodes.push_back(sort_node); + continue; + } + + auto [_, inserted] = unique_expressions_nodes_set.emplace(sort_node_typed.getExpression().get()); + if (inserted) + result_nodes.push_back(sort_node); + } + + query_order_by_nodes = std::move(result_nodes); + } + + unique_expressions_nodes_set.clear(); + + if (query_node->hasLimitBy()) + { + QueryTreeNodes result_nodes; + + auto & query_limit_by_nodes = query_node->getLimitBy().getNodes(); + + for (auto & limit_by_node : query_limit_by_nodes) + { + auto [_, inserted] = unique_expressions_nodes_set.emplace(limit_by_node.get()); + if (inserted) + result_nodes.push_back(limit_by_node); + } + + query_limit_by_nodes = std::move(result_nodes); + } + } + +private: + QueryTreeNodeSet unique_expressions_nodes_set; +}; + +} + +void OrderByLimitByDuplicateEliminationPass::run(QueryTreeNodePtr query_tree_node, ContextPtr) +{ + OrderByLimitByDuplicateEliminationVisitor visitor; + visitor.visit(query_tree_node); +} + +} + diff --git a/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.h b/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.h new file mode 100644 index 00000000000..11a025af5b9 --- /dev/null +++ b/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +namespace DB +{ + +/** Eliminate duplicate columns from ORDER BY and LIMIT BY. 
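+ * ORDER BY elements that use WITH FILL are kept even if their expression repeats.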
+ * + * Example: SELECT * FROM test_table ORDER BY id, id; + * Result: SELECT * FROM test_table ORDER BY id; + * + * Example: SELECT * FROM test_table LIMIT 5 BY id, id; + * Result: SELECT * FROM test_table LIMIT 5 BY id; + */ +class OrderByLimitByDuplicateEliminationPass final : public IQueryTreePass +{ +public: + String getName() override { return "OrderByLimitByDuplicateElimination"; } + + String getDescription() override { return "Remove duplicate columns from ORDER BY, LIMIT BY."; } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + +}; + +} diff --git a/src/Analyzer/Passes/OrderByTupleEliminationPass.cpp b/src/Analyzer/Passes/OrderByTupleEliminationPass.cpp new file mode 100644 index 00000000000..f70ec27ba5d --- /dev/null +++ b/src/Analyzer/Passes/OrderByTupleEliminationPass.cpp @@ -0,0 +1,59 @@ +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace +{ + +class OrderByTupleEliminationVisitor : public InDepthQueryTreeVisitor +{ +public: + static void visitImpl(QueryTreeNodePtr & node) + { + auto * query_node = node->as(); + if (!query_node || !query_node->hasOrderBy()) + return; + + QueryTreeNodes result_nodes; + + for (auto & sort_node : query_node->getOrderBy().getNodes()) + { + auto & sort_node_typed = sort_node->as(); + auto * function_expression = sort_node_typed.getExpression()->as(); + if (sort_node_typed.withFill() || !function_expression || function_expression->getFunctionName() != "tuple") + { + result_nodes.push_back(sort_node); + continue; + } + + auto & tuple_arguments_nodes = function_expression->getArguments().getNodes(); + for (auto & argument_node : tuple_arguments_nodes) + { + auto result_sort_node = std::make_shared(argument_node, + sort_node_typed.getSortDirection(), + sort_node_typed.getNullsSortDirection(), + sort_node_typed.getCollator()); + result_nodes.push_back(std::move(result_sort_node)); + } + } + + query_node->getOrderBy().getNodes() = std::move(result_nodes); + } +}; + +} + +void OrderByTupleEliminationPass::run(QueryTreeNodePtr query_tree_node, ContextPtr) +{ + OrderByTupleEliminationVisitor visitor; + visitor.visit(query_tree_node); +} + +} diff --git a/src/Analyzer/Passes/OrderByTupleEliminationPass.h b/src/Analyzer/Passes/OrderByTupleEliminationPass.h new file mode 100644 index 00000000000..5665561e227 --- /dev/null +++ b/src/Analyzer/Passes/OrderByTupleEliminationPass.h @@ -0,0 +1,24 @@ +#pragma once + +#include + +namespace DB +{ + +/** Eliminate tuples from ORDER BY. 
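+ * The sort direction, NULLS direction and collation of the tuple element are propagated
+ * to every unfolded element; ORDER BY elements with WITH FILL are left untouched.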
+ * + * Example: SELECT * FROM test_table ORDER BY (a, b); + * Result: SELECT * FROM test_table ORDER BY a, b; + */ +class OrderByTupleEliminationPass final : public IQueryTreePass +{ +public: + String getName() override { return "OrderByTupleElimination"; } + + String getDescription() override { return "Remove tuple from ORDER BY."; } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + +}; + +} diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp new file mode 100644 index 00000000000..e55f296e1b7 --- /dev/null +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -0,0 +1,5720 @@ +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; + extern const int UNKNOWN_IDENTIFIER; + extern const int UNKNOWN_FUNCTION; + extern const int LOGICAL_ERROR; + extern const int CYCLIC_ALIASES; + extern const int INCORRECT_RESULT_OF_SCALAR_SUBQUERY; + extern const int BAD_ARGUMENTS; + extern const int MULTIPLE_EXPRESSIONS_FOR_ALIAS; + extern const int TYPE_MISMATCH; + extern const int AMBIGUOUS_IDENTIFIER; + extern const int INVALID_WITH_FILL_EXPRESSION; + extern const int INVALID_LIMIT_EXPRESSION; + extern const int EMPTY_LIST_OF_COLUMNS_QUERIED; + extern const int TOO_DEEP_SUBQUERIES; + extern const int UNKNOWN_AGGREGATE_FUNCTION; + extern const int NOT_AN_AGGREGATE; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; + extern const int ILLEGAL_FINAL; + extern const int SAMPLING_NOT_SUPPORTED; + extern const int NO_COMMON_TYPE; + extern const int NOT_IMPLEMENTED; + extern const int ALIAS_REQUIRED; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +/** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h before. + * And additional documentation for each method, where special cases are described in detail. + * + * Each node in query must be resolved. For each query tree node resolved state is specific. + * + * For constant node no resolve process exists, it is resolved during construction. + * + * For table node no resolve process exists, it is resolved during construction. + * + * For function node to be resolved parameters and arguments must be resolved, function node must be initialized with concrete aggregate or + * non aggregate function and with result type. + * + * For lambda node there can be 2 different cases. + * 1. Standalone: WITH (x -> x + 1) AS lambda SELECT lambda(1); Such lambdas are inlined in query tree during query analysis pass. + * 2. Function arguments: WITH (x -> x + 1) AS lambda SELECT arrayMap(lambda, [1, 2, 3]); For such lambda resolution must + * set concrete lambda arguments (initially they are identifier nodes) and resolve lambda expression body. + * + * For query node resolve process must resolve all its inner nodes. + * + * For matcher node resolve process must replace it with matched nodes. 
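+ * For example, if test_table has columns `id` and `value`, the matcher in SELECT * FROM test_table
+ * is replaced with the column nodes `id` and `value`.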
+ * + * For identifier node resolve process must replace it with concrete non identifier node. This part is most complex because + * for identifier resolution scopes and identifier lookup context play important part. + * + * ClickHouse SQL support lexical scoping for identifier resolution. Scope can be defined by query node or by expression node. + * Expression nodes that can define scope are lambdas and table ALIAS columns. + * + * Identifier lookup context can be expression, function, table. + * + * Examples: WITH (x -> x + 1) as func SELECT func() FROM func; During function `func` resolution identifier lookup is performed + * in function context. + * + * If there are no information of identifier context rules are following: + * 1. Try to resolve identifier in expression context. + * 2. Try to resolve identifier in function context, if it is allowed. Example: SELECT func(arguments); Here func identifier cannot be resolved in function context + * because query projection does not support that. + * 3. Try to resolve identifier in table context, if it is allowed. Example: SELECT table; Here table identifier cannot be resolved in function context + * because query projection does not support that. + * + * TODO: This does not supported properly before, because matchers could not be resolved from aliases. + * + * Identifiers are resolved with following resules: + * Resolution starts with current scope. + * 1. Try to resolve identifier from expression scope arguments. Lambda expression arguments are greatest priority. + * 2. Try to resolve identifier from aliases. + * 3. Try to resolve identifier from join tree if scope is query, or if there are registered table columns in scope. + * Steps 2 and 3 can be changed using prefer_column_name_to_alias setting. + * 4. If it is table lookup, try to resolve identifier from CTE. + * If identifier could not be resolved in current scope, resolution must be continued in parent scopes. + * 5. Try to resolve identifier from parent scopes. + * + * Additional rules about aliases and scopes. + * 1. Parent scope cannot refer alias from child scope. + * 2. Child scope can refer to alias in parent scope. + * + * Example: SELECT arrayMap(x -> x + 1 AS a, [1,2,3]), a; Identifier a is unknown in parent scope. + * Example: SELECT a FROM (SELECT 1 as a); Here we do not refer to alias a from child query scope. But we query it projection result, similar to tables. + * Example: WITH 1 as a SELECT (SELECT a) as b; Here in child scope identifier a is resolved using alias from parent scope. + * + * Additional rules about identifier binding. + * Bind for identifier to entity means that identifier first part match some node during analysis. + * If other parts of identifier cannot be resolved in that node, exception must be thrown. + * + * Example: + * CREATE TABLE test_table (id UInt64, compound_value Tuple(value UInt64)) ENGINE=TinyLog; + * SELECT compound_value.value, 1 AS compound_value FROM test_table; + * Identifier first part compound_value bound to entity with alias compound_value, but nested identifier part cannot be resolved from entity, + * lookup should not be continued, and exception must be thrown because if lookup continues that way identifier can be resolved from join tree. + * + * TODO: This was not supported properly before analyzer because nested identifier could not be resolved from alias. 
+ * + * More complex example: + * CREATE TABLE test_table (id UInt64, value UInt64) ENGINE=TinyLog; + * WITH cast(('Value'), 'Tuple (value UInt64') AS value SELECT (SELECT value FROM test_table); + * Identifier first part value bound to test_table column value, but nested identifier part cannot be resolved from it, + * lookup should not be continued, and exception must be thrown because if lookup continues identifier can be resolved from parent scope. + * + * TODO: Update exception messages + * TODO: JOIN TREE subquery constant columns + * TODO: Table identifiers with optional UUID. + * TODO: Lookup functions arrayReduce(sum, [1, 2, 3]); + * TODO: SELECT (compound_expression).*, (compound_expression).COLUMNS are not supported on parser level. + * TODO: SELECT a.b.c.*, a.b.c.COLUMNS. Qualified matcher where identifier size is greater than 2 are not supported on parser level. + * TODO: Support function identifier resolve from parent query scope, if lambda in parent scope does not capture any columns. + * TODO: Support group_by_use_nulls. + * TODO: Scalar subqueries cache. + */ + +namespace +{ + +/// Identifier lookup context +enum class IdentifierLookupContext : uint8_t +{ + EXPRESSION = 0, + FUNCTION, + TABLE_EXPRESSION, +}; + +const char * toString(IdentifierLookupContext identifier_lookup_context) +{ + switch (identifier_lookup_context) + { + case IdentifierLookupContext::EXPRESSION: return "EXPRESSION"; + case IdentifierLookupContext::FUNCTION: return "FUNCTION"; + case IdentifierLookupContext::TABLE_EXPRESSION: return "TABLE_EXPRESSION"; + } +} + +const char * toStringLowercase(IdentifierLookupContext identifier_lookup_context) +{ + switch (identifier_lookup_context) + { + case IdentifierLookupContext::EXPRESSION: return "expression"; + case IdentifierLookupContext::FUNCTION: return "function"; + case IdentifierLookupContext::TABLE_EXPRESSION: return "table expression"; + } +} + +/** Structure that represent identifier lookup during query analysis. + * Lookup can be in query expression, function, table context. 
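+ * For example, in SELECT plus(a, 1) FROM test_table the name `plus` is looked up in function context,
+ * while `a` is looked up in expression context.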
+ */ +struct IdentifierLookup +{ + Identifier identifier; + IdentifierLookupContext lookup_context; + + bool isExpressionLookup() const + { + return lookup_context == IdentifierLookupContext::EXPRESSION; + } + + bool isFunctionLookup() const + { + return lookup_context == IdentifierLookupContext::FUNCTION; + } + + bool isTableExpressionLookup() const + { + return lookup_context == IdentifierLookupContext::TABLE_EXPRESSION; + } + + String dump() const + { + return identifier.getFullName() + ' ' + toString(lookup_context); + } +}; + +inline bool operator==(const IdentifierLookup & lhs, const IdentifierLookup & rhs) +{ + return lhs.identifier.getFullName() == rhs.identifier.getFullName() && lhs.lookup_context == rhs.lookup_context; +} + +[[maybe_unused]] inline bool operator!=(const IdentifierLookup & lhs, const IdentifierLookup & rhs) +{ + return !(lhs == rhs); +} + +struct IdentifierLookupHash +{ + size_t operator()(const IdentifierLookup & identifier_lookup) const + { + return std::hash()(identifier_lookup.identifier.getFullName()) ^ static_cast(identifier_lookup.lookup_context); + } +}; + +enum class IdentifierResolvePlace : UInt8 +{ + NONE = 0, + EXPRESSION_ARGUMENTS, + ALIASES, + JOIN_TREE, + /// Valid only for table lookup + CTE, + /// Valid only for table lookup + DATABASE_CATALOG +}; + +const char * toString(IdentifierResolvePlace resolved_identifier_place) +{ + switch (resolved_identifier_place) + { + case IdentifierResolvePlace::NONE: return "NONE"; + case IdentifierResolvePlace::EXPRESSION_ARGUMENTS: return "EXPRESSION_ARGUMENTS"; + case IdentifierResolvePlace::ALIASES: return "ALIASES"; + case IdentifierResolvePlace::JOIN_TREE: return "JOIN_TREE"; + case IdentifierResolvePlace::CTE: return "CTE"; + case IdentifierResolvePlace::DATABASE_CATALOG: return "DATABASE_CATALOG"; + } +} + +struct IdentifierResolveResult +{ + IdentifierResolveResult() = default; + + QueryTreeNodePtr resolved_identifier; + IdentifierResolvePlace resolve_place = IdentifierResolvePlace::NONE; + bool resolved_from_parent_scopes = false; + + [[maybe_unused]] bool isResolved() const + { + return resolve_place != IdentifierResolvePlace::NONE; + } + + [[maybe_unused]] bool isResolvedFromParentScopes() const + { + return resolved_from_parent_scopes; + } + + [[maybe_unused]] bool isResolvedFromExpressionArguments() const + { + return resolve_place == IdentifierResolvePlace::EXPRESSION_ARGUMENTS; + } + + [[maybe_unused]] bool isResolvedFromAliases() const + { + return resolve_place == IdentifierResolvePlace::ALIASES; + } + + [[maybe_unused]] bool isResolvedFromJoinTree() const + { + return resolve_place == IdentifierResolvePlace::JOIN_TREE; + } + + [[maybe_unused]] bool isResolvedFromCTEs() const + { + return resolve_place == IdentifierResolvePlace::CTE; + } + + void dump(WriteBuffer & buffer) const + { + if (!resolved_identifier) + { + buffer << "unresolved"; + return; + } + + buffer << resolved_identifier->formatASTForErrorMessage() << " place " << toString(resolve_place) << " resolved from parent scopes " << resolved_from_parent_scopes; + } + + [[maybe_unused]] String dump() const + { + WriteBufferFromOwnString buffer; + dump(buffer); + + return buffer.str(); + } +}; + +struct IdentifierResolveSettings +{ + /// Allow to check parent scopes during identifier resolution + bool allow_to_check_parent_scopes = true; + + /// Allow to check join tree during identifier resolution + bool allow_to_check_join_tree = true; + + /// Allow to check CTEs during table identifier resolution + bool allow_to_check_cte = true; + + /// 
Allow to check database catalog during table identifier resolution + bool allow_to_check_database_catalog = true; + + /// Allow to resolve subquery during identifier resolution + bool allow_to_resolve_subquery_during_identifier_resolution = true; +}; + +struct StringTransparentHash +{ + using is_transparent = void; + using hash = std::hash; + + [[maybe_unused]] size_t operator()(const char * data) const + { + return hash()(data); + } + + size_t operator()(std::string_view data) const + { + return hash()(data); + } + + size_t operator()(const std::string & data) const + { + return hash()(data); + } +}; + +using ColumnNameToColumnNodeMap = std::unordered_map>; + +struct TableExpressionData +{ + std::string table_expression_name; + std::string table_expression_description; + std::string table_name; + std::string database_name; + ColumnNameToColumnNodeMap column_name_to_column_node; + std::unordered_set> column_identifier_first_parts; + + bool hasFullIdentifierName(IdentifierView identifier) const + { + return column_name_to_column_node.contains(std::string_view(identifier.getFullName())); + } + + bool canBindIdentifier(IdentifierView identifier) const + { + return column_identifier_first_parts.contains(std::string_view(identifier.at(0))); + } + + [[maybe_unused]] void dump(WriteBuffer & buffer) const + { + buffer << "Columns size " << column_name_to_column_node.size() << '\n'; + + for (const auto & [column_name, column_node] : column_name_to_column_node) + buffer << "Column name " << column_name << " column node " << column_node->dumpTree() << '\n'; + } + + [[maybe_unused]] String dump() const + { + WriteBufferFromOwnString buffer; + dump(buffer); + + return buffer.str(); + } +}; + +class ExpressionsStack +{ +public: + void pushNode(const QueryTreeNodePtr & node) + { + if (node->hasAlias()) + { + expressions.emplace_back(node.get(), node->getAlias()); + ++alias_name_to_expressions_size[expressions.back().second]; + return; + } + + expressions.emplace_back(node.get(), std::string()); + } + + void popNode() + { + const auto & [_, top_expression_alias] = expressions.back(); + if (!top_expression_alias.empty()) + { + auto it = alias_name_to_expressions_size.find(top_expression_alias); + --it->second; + + if (it->second == 0) + alias_name_to_expressions_size.erase(it); + } + + expressions.pop_back(); + } + + const IQueryTreeNode * getRoot() const + { + if (expressions.empty()) + return nullptr; + + return expressions.front().first; + } + + const IQueryTreeNode * getTop() const + { + if (expressions.empty()) + return nullptr; + + return expressions.back().first; + } + + bool hasExpressionWithAlias(const std::string & alias) const + { + return alias_name_to_expressions_size.find(alias) != alias_name_to_expressions_size.end(); + } + + [[maybe_unused]] size_t size() const + { + return expressions.size(); + } + + bool empty() const + { + return expressions.empty(); + } + + void dump(WriteBuffer & buffer) const + { + buffer << expressions.size() << '\n'; + + for (const auto & [expression, alias] : expressions) + { + buffer << "Expression "; + buffer << expression->formatASTForErrorMessage(); + + if (!alias.empty()) + buffer << " alias " << alias; + + buffer << '\n'; + } + } + + [[maybe_unused]] String dump() const + { + WriteBufferFromOwnString buffer; + dump(buffer); + + return buffer.str(); + } + +private: + std::vector> expressions; + std::unordered_map alias_name_to_expressions_size; +}; + +/** Projection names is name of query tree node that is used in projection part of query node. 
+ * Example: SELECT id FROM test_table; + * `id` is projection name of column node + * + * Example: SELECT id AS id_alias FROM test_table; + * `id_alias` is projection name of column node + * + * Calculation of projection names is done during expression nodes resolution. This is done this way + * because after identifier node is resolved we lose information about identifier name. We could + * potentially save this information in query tree node itself, but that would require to clone it in some cases. + * Example: SELECT big_scalar_subquery AS a, a AS b, b AS c; + * All 3 nodes in projection are the same big_scalar_subquery, but they have different projection names. + * If we want to save it in query tree node, we have to clone subquery node that could lead to performance degradation. + * + * Possible solution is to separate query node metadata and query node content. So only node metadata could be cloned + * if we want to change projection name. This solution does not seem to be easy for client of query tree because projection + * name will be part of interface. If we potentially could hide projection names calculation in analyzer without introducing additional + * changes in query tree structure that would be preferable. + * + * Currently each resolve method returns projection names array. Resolve method must compute projection names of node. + * If node is resolved as list node this is case for `untuple` function or `matcher` result projection names array must contain projection names + * for result nodes. + * If node is not resolved as list node, projection names array contain single projection name for node. + * + * Rules for projection names: + * 1. If node has alias. It is node projection name. + * Except scenario where `untuple` function has alias. Example: SELECT untuple(expr) AS alias, alias. + * + * 2. For constant it is constant value string representation. + * + * 3. For identifier: + * If identifier is resolved from JOIN TREE, we want to remove additional identifier qualifications. + * Example: SELECT default.test_table.id FROM test_table. + * Result projection name is `id`. + * + * Example: SELECT t1.id FROM test_table_1 AS t1, test_table_2 AS t2 + * In example both test_table_1, test_table_2 have `id` column. + * In such case projection name is `t1.id` because if additional qualification is removed then column projection name `id` will be ambiguous. + * + * Example: SELECT default.test_table_1.id FROM test_table_1 AS t1, test_table_2 AS t2 + * In such case projection name is `test_table_1.id` because we remove unnecessary database qualification, but table name qualification cannot be removed + * because otherwise column projection name `id` will be ambiguous. + * + * If identifier is not resolved from JOIN TREE. Identifier name is projection name. + * Except scenario where `untuple` function resolved using identifier. Example: SELECT untuple(expr) AS alias, alias. + * Example: SELECT sum(1, 1) AS value, value. + * In such case both nodes have `value` projection names. + * + * Example: SELECT id AS value, value FROM test_table. + * In such case both nodes have have `value` projection names. + * + * Special case is `untuple` function. If `untuple` function specified with alias, then result nodes will have alias.tuple_column_name projection names. + * Example: SELECT cast(tuple(1), 'Tuple(id UInt64)') AS value, untuple(value) AS a; + * Result projection names are `value`, `a.id`. 
+ *
+ * If the `untuple` function does not have an alias, then the result nodes have `tupleElement(untuple_expression_projection_name, 'tuple_column_name')` projection names.
+ *
+ * Example: SELECT cast(tuple(1), 'Tuple(id UInt64)') AS value, untuple(value);
+ * Result projection names are `value`, `tupleElement(value, 'id')`;
+ *
+ * 4. For a function:
+ * The projection name consists of function_name(parameters_projection_names)(arguments_projection_names).
+ * Additionally, if the function is a window function, the window node projection name is used with the OVER clause.
+ * Example: function_name (parameters_names)(argument_projection_names) OVER window_name;
+ * Example: function_name (parameters_names)(argument_projection_names) OVER (PARTITION BY id ORDER BY id).
+ * Example: function_name (parameters_names)(argument_projection_names) OVER (window_name ORDER BY id).
+ *
+ * 5. For a lambda:
+ * If it is a standalone lambda that returns a single expression, the function projection name is used.
+ * Example: WITH (x -> x + 1) AS lambda SELECT lambda(1).
+ * Projection name is `lambda(1)`.
+ *
+ * If it is a standalone lambda that returns a list, the projection names of the list nodes are used.
+ * Example: WITH (x -> *) AS lambda SELECT lambda(1) FROM test_table;
+ * If test_table has two columns `id`, `value`, then the result projection names are `id`, `value`.
+ *
+ * If the lambda is an argument of a function,
+ * then the projection name consists of lambda(tuple(lambda_arguments)(lambda_body_projection_name));
+ *
+ * 6. For a matcher:
+ * The projection names of the matched nodes are used as matcher projection names.
+ *
+ * Matched nodes must be qualified if needed.
+ * Example: SELECT * FROM test_table_1 AS t1, test_table_2 AS t2.
+ * In this example tables test_table_1 and test_table_2 both have `id`, `value` columns.
+ * Matched nodes after unqualified matcher resolve must be qualified to avoid ambiguous projection names.
+ * Result projection names must be `t1.id`, `t1.value`, `t2.id`, `t2.value`.
+ *
+ * There are special cases:
+ * 1. For a lambda inside an APPLY matcher transformer:
+ * Example: SELECT * APPLY x -> toString(x) FROM test_table.
+ * In such case the lambda argument projection name `x` is replaced by the matched node projection name.
+ * If the table has two columns `id` and `value`, then the result projection names are `toString(id)`, `toString(value)`;
+ *
+ * 2. For an unqualified matcher when the JOIN tree contains a JOIN with USING.
+ * Example: SELECT * FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 USING(id);
+ * Result projection names must be `id`, `t1.value`, `t2.value`.
+ *
+ * 7. For a subquery:
+ * The subquery projection name consists of the `_subquery_` prefix and an implementation-specific unique number suffix.
+ * Example: SELECT (SELECT 1), (SELECT 1 UNION DISTINCT SELECT 1);
+ * Result projection names can be `_subquery_1`, `_subquery_2`;
+ *
+ * 8. For a table:
+ * A table node can be used in expression context only as the right argument of the IN function. In that case the identifier is used
+ * as the table node projection name.
+ * Example: SELECT id IN test_table FROM test_table;
+ * Result projection name is `in(id, test_table)`.
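+ *
+ * As an illustrative sketch combining the rules above (the exact subquery number suffix is implementation specific):
+ * Example: SELECT 1, id AS a, concat(value, 'x'), (SELECT 1) FROM test_table;
+ * Expected projection names are `1` (constant representation), `a` (alias), `concat(value, 'x')`
+ * (function name with argument projection names) and `_subquery_1` (subquery placeholder name).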
+ */ +using ProjectionName = String; +using ProjectionNames = std::vector; +constexpr auto PROJECTION_NAME_PLACEHOLDER = "__projection_name_placeholder"; + +struct IdentifierResolveScope +{ + /// Construct identifier resolve scope using scope node, and parent scope + IdentifierResolveScope(QueryTreeNodePtr scope_node_, IdentifierResolveScope * parent_scope_) + : scope_node(std::move(scope_node_)) + , parent_scope(parent_scope_) + { + if (parent_scope) + { + subquery_depth = parent_scope->subquery_depth; + context = parent_scope->context; + } + } + + QueryTreeNodePtr scope_node; + + IdentifierResolveScope * parent_scope = nullptr; + + ContextPtr context; + + /// Identifier lookup to result + std::unordered_map identifier_lookup_to_result; + + /// Lambda argument can be expression like constant, column, or it can be function + std::unordered_map expression_argument_name_to_node; + + /// Alias name to query expression node + std::unordered_map alias_name_to_expression_node; + + /// Alias name to lambda node + std::unordered_map alias_name_to_lambda_node; + + /// Alias name to table expression node + std::unordered_map alias_name_to_table_expression_node; + + /// Table column name to column node. Valid only during table ALIAS columns resolve. + ColumnNameToColumnNodeMap column_name_to_column_node; + + /// CTE name to query node + std::unordered_map cte_name_to_query_node; + + /// Window name to window node + std::unordered_map window_name_to_window_node; + + /// Nodes with duplicated aliases + std::unordered_set nodes_with_duplicated_aliases; + + /// Current scope expression in resolve process stack + ExpressionsStack expressions_in_resolve_process_stack; + + /// Table expressions in resolve process + std::unordered_set table_expressions_in_resolve_process; + + /// Current scope expression + std::unordered_set non_cached_identifier_lookups_during_expression_resolve; + + /// Table expression node to data + std::unordered_map table_expression_node_to_data; + + /// Use identifier lookup to result cache + bool use_identifier_lookup_to_result_cache = true; + + /// Subquery depth + size_t subquery_depth = 0; + + /** Scope join tree node for expression. + * Valid only during analysis construction for single expression. + */ + QueryTreeNodePtr expression_join_tree_node; + + [[maybe_unused]] const IdentifierResolveScope * getNearestQueryScope() const + { + const IdentifierResolveScope * scope_to_check = this; + while (scope_to_check != nullptr) + { + if (scope_to_check->scope_node->getNodeType() == QueryTreeNodeType::QUERY) + break; + + scope_to_check = scope_to_check->parent_scope; + } + + return scope_to_check; + } + + IdentifierResolveScope * getNearestQueryScope() + { + IdentifierResolveScope * scope_to_check = this; + while (scope_to_check != nullptr) + { + if (scope_to_check->scope_node->getNodeType() == QueryTreeNodeType::QUERY) + break; + + scope_to_check = scope_to_check->parent_scope; + } + + return scope_to_check; + } + + TableExpressionData & getTableExpressionDataOrThrow(QueryTreeNodePtr table_expression_node) + { + auto it = table_expression_node_to_data.find(table_expression_node); + if (it == table_expression_node_to_data.end()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Table expression {} data must be initialized. 
In scope {}", + table_expression_node->formatASTForErrorMessage(), + scope_node->formatASTForErrorMessage()); + } + + return it->second; + } + + /// Dump identifier resolve scope + [[maybe_unused]] void dump(WriteBuffer & buffer) const + { + buffer << "Scope node " << scope_node->formatASTForErrorMessage() << '\n'; + buffer << "Identifier lookup to result " << identifier_lookup_to_result.size() << '\n'; + for (const auto & [identifier, result] : identifier_lookup_to_result) + { + buffer << "Identifier " << identifier.dump() << " resolve result "; + result.dump(buffer); + buffer << '\n'; + } + + buffer << "Expression argument name to node " << expression_argument_name_to_node.size() << '\n'; + for (const auto & [alias_name, node] : expression_argument_name_to_node) + buffer << "Alias name " << alias_name << " node " << node->formatASTForErrorMessage() << '\n'; + + buffer << "Alias name to expression node table size " << alias_name_to_expression_node.size() << '\n'; + for (const auto & [alias_name, node] : alias_name_to_expression_node) + buffer << "Alias name " << alias_name << " expression node " << node->dumpTree() << '\n'; + + buffer << "Alias name to function node table size " << alias_name_to_lambda_node.size() << '\n'; + for (const auto & [alias_name, node] : alias_name_to_lambda_node) + buffer << "Alias name " << alias_name << " lambda node " << node->formatASTForErrorMessage() << '\n'; + + buffer << "Alias name to table expression node table size " << alias_name_to_table_expression_node.size() << '\n'; + for (const auto & [alias_name, node] : alias_name_to_table_expression_node) + buffer << "Alias name " << alias_name << " node " << node->formatASTForErrorMessage() << '\n'; + + buffer << "CTE name to query node table size " << cte_name_to_query_node.size() << '\n'; + for (const auto & [cte_name, node] : cte_name_to_query_node) + buffer << "CTE name " << cte_name << " node " << node->formatASTForErrorMessage() << '\n'; + + buffer << "WINDOW name to window node table size " << window_name_to_window_node.size() << '\n'; + for (const auto & [window_name, node] : window_name_to_window_node) + buffer << "CTE name " << window_name << " node " << node->formatASTForErrorMessage() << '\n'; + + buffer << "Nodes with duplicated aliases size " << nodes_with_duplicated_aliases.size() << '\n'; + for (const auto & node : nodes_with_duplicated_aliases) + buffer << "Alias name " << node->getAlias() << " node " << node->formatASTForErrorMessage() << '\n'; + + buffer << "Expression resolve process stack " << '\n'; + expressions_in_resolve_process_stack.dump(buffer); + + buffer << "Table expressions in resolve process size " << table_expressions_in_resolve_process.size() << '\n'; + for (const auto & node : table_expressions_in_resolve_process) + buffer << "Table expression " << node->formatASTForErrorMessage() << '\n'; + + buffer << "Non cached identifier lookups during expression resolve " << non_cached_identifier_lookups_during_expression_resolve.size() << '\n'; + for (const auto & identifier_lookup : non_cached_identifier_lookups_during_expression_resolve) + buffer << "Identifier lookup " << identifier_lookup.dump() << '\n'; + + buffer << "Table expression node to data " << table_expression_node_to_data.size() << '\n'; + for (const auto & [table_expression_node, table_expression_data] : table_expression_node_to_data) + buffer << "Table expression node " << table_expression_node->formatASTForErrorMessage() << " data " << table_expression_data.dump() << '\n'; + + buffer << "Use identifier lookup to 
result cache " << use_identifier_lookup_to_result_cache << '\n'; + buffer << "Subquery depth " << subquery_depth << '\n'; + } + + [[maybe_unused]] String dump() const + { + WriteBufferFromOwnString buffer; + dump(buffer); + + return buffer.str(); + } +}; + + +/** Visitor that extracts expression and function aliases from node and initialize scope tables with it. + * Does not go into child lambdas and queries. + * + * Important: + * Identifier nodes with aliases are added both in alias to expression and alias to function map. + * + * These is necessary because identifier with alias can give alias name to any query tree node. + * + * Example: + * WITH (x -> x + 1) AS id, id AS value SELECT value(1); + * In this example id as value is identifier node that has alias, during scope initialization we cannot derive + * that id is actually lambda or expression. + * + * There are no easy solution here, without trying to make full featured expression resolution at this stage. + * Example: + * WITH (x -> x + 1) AS id, id AS id_1, id_1 AS id_2 SELECT id_2(1); + * Example: SELECT a, b AS a, b AS c, 1 AS c; + * + * It is client responsibility after resolving identifier node with alias, make following actions: + * 1. If identifier node was resolved in function scope, remove alias from scope expression map. + * 2. If identifier node was resolved in expression scope, remove alias from scope function map. + * + * That way we separate alias map initialization and expressions resolution. + */ +class QueryExpressionsAliasVisitor : public InDepthQueryTreeVisitor +{ +public: + explicit QueryExpressionsAliasVisitor(IdentifierResolveScope & scope_) + : scope(scope_) + {} + + void visitImpl(QueryTreeNodePtr & node) + { + updateAliasesIfNeeded(node, false /*is_lambda_node*/); + } + + bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child) + { + if (auto * lambda_node = child->as()) + { + updateAliasesIfNeeded(child, true /*is_lambda_node*/); + return false; + } + else if (auto * query_tree_node = child->as()) + { + if (query_tree_node->isCTE()) + return false; + + updateAliasesIfNeeded(child, false /*is_lambda_node*/); + return false; + } + else if (auto * union_node = child->as()) + { + if (union_node->isCTE()) + return false; + + updateAliasesIfNeeded(child, false /*is_lambda_node*/); + return false; + } + + return true; + } +private: + void updateAliasesIfNeeded(const QueryTreeNodePtr & node, bool is_lambda_node) + { + if (!node->hasAlias()) + return; + + const auto & alias = node->getAlias(); + + if (is_lambda_node) + { + if (scope.alias_name_to_expression_node.contains(alias)) + scope.nodes_with_duplicated_aliases.insert(node); + + auto [_, inserted] = scope.alias_name_to_lambda_node.insert(std::make_pair(alias, node)); + if (!inserted) + scope.nodes_with_duplicated_aliases.insert(node); + + return; + } + + if (scope.alias_name_to_lambda_node.contains(alias)) + scope.nodes_with_duplicated_aliases.insert(node); + + auto [_, inserted] = scope.alias_name_to_expression_node.insert(std::make_pair(alias, node)); + if (!inserted) + scope.nodes_with_duplicated_aliases.insert(node); + + /// If node is identifier put it also in scope alias name to lambda node map + if (node->getNodeType() == QueryTreeNodeType::IDENTIFIER) + scope.alias_name_to_lambda_node.insert(std::make_pair(alias, node)); + } + + IdentifierResolveScope & scope; +}; + +class TableExpressionsAliasVisitor : public InDepthQueryTreeVisitor +{ +public: + explicit TableExpressionsAliasVisitor(IdentifierResolveScope & scope_) + : 
scope(scope_) + {} + + void visitImpl(QueryTreeNodePtr & node) + { + updateAliasesIfNeeded(node); + } + + static bool needChildVisit(const QueryTreeNodePtr & node, const QueryTreeNodePtr & child) + { + auto node_type = node->getNodeType(); + + switch (node_type) + { + case QueryTreeNodeType::ARRAY_JOIN: + { + const auto & array_join_node = node->as(); + return child.get() == array_join_node.getTableExpression().get(); + } + case QueryTreeNodeType::JOIN: + { + const auto & join_node = node->as(); + return child.get() == join_node.getLeftTableExpression().get() || child.get() == join_node.getRightTableExpression().get(); + } + default: + { + break; + } + } + + return false; + } + +private: + void updateAliasesIfNeeded(const QueryTreeNodePtr & node) + { + if (!node->hasAlias()) + return; + + const auto & node_alias = node->getAlias(); + auto [_, inserted] = scope.alias_name_to_table_expression_node.emplace(node_alias, node); + if (!inserted) + throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, + "Multiple table expressions with same alias {}. In scope {}", + node_alias, + scope.scope_node->formatASTForErrorMessage()); + } + + IdentifierResolveScope & scope; +}; + +class QueryAnalyzer +{ +public: + void resolve(QueryTreeNodePtr node, const QueryTreeNodePtr & table_expression, ContextPtr context) + { + IdentifierResolveScope scope(node, nullptr /*parent_scope*/); + scope.context = context; + + auto node_type = node->getNodeType(); + + switch (node_type) + { + case QueryTreeNodeType::QUERY: + { + if (table_expression) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "For query analysis table expression must be empty"); + + resolveQuery(node, scope); + break; + } + case QueryTreeNodeType::UNION: + { + if (table_expression) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "For union analysis table expression must be empty"); + + resolveUnion(node, scope); + break; + } + case QueryTreeNodeType::IDENTIFIER: + [[fallthrough]]; + case QueryTreeNodeType::CONSTANT: + [[fallthrough]]; + case QueryTreeNodeType::COLUMN: + [[fallthrough]]; + case QueryTreeNodeType::FUNCTION: + [[fallthrough]]; + case QueryTreeNodeType::LIST: + { + if (!table_expression) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "For expression analysis table expression must not be empty"); + + scope.expression_join_tree_node = table_expression; + validateTableExpressionModifiers(scope.expression_join_tree_node, scope); + initializeTableExpressionColumns(scope.expression_join_tree_node, scope); + + if (node_type == QueryTreeNodeType::LIST) + resolveExpressionNodeList(node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + else + resolveExpressionNode(node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + + break; + } + default: + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Node {} with type {} is not supported by query analyzer. 
Supported nodes are query, union, identifier, constant, column, function, list.", + node->formatASTForErrorMessage(), + node->getNodeTypeName()); + } + } + } + +private: + /// Utility functions + + static bool isExpressionNodeType(QueryTreeNodeType node_type); + + static bool isFunctionExpressionNodeType(QueryTreeNodeType node_type); + + static bool isTableExpressionNodeType(QueryTreeNodeType node_type); + + static ProjectionName calculateFunctionProjectionName(const QueryTreeNodePtr & function_node, + const ProjectionNames & parameters_projection_names, + const ProjectionNames & arguments_projection_names); + + static ProjectionName calculateWindowProjectionName(const QueryTreeNodePtr & window_node, + const QueryTreeNodePtr & parent_window_node, + const String & parent_window_name, + const ProjectionNames & partition_by_projection_names, + const ProjectionNames & order_by_projection_names, + const ProjectionName & frame_begin_offset_projection_name, + const ProjectionName & frame_end_offset_projection_name); + + static ProjectionName calculateSortColumnProjectionName(const QueryTreeNodePtr & sort_column_node, + const ProjectionName & sort_expression_projection_name, + const ProjectionName & fill_from_expression_projection_name, + const ProjectionName & fill_to_expression_projection_name, + const ProjectionName & fill_step_expression_projection_name); + + static QueryTreeNodePtr wrapExpressionNodeInTupleElement(QueryTreeNodePtr expression_node, IdentifierView nested_path); + + static QueryTreeNodePtr tryGetLambdaFromSQLUserDefinedFunctions(const std::string & function_name, ContextPtr context); + + static void evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & query_tree_node, size_t subquery_depth, ContextPtr context); + + static void mergeWindowWithParentWindow(const QueryTreeNodePtr & window_node, const QueryTreeNodePtr & parent_window_node, IdentifierResolveScope & scope); + + static void replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_list, const QueryTreeNodes & projection_nodes, IdentifierResolveScope & scope); + + static void validateLimitOffsetExpression(QueryTreeNodePtr & expression_node, const String & expression_description, IdentifierResolveScope & scope); + + static void validateTableExpressionModifiers(const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope); + + static void validateJoinTableExpressionWithoutAlias(const QueryTreeNodePtr & join_node, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope); + + /// Resolve identifier functions + + static QueryTreeNodePtr tryResolveTableIdentifierFromDatabaseCatalog(const Identifier & table_identifier, ContextPtr context); + + QueryTreeNodePtr tryResolveIdentifierFromExpressionArguments(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope); + + static bool tryBindIdentifierToAliases(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope); + + QueryTreeNodePtr tryResolveIdentifierFromAliases(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope, IdentifierResolveSettings identifier_resolve_settings = {}); + + QueryTreeNodePtr tryResolveIdentifierFromTableColumns(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope); + + static bool tryBindIdentifierToTableExpression(const IdentifierLookup & identifier_lookup, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope); + + QueryTreeNodePtr tryResolveIdentifierFromTableExpression(const IdentifierLookup & 
identifier_lookup, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope); + + QueryTreeNodePtr tryResolveIdentifierFromJoin(const IdentifierLookup & identifier_lookup, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope); + + QueryTreeNodePtr tryResolveIdentifierFromArrayJoin(const IdentifierLookup & identifier_lookup, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope); + + QueryTreeNodePtr tryResolveIdentifierFromJoinTreeNode(const IdentifierLookup & identifier_lookup, const QueryTreeNodePtr & join_tree_node, IdentifierResolveScope & scope); + + QueryTreeNodePtr tryResolveIdentifierFromJoinTree(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope); + + IdentifierResolveResult tryResolveIdentifierInParentScopes(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope); + + IdentifierResolveResult tryResolveIdentifier(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope, IdentifierResolveSettings identifier_resolve_settings = {}); + + /// Resolve query tree nodes functions + + using QueryTreeNodesWithNames = std::vector>; + + void qualifyMatchedColumnsProjectionNamesIfNeeded(QueryTreeNodesWithNames & matched_nodes_with_column_names, + const QueryTreeNodePtr & table_expression_node, + IdentifierResolveScope & scope); + + QueryTreeNodesWithNames resolveQualifiedMatcher(QueryTreeNodePtr & matcher_node, IdentifierResolveScope & scope); + + QueryTreeNodesWithNames resolveUnqualifiedMatcher(QueryTreeNodePtr & matcher_node, IdentifierResolveScope & scope); + + ProjectionNames resolveMatcher(QueryTreeNodePtr & matcher_node, IdentifierResolveScope & scope); + + ProjectionName resolveWindow(QueryTreeNodePtr & window_node, IdentifierResolveScope & scope); + + ProjectionNames resolveLambda(const QueryTreeNodePtr & lambda_node, + const QueryTreeNodePtr & lambda_node_to_resolve, + const QueryTreeNodes & lambda_arguments, + IdentifierResolveScope & scope); + + ProjectionNames resolveFunction(QueryTreeNodePtr & function_node, IdentifierResolveScope & scope); + + ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression); + + ProjectionNames resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression); + + ProjectionNames resolveSortNodeList(QueryTreeNodePtr & sort_node_list, IdentifierResolveScope & scope); + + void resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpolate_node_list, IdentifierResolveScope & scope); + + void resolveWindowNodeList(QueryTreeNodePtr & window_node_list, IdentifierResolveScope & scope); + + NamesAndTypes resolveProjectionExpressionNodeList(QueryTreeNodePtr & projection_node_list, IdentifierResolveScope & scope); + + void initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, IdentifierResolveScope & scope); + + void initializeTableExpressionColumns(const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope); + + void resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, IdentifierResolveScope & scope, QueryExpressionsAliasVisitor & expressions_visitor); + + void resolveQuery(const QueryTreeNodePtr & query_node, IdentifierResolveScope & scope); + + void resolveUnion(const QueryTreeNodePtr & union_node, IdentifierResolveScope & scope); + + /// Lambdas that are currently in resolve process + std::unordered_set 
lambdas_in_resolve_process; + + /// Array join expressions counter + size_t array_join_expressions_counter = 0; + + /// Subquery counter + size_t subquery_counter = 0; + + /// Global expression node to projection name map + std::unordered_map node_to_projection_name; + + /// Global resolve expression node to projection names map + std::unordered_map resolved_expressions; + +}; + +/// Utility functions implementation + + +bool QueryAnalyzer::isExpressionNodeType(QueryTreeNodeType node_type) +{ + return node_type == QueryTreeNodeType::CONSTANT || node_type == QueryTreeNodeType::COLUMN || node_type == QueryTreeNodeType::FUNCTION + || node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION; +} + +bool QueryAnalyzer::isFunctionExpressionNodeType(QueryTreeNodeType node_type) +{ + return node_type == QueryTreeNodeType::LAMBDA; +} + +bool QueryAnalyzer::isTableExpressionNodeType(QueryTreeNodeType node_type) +{ + return node_type == QueryTreeNodeType::TABLE || node_type == QueryTreeNodeType::TABLE_FUNCTION || + node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION; +} + +ProjectionName QueryAnalyzer::calculateFunctionProjectionName(const QueryTreeNodePtr & function_node, const ProjectionNames & parameters_projection_names, + const ProjectionNames & arguments_projection_names) +{ + const auto & function_node_typed = function_node->as(); + + WriteBufferFromOwnString buffer; + buffer << function_node_typed.getFunctionName(); + + if (!parameters_projection_names.empty()) + { + buffer << '('; + + size_t function_parameters_projection_names_size = parameters_projection_names.size(); + for (size_t i = 0; i < function_parameters_projection_names_size; ++i) + { + buffer << parameters_projection_names[i]; + + if (i + 1 != function_parameters_projection_names_size) + buffer << ", "; + } + + buffer << ')'; + } + + buffer << '('; + + size_t function_arguments_projection_names_size = arguments_projection_names.size(); + for (size_t i = 0; i < function_arguments_projection_names_size; ++i) + { + buffer << arguments_projection_names[i]; + + if (i + 1 != function_arguments_projection_names_size) + buffer << ", "; + } + + buffer << ')'; + + return buffer.str(); +} + +ProjectionName QueryAnalyzer::calculateWindowProjectionName(const QueryTreeNodePtr & window_node, + const QueryTreeNodePtr & parent_window_node, + const String & parent_window_name, + const ProjectionNames & partition_by_projection_names, + const ProjectionNames & order_by_projection_names, + const ProjectionName & frame_begin_offset_projection_name, + const ProjectionName & frame_end_offset_projection_name) +{ + const auto & window_node_typed = window_node->as(); + const auto & window_frame = window_node_typed.getWindowFrame(); + + bool parent_window_node_has_partition_by = false; + bool parent_window_node_has_order_by = false; + + if (parent_window_node) + { + const auto & parent_window_node_typed = parent_window_node->as(); + parent_window_node_has_partition_by = parent_window_node_typed.hasPartitionBy(); + parent_window_node_has_order_by = parent_window_node_typed.hasOrderBy(); + } + + WriteBufferFromOwnString buffer; + + if (!parent_window_name.empty()) + buffer << parent_window_name; + + if (!partition_by_projection_names.empty() && !parent_window_node_has_partition_by) + { + if (!parent_window_name.empty()) + buffer << ' '; + + buffer << "PARTITION BY "; + + size_t partition_by_projection_names_size = partition_by_projection_names.size(); + for (size_t i = 0; i < partition_by_projection_names_size; 
++i) + { + buffer << partition_by_projection_names[i]; + if (i + 1 != partition_by_projection_names_size) + buffer << ", "; + } + } + + if (!order_by_projection_names.empty() && !parent_window_node_has_order_by) + { + if (!partition_by_projection_names.empty() || !parent_window_name.empty()) + buffer << ' '; + + buffer << "ORDER BY "; + + size_t order_by_projection_names_size = order_by_projection_names.size(); + for (size_t i = 0; i < order_by_projection_names_size; ++i) + { + buffer << order_by_projection_names[i]; + if (i + 1 != order_by_projection_names_size) + buffer << ", "; + } + } + + if (!window_frame.is_default) + { + if (!partition_by_projection_names.empty() || !order_by_projection_names.empty() || !parent_window_name.empty()) + buffer << ' '; + + buffer << window_frame.type << " BETWEEN "; + if (window_frame.begin_type == WindowFrame::BoundaryType::Current) + { + buffer << "CURRENT ROW"; + } + else if (window_frame.begin_type == WindowFrame::BoundaryType::Unbounded) + { + buffer << "UNBOUNDED"; + buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING"); + } + else + { + buffer << frame_begin_offset_projection_name; + buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING"); + } + + buffer << " AND "; + + if (window_frame.end_type == WindowFrame::BoundaryType::Current) + { + buffer << "CURRENT ROW"; + } + else if (window_frame.end_type == WindowFrame::BoundaryType::Unbounded) + { + buffer << "UNBOUNDED"; + buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING"); + } + else + { + buffer << frame_end_offset_projection_name; + buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING"); + } + } + + return buffer.str(); +} + +ProjectionName QueryAnalyzer::calculateSortColumnProjectionName(const QueryTreeNodePtr & sort_column_node, const ProjectionName & sort_expression_projection_name, + const ProjectionName & fill_from_expression_projection_name, const ProjectionName & fill_to_expression_projection_name, const ProjectionName & fill_step_expression_projection_name) +{ + auto & sort_node_typed = sort_column_node->as(); + + WriteBufferFromOwnString sort_column_projection_name_buffer; + sort_column_projection_name_buffer << sort_expression_projection_name; + + auto sort_direction = sort_node_typed.getSortDirection(); + sort_column_projection_name_buffer << (sort_direction == SortDirection::ASCENDING ? " ASC" : " DESC"); + + auto nulls_sort_direction = sort_node_typed.getNullsSortDirection(); + + if (nulls_sort_direction) + sort_column_projection_name_buffer << " NULLS " << (nulls_sort_direction == sort_direction ? "LAST" : "FIRST"); + + if (auto collator = sort_node_typed.getCollator()) + sort_column_projection_name_buffer << " COLLATE " << collator->getLocale(); + + if (sort_node_typed.withFill()) + { + sort_column_projection_name_buffer << " WITH FILL"; + + if (sort_node_typed.hasFillFrom()) + sort_column_projection_name_buffer << " FROM " << fill_from_expression_projection_name; + + if (sort_node_typed.hasFillTo()) + sort_column_projection_name_buffer << " TO " << fill_to_expression_projection_name; + + if (sort_node_typed.hasFillStep()) + sort_column_projection_name_buffer << " STEP " << fill_step_expression_projection_name; + } + + return sort_column_projection_name_buffer.str(); +} + +/** Wrap expression node in tuple element function calls for nested paths. + * Example: Expression node: compound_expression. Nested path: nested_path_1.nested_path_2. 
+ * Result: tupleElement(tupleElement(compound_expression, 'nested_path_1'), 'nested_path_2'). + */ +QueryTreeNodePtr QueryAnalyzer::wrapExpressionNodeInTupleElement(QueryTreeNodePtr expression_node, IdentifierView nested_path) +{ + size_t nested_path_parts_size = nested_path.getPartsSize(); + for (size_t i = 0; i < nested_path_parts_size; ++i) + { + const auto & nested_path_part = nested_path[i]; + auto tuple_element_function = std::make_shared("tupleElement"); + + auto & tuple_element_function_arguments_nodes = tuple_element_function->getArguments().getNodes(); + tuple_element_function_arguments_nodes.reserve(2); + tuple_element_function_arguments_nodes.push_back(expression_node); + tuple_element_function_arguments_nodes.push_back(std::make_shared(nested_path_part)); + + expression_node = std::move(tuple_element_function); + } + + return expression_node; +} + +/** Try to get lambda node from sql user defined functions if sql user defined function with function name exists. + * Returns lambda node if function exists, nullptr otherwise. + */ +QueryTreeNodePtr QueryAnalyzer::tryGetLambdaFromSQLUserDefinedFunctions(const std::string & function_name, ContextPtr context) +{ + auto user_defined_function = UserDefinedSQLFunctionFactory::instance().tryGet(function_name); + if (!user_defined_function) + return {}; + + const auto & create_function_query = user_defined_function->as(); + auto result_node = buildQueryTree(create_function_query->function_core, context); + if (result_node->getNodeType() != QueryTreeNodeType::LAMBDA) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "SQL user defined function {} must represent lambda expression. Actual {}", + function_name, + create_function_query->function_core->formatForErrorMessage()); + + return result_node; +} + +/// Evaluate scalar subquery and perform constant folding if scalar subquery does not have constant value +void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, size_t subquery_depth, ContextPtr context) +{ + auto * query_node = node->as(); + auto * union_node = node->as(); + if (!query_node && !union_node) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Node must have query or union type. 
Actual {} {}", + node->getNodeTypeName(), + node->formatASTForErrorMessage()); + + if (node->hasConstantValue()) + return; + + auto subquery_context = Context::createCopy(context); + + Settings subquery_settings = context->getSettings(); + subquery_settings.max_result_rows = 1; + subquery_settings.extremes = false; + subquery_context->setSettings(subquery_settings); + + auto options = SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth, true /*is_subquery*/); + auto interpreter = std::make_unique(node, options, subquery_context); + + auto io = interpreter->execute(); + + Block block; + PullingAsyncPipelineExecutor executor(io.pipeline); + io.pipeline.setProgressCallback(context->getProgressCallback()); + + while (block.rows() == 0 && executor.pull(block)) + { + } + + if (block.rows() == 0) + { + auto types = interpreter->getSampleBlock().getDataTypes(); + if (types.size() != 1) + types = {std::make_shared(types)}; + + auto & type = types[0]; + if (!type->isNullable()) + { + if (!type->canBeInsideNullable()) + throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, + "Scalar subquery returned empty result of type {} which cannot be Nullable.", + type->getName()); + + type = makeNullable(type); + } + + auto constant_value = std::make_shared(Null(), std::move(type)); + + if (query_node) + query_node->performConstantFolding(std::move(constant_value)); + else if (union_node) + union_node->performConstantFolding(std::move(constant_value)); + + return; + } + + if (block.rows() != 1) + throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); + + Block tmp_block; + while (tmp_block.rows() == 0 && executor.pull(tmp_block)) + { + } + + if (tmp_block.rows() != 0) + throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); + + block = materializeBlock(block); + size_t columns = block.columns(); + + // Block scalar; + Field scalar_value; + DataTypePtr scalar_type; + + if (columns == 1) + { + auto & column = block.getByPosition(0); + /// Here we wrap type to nullable if we can. + /// It is needed cause if subquery return no rows, it's result will be Null. + /// In case of many columns, do not check it cause tuple can't be nullable. + if (!column.type->isNullable() && column.type->canBeInsideNullable()) + { + column.type = makeNullable(column.type); + column.column = makeNullable(column.column); + } + + column.column->get(0, scalar_value); + scalar_type = column.type; + } + else + { + auto tuple_column = ColumnTuple::create(block.getColumns()); + tuple_column->get(0, scalar_value); + scalar_type = std::make_shared(block.getDataTypes(), block.getNames()); + } + + auto constant_value = std::make_shared(std::move(scalar_value), std::move(scalar_type)); + if (query_node) + query_node->performConstantFolding(std::move(constant_value)); + else if (union_node) + union_node->performConstantFolding(std::move(constant_value)); +} + +void QueryAnalyzer::mergeWindowWithParentWindow(const QueryTreeNodePtr & window_node, const QueryTreeNodePtr & parent_window_node, IdentifierResolveScope & scope) +{ + auto & window_node_typed = window_node->as(); + auto parent_window_name = window_node_typed.getParentWindowName(); + + auto & parent_window_node_typed = parent_window_node->as(); + + /** If an existing_window_name is specified it must refer to an earlier + * entry in the WINDOW list; the new window copies its partitioning clause + * from that entry, as well as its ordering clause if any. 
In this case + * the new window cannot specify its own PARTITION BY clause, and it can + * specify ORDER BY only if the copied window does not have one. The new + * window always uses its own frame clause; the copied window must not + * specify a frame clause. + * https://www.postgresql.org/docs/current/sql-select.html + */ + if (window_node_typed.hasPartitionBy()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Derived window definition '{}' is not allowed to override PARTITION BY. In scope {}", + window_node_typed.formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + } + + if (window_node_typed.hasOrderBy() && parent_window_node_typed.hasOrderBy()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Derived window definition '{}' is not allowed to override a non-empty ORDER BY. In scope {}", + window_node_typed.formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + } + + if (!parent_window_node_typed.getWindowFrame().is_default) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Parent window '{}' is not allowed to define a frame: while processing derived window definition '{}'. In scope {}", + parent_window_name, + window_node_typed.formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + } + + window_node_typed.getPartitionByNode() = parent_window_node_typed.getPartitionBy().clone(); + + if (parent_window_node_typed.hasOrderBy()) + window_node_typed.getOrderByNode() = parent_window_node_typed.getOrderBy().clone(); +} + +/** Replace nodes in node list with positional arguments. + * + * Example: SELECT id, value FROM test_table GROUP BY 1, 2; + * Example: SELECT id, value FROM test_table ORDER BY 1, 2; + * Example: SELECT id, value FROM test_table LIMIT 5 BY 1, 2; + */ +void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_list, const QueryTreeNodes & projection_nodes, IdentifierResolveScope & scope) +{ + auto & node_list_typed = node_list->as(); + + for (auto & node : node_list_typed.getNodes()) + { + auto * constant_node = node->as(); + if (!constant_node) + continue; + + if (!isNativeNumber(removeNullable(constant_node->getResultType()))) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Positional argument must be constant with numeric type. Actual {}. In scope {}", + constant_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + + Field converted = convertFieldToType(constant_node->getValue(), DataTypeUInt64()); + if (converted.isNull()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Positional argument numeric constant expression is not representable as UInt64. In scope {}", + scope.scope_node->formatASTForErrorMessage()); + + UInt64 positional_argument_number = converted.safeGet(); + if (positional_argument_number == 0 || positional_argument_number > projection_nodes.size()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Positional argument number {} is out of bounds. Expected in range [1, {}]. 
In scope {}", + positional_argument_number, + projection_nodes.size(), + scope.scope_node->formatASTForErrorMessage()); + + --positional_argument_number; + node = projection_nodes[positional_argument_number]; + } +} + +void QueryAnalyzer::validateLimitOffsetExpression(QueryTreeNodePtr & expression_node, const String & expression_description, IdentifierResolveScope & scope) +{ + const auto limit_offset_constant_value = expression_node->getConstantValueOrNull(); + if (!limit_offset_constant_value || !isNativeNumber(removeNullable(limit_offset_constant_value->getType()))) + throw Exception(ErrorCodes::INVALID_LIMIT_EXPRESSION, + "{} expression must be constant with numeric type. Actual {}. In scope {}", + expression_description, + expression_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + + Field converted = convertFieldToType(limit_offset_constant_value->getValue(), DataTypeUInt64()); + if (converted.isNull()) + throw Exception(ErrorCodes::INVALID_LIMIT_EXPRESSION, + "{} numeric constant expression is not representable as UInt64", + expression_description); +} + +void QueryAnalyzer::validateTableExpressionModifiers(const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope) +{ + auto * table_node = table_expression_node->as(); + auto * table_function_node = table_expression_node->as(); + auto * query_node = table_expression_node->as(); + auto * union_node = table_expression_node->as(); + + if (!table_node && !table_function_node && !query_node && !union_node) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected table expression. Expected table, table function, query or union node. Actual {}", + table_expression_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + + if (query_node || union_node) + { + auto table_expression_modifiers = query_node ? query_node->getTableExpressionModifiers() : union_node->getTableExpressionModifiers(); + + if (table_expression_modifiers.has_value()) + { + String table_expression_modifiers_error_message; + + if (table_expression_modifiers->hasFinal()) + { + table_expression_modifiers_error_message += "FINAL"; + + if (table_expression_modifiers->hasSampleSizeRatio()) + table_expression_modifiers_error_message += ", SAMPLE"; + } + else if (table_expression_modifiers->hasSampleSizeRatio()) + { + table_expression_modifiers_error_message += "SAMPLE"; + } + + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Table expression modifiers {} are not supported for subquery {}. In scope {}", + table_expression_modifiers_error_message, + table_expression_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + } + } + else if (table_node || table_function_node) + { + auto table_expression_modifiers = table_node ? table_node->getTableExpressionModifiers() : table_function_node->getTableExpressionModifiers(); + + if (table_expression_modifiers.has_value()) + { + const auto & storage = table_node ? 
table_node->getStorage() : table_function_node->getStorage(); + if (table_expression_modifiers->hasFinal() && !storage->supportsFinal()) + throw Exception(ErrorCodes::ILLEGAL_FINAL, + "Storage {} doesn't support FINAL", + storage->getName()); + + if (table_expression_modifiers->hasSampleSizeRatio() && !storage->supportsSampling()) + throw Exception(ErrorCodes::SAMPLING_NOT_SUPPORTED, + "Storage {} doesn't support sampling", + storage->getStorageID().getFullNameNotQuoted()); + } + } +} + +void QueryAnalyzer::validateJoinTableExpressionWithoutAlias(const QueryTreeNodePtr & join_node, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope) +{ + if (!scope.context->getSettingsRef().joined_subquery_requires_alias) + return; + + bool table_expression_has_alias = table_expression_node->hasAlias(); + if (table_expression_has_alias) + return; + + auto table_expression_node_type = table_expression_node->getNodeType(); + if (table_expression_node_type == QueryTreeNodeType::TABLE_FUNCTION || + table_expression_node_type == QueryTreeNodeType::QUERY || + table_expression_node_type == QueryTreeNodeType::UNION) + throw Exception(ErrorCodes::ALIAS_REQUIRED, + "JOIN {} no alias for subquery or table function {}. In scope {} (set joined_subquery_requires_alias = 0 to disable restriction)", + join_node->formatASTForErrorMessage(), + table_expression_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); +} + + +/// Resolve identifier functions implementation + +/// Try resolve table identifier from database catalog +QueryTreeNodePtr QueryAnalyzer::tryResolveTableIdentifierFromDatabaseCatalog(const Identifier & table_identifier, ContextPtr context) +{ + size_t parts_size = table_identifier.getPartsSize(); + if (parts_size < 1 || parts_size > 2) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Expected table identifier to contain 1 or 2 parts. Actual '{}'", + table_identifier.getFullName()); + + std::string database_name; + std::string table_name; + + if (table_identifier.isCompound()) + { + database_name = table_identifier[0]; + table_name = table_identifier[1]; + } + else + { + table_name = table_identifier[0]; + } + + StorageID storage_id(database_name, table_name); + storage_id = context->resolveStorageID(storage_id); + auto storage = DatabaseCatalog::instance().getTable(storage_id, context); + auto storage_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); + auto storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context); + + return std::make_shared(std::move(storage), storage_lock, storage_snapshot); +} + +/** Resolve identifier from expression arguments. + * + * Expression arguments can be initialized during lambda analysis or they could be provided externally. + * Expression arguments must be already resolved nodes. This is client responsibility to resolve them. + * + * Example: SELECT arrayMap(x -> x + 1, [1,2,3]); + * For lambda x -> x + 1, `x` is lambda expression argument. + * + * Resolve strategy: + * 1. Try to bind identifier to scope argument name to node map. + * 2. If identifier is binded but expression context and node type are incompatible return nullptr. + * + * It is important to support edge cases, where we lookup for table or function node, but argument has same name. + * Example: WITH (x -> x + 1) AS func, (func -> func(1) + func) AS lambda SELECT lambda(1); + * + * 3. 
If identifier is compound and identifier lookup is in expression context, pop first part from identifier lookup and wrap node + * using nested parts of identifier using `wrapExpressionNodeInTupleElement` function. + */ +QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromExpressionArguments(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope) +{ + auto it = scope.expression_argument_name_to_node.find(identifier_lookup.identifier.getFullName()); + bool resolve_full_identifier = it != scope.expression_argument_name_to_node.end(); + + if (!resolve_full_identifier) + { + const auto & identifier_bind_part = identifier_lookup.identifier.front(); + + it = scope.expression_argument_name_to_node.find(identifier_bind_part); + if (it == scope.expression_argument_name_to_node.end()) + return {}; + } + + auto node_type = it->second->getNodeType(); + if (identifier_lookup.isExpressionLookup() && !isExpressionNodeType(node_type)) + return {}; + else if (identifier_lookup.isTableExpressionLookup() && !isTableExpressionNodeType(node_type)) + return {}; + else if (identifier_lookup.isFunctionLookup() && !isFunctionExpressionNodeType(node_type)) + return {}; + + if (!resolve_full_identifier && identifier_lookup.identifier.isCompound() && identifier_lookup.isExpressionLookup()) + { + auto nested_path = IdentifierView(identifier_lookup.identifier); + nested_path.popFirst(); + + auto tuple_element_result = wrapExpressionNodeInTupleElement(it->second, nested_path); + resolveFunction(tuple_element_result, scope); + + return tuple_element_result; + } + + return it->second; +} + +bool QueryAnalyzer::tryBindIdentifierToAliases(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope) +{ + const auto & identifier_bind_part = identifier_lookup.identifier.front(); + + auto get_alias_name_to_node_map = [&]() -> std::unordered_map & + { + if (identifier_lookup.isExpressionLookup()) + return scope.alias_name_to_expression_node; + else if (identifier_lookup.isFunctionLookup()) + return scope.alias_name_to_lambda_node; + + return scope.alias_name_to_table_expression_node; + }; + + auto & alias_name_to_node_map = get_alias_name_to_node_map(); + auto it = alias_name_to_node_map.find(identifier_bind_part); + + if (it == alias_name_to_node_map.end()) + return false; + + return true; +} + +/** Resolve identifier from scope aliases. + * + * Resolve strategy: + * 1. If alias is registered current expressions that are in resolve process and if last expression is not part of first expression subtree + * throw cyclic aliases exception. + * Otherwise prevent cache usage for identifier lookup and return nullptr. + * + * This is special scenario where identifier has name the same as alias name in one of its parent expressions including itself. + * In such case we cannot resolve identifier from aliases because of recursion. It is client responsibility to register and deregister alias + * names during expressions resolve. + * + * We must prevent cache usage for lookup because lookup outside of expression is supposed to return other value. + * Example: SELECT (id + 1) AS id, id + 2. Lookup for id inside (id + 1) as id should return id from table, but lookup (id + 2) should return + * (id + 1) AS id. 
+ * + * Below cases should work: + * Example: + * SELECT id AS id FROM test_table; + * SELECT value.value1 AS value FROM test_table; + * SELECT (id + 1) AS id FROM test_table; + * SELECT (1 + (1 + id)) AS id FROM test_table; + * + * Below cases should throw cyclic aliases exception: + * SELECT (id + b) AS id, id as b FROM test_table; + * SELECT (1 + b + 1 + id) AS id, b as c, id as b FROM test_table; + * + * 2. Depending on IdentifierLookupContext get alias name to node map from IdentifierResolveScope. + * 3. Try to bind identifier to alias name in map. If there are no such binding return nullptr. + * 4. Add node into current expressions to resolve. TODO: Handle lambdas and tables properly. + * + * 5. If node in map is not resolved, resolve it. It is important because for result type of identifier lookup node can depend on it. + * Example: SELECT value.a, cast('(1)', 'Tuple(a UInt64)') AS value; + * + * Special case for IdentifierNode, if node is identifier depending on lookup context we need to erase entry from expression or lambda map. + * Check QueryExpressionsAliasVisitor documentation. + * + * Special case for QueryNode, if lookup context is expression, evaluate it as scalar subquery. + * + * 6. Pop node from current expressions to resolve. + * 7. If identifier is compound and identifier lookup is in expression context, pop first part from identifier lookup and wrap alias node + * using nested parts of identifier using `wrapExpressionNodeInTupleElement` function. + * + * Example: SELECT value AS alias, alias.nested_path. + * Result: SELECT value AS alias, tupleElement(value, 'nested_path') value.nested_path. + * + * 8. If identifier lookup is in expression context, clone result expression. + */ +QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope, IdentifierResolveSettings identifier_resolve_settings) +{ + const auto & identifier_bind_part = identifier_lookup.identifier.front(); + + auto get_alias_name_to_node_map = [&]() -> std::unordered_map & + { + if (identifier_lookup.isExpressionLookup()) + return scope.alias_name_to_expression_node; + else if (identifier_lookup.isFunctionLookup()) + return scope.alias_name_to_lambda_node; + + return scope.alias_name_to_table_expression_node; + }; + + auto & alias_name_to_node_map = get_alias_name_to_node_map(); + auto it = alias_name_to_node_map.find(identifier_bind_part); + + if (it == alias_name_to_node_map.end()) + return {}; + + if (!it->second) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Node with alias {} is not valid. In scope {}", + identifier_bind_part, + scope.scope_node->formatASTForErrorMessage()); + + if (scope.expressions_in_resolve_process_stack.hasExpressionWithAlias(identifier_bind_part)) + { + const auto * root_expression = scope.expressions_in_resolve_process_stack.getRoot(); + const auto * top_expression = scope.expressions_in_resolve_process_stack.getTop(); + + if (!isNodePartOfTree(top_expression, root_expression)) + throw Exception(ErrorCodes::CYCLIC_ALIASES, + "Cyclic aliases for identifier '{}'. 
In scope {}", + identifier_lookup.identifier.getFullName(), + scope.scope_node->formatASTForErrorMessage()); + + scope.non_cached_identifier_lookups_during_expression_resolve.insert(identifier_lookup); + return {}; + } + + auto node_type = it->second->getNodeType(); + + /// Resolve expression if necessary + if (node_type == QueryTreeNodeType::IDENTIFIER) + { + scope.expressions_in_resolve_process_stack.pushNode(it->second); + + auto & alias_identifier_node = it->second->as(); + auto identifier = alias_identifier_node.getIdentifier(); + auto lookup_result = tryResolveIdentifier(IdentifierLookup{identifier, identifier_lookup.lookup_context}, scope, identifier_resolve_settings); + it->second = lookup_result.resolved_identifier; + + /** During collection of aliases if node is identifier and has alias, we cannot say if it is + * column or function node. Check QueryExpressionsAliasVisitor documentation for clarification. + * + * If we resolved identifier node as expression, we must remove identifier node alias from + * function alias map. + * If we resolved identifier node as function, we must remove identifier node alias from + * expression alias map. + */ + if (identifier_lookup.isExpressionLookup() && it->second) + scope.alias_name_to_lambda_node.erase(identifier_bind_part); + else if (identifier_lookup.isFunctionLookup() && it->second) + scope.alias_name_to_expression_node.erase(identifier_bind_part); + + scope.expressions_in_resolve_process_stack.popNode(); + } + else if (node_type == QueryTreeNodeType::FUNCTION) + { + resolveExpressionNode(it->second, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + } + else if (node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION) + { + if (identifier_resolve_settings.allow_to_resolve_subquery_during_identifier_resolution) + resolveExpressionNode(it->second, scope, false /*allow_lambda_expression*/, identifier_lookup.isTableExpressionLookup() /*allow_table_expression*/); + } + + QueryTreeNodePtr result = it->second; + + /** If identifier is compound and it is expression identifier lookup, wrap compound expression into + * tuple elements functions. + * + * Example: SELECT compound_expression AS alias, alias.first.second; + * Result: SELECT compound_expression AS alias, tupleElement(tupleElement(compound_expression, 'first'), 'second'); + */ + if (identifier_lookup.identifier.isCompound() && result) + { + if (identifier_lookup.isExpressionLookup()) + { + auto nested_path = IdentifierView(identifier_lookup.identifier); + nested_path.popFirst(); + + auto tuple_element_result = wrapExpressionNodeInTupleElement(result, nested_path); + resolveFunction(tuple_element_result, scope); + + result = tuple_element_result; + } + else if (identifier_lookup.isFunctionLookup() || identifier_lookup.isTableExpressionLookup()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Compound identifier '{}' cannot be resolved as {}. In scope {}", + identifier_lookup.identifier.getFullName(), + identifier_lookup.isFunctionLookup() ? "function" : "table expression", + scope.scope_node->formatASTForErrorMessage()); + } + } + + return result; +} + +/** Resolve identifier from table columns. + * + * 1. If table column nodes are empty or identifier is not expression lookup return nullptr. + * 2. If identifier full name match table column use column. Save information that we resolve identifier using full name. + * 3. Else if identifier binds to table column, use column. + * 4. Try to resolve column ALIAS expression if it exists. 
+ * 5. If identifier was compound and was not resolved using full name during step 1 pop first part from identifier lookup and wrap column node + * using nested parts of identifier using `wrapExpressionNodeInTupleElement` function. + * This can be the case with compound ALIAS columns. + * Example: + * CREATE TABLE test_table (id UInt64, value Tuple(id UInt64, value String), alias_value ALIAS value.id) ENGINE=TinyLog; + */ +QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromTableColumns(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope) +{ + if (scope.column_name_to_column_node.empty() || !identifier_lookup.isExpressionLookup()) + return {}; + + const auto & identifier = identifier_lookup.identifier; + auto it = scope.column_name_to_column_node.find(identifier.getFullName()); + bool full_column_name_match = it != scope.column_name_to_column_node.end(); + + if (!full_column_name_match) + { + it = scope.column_name_to_column_node.find(identifier_lookup.identifier[0]); + if (it == scope.column_name_to_column_node.end()) + return {}; + } + + if (it->second->hasExpression()) + resolveExpressionNode(it->second->getExpression(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + + QueryTreeNodePtr result = it->second; + + if (!full_column_name_match && identifier.isCompound()) + { + auto nested_path = IdentifierView(identifier_lookup.identifier); + nested_path.popFirst(); + + auto tuple_element_result = wrapExpressionNodeInTupleElement(it->second, nested_path); + resolveFunction(tuple_element_result, scope); + + result = tuple_element_result; + } + + return result; +} + +bool QueryAnalyzer::tryBindIdentifierToTableExpression(const IdentifierLookup & identifier_lookup, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope) +{ + auto table_expression_node_type = table_expression_node->getNodeType(); + + if (table_expression_node_type != QueryTreeNodeType::TABLE && + table_expression_node_type != QueryTreeNodeType::TABLE_FUNCTION && + table_expression_node_type != QueryTreeNodeType::QUERY && + table_expression_node_type != QueryTreeNodeType::UNION) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Unexpected table expression. Expected table, table function, query or union node. Actual {}. In scope {}", + table_expression_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + + const auto & identifier = identifier_lookup.identifier; + const auto & path_start = identifier.getParts().front(); + + auto & table_expression_data = scope.getTableExpressionDataOrThrow(table_expression_node); + + const auto & table_name = table_expression_data.table_name; + const auto & database_name = table_expression_data.database_name; + + if (identifier_lookup.isTableExpressionLookup()) + { + size_t parts_size = identifier_lookup.identifier.getPartsSize(); + if (parts_size != 1 && parts_size != 2) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Expected identifier '{}' to contain 1 or 2 parts to be resolved as table expression. 
In scope {}", + identifier_lookup.identifier.getFullName(), + table_expression_node->formatASTForErrorMessage()); + + if (parts_size == 1 && path_start == table_name) + return true; + else if (parts_size == 2 && path_start == database_name && identifier[1] == table_name) + return true; + else + return false; + } + + if (table_expression_data.hasFullIdentifierName(IdentifierView(identifier)) || table_expression_data.canBindIdentifier(IdentifierView(identifier))) + return true; + + if (identifier.getPartsSize() == 1) + return false; + + if ((!table_name.empty() && path_start == table_name) || (table_expression_node->hasAlias() && path_start == table_expression_node->getAlias())) + return true; + + if (identifier.getPartsSize() == 2) + return false; + + if (!database_name.empty() && path_start == database_name && identifier[1] == table_name) + return true; + + return false; +} + +QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromTableExpression(const IdentifierLookup & identifier_lookup, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope) +{ + auto table_expression_node_type = table_expression_node->getNodeType(); + + if (table_expression_node_type != QueryTreeNodeType::TABLE && + table_expression_node_type != QueryTreeNodeType::TABLE_FUNCTION && + table_expression_node_type != QueryTreeNodeType::QUERY && + table_expression_node_type != QueryTreeNodeType::UNION) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Unexpected table expression. Expected table, table function, query or union node. Actual {}. In scope {}", + table_expression_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + + const auto & identifier = identifier_lookup.identifier; + const auto & path_start = identifier.getParts().front(); + + auto & table_expression_data = scope.getTableExpressionDataOrThrow(table_expression_node); + + if (identifier_lookup.isTableExpressionLookup()) + { + size_t parts_size = identifier_lookup.identifier.getPartsSize(); + if (parts_size != 1 && parts_size != 2) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Expected identifier '{}' to contain 1 or 2 parts to be resolved as table expression. In scope {}", + identifier_lookup.identifier.getFullName(), + table_expression_node->formatASTForErrorMessage()); + + const auto & table_name = table_expression_data.table_name; + const auto & database_name = table_expression_data.database_name; + + if (parts_size == 1 && path_start == table_name) + return table_expression_node; + else if (parts_size == 2 && path_start == database_name && identifier[1] == table_name) + return table_expression_node; + else + return {}; + } + + auto resolve_identifier_from_storage_or_throw = [&](size_t identifier_column_qualifier_parts) -> QueryTreeNodePtr + { + auto identifier_view = IdentifierView(identifier); + identifier_view.popFirst(identifier_column_qualifier_parts); + + /** Compound identifier cannot be resolved directly from storage if storage is not table. + * + * Example: SELECT test_table.id.value1.value2 FROM test_table; + * In table storage column test_table.id.value1.value2 will exists. + * + * Example: SELECT test_subquery.compound_expression.value FROM (SELECT compound_expression AS value) AS test_subquery; + * Here there is no column with name test_subquery.compound_expression.value, and additional wrap in tuple element is required. 
+ */ + + ColumnNodePtr result_column; + bool compound_identifier = identifier_view.getPartsSize() > 1; + bool match_full_identifier = false; + + auto it = table_expression_data.column_name_to_column_node.find(std::string(identifier_view.getFullName())); + if (it != table_expression_data.column_name_to_column_node.end()) + { + match_full_identifier = true; + result_column = it->second; + } + else + { + it = table_expression_data.column_name_to_column_node.find(std::string(identifier_view.at(0))); + + if (it != table_expression_data.column_name_to_column_node.end()) + result_column = it->second; + } + + if (!result_column || (!match_full_identifier && !compound_identifier)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Identifier '{}' cannot be resolved from {}{}. In scope {}", + identifier.getFullName(), + table_expression_data.table_expression_description, + table_expression_data.table_expression_name.empty() ? "" : " with name " + table_expression_data.table_expression_name, + scope.scope_node->formatASTForErrorMessage()); + + QueryTreeNodePtr result_expression = result_column; + bool clone_is_needed = true; + + if (!match_full_identifier && compound_identifier) + { + IdentifierView nested_path(identifier_view); + nested_path.popFirst(); + auto tuple_element_result = wrapExpressionNodeInTupleElement(result_expression, identifier_view); + resolveFunction(tuple_element_result, scope); + result_expression = std::move(tuple_element_result); + clone_is_needed = false; + } + + if (clone_is_needed) + result_expression = result_expression->clone(); + + auto qualified_identifier = identifier; + for (size_t i = 0; i < identifier_column_qualifier_parts; ++i) + { + auto qualified_identifier_with_removed_part = qualified_identifier; + qualified_identifier_with_removed_part.popFirst(); + + if (qualified_identifier_with_removed_part.empty()) + break; + + if (scope.context->getSettingsRef().prefer_column_name_to_alias + && scope.alias_name_to_expression_node.contains(qualified_identifier_with_removed_part[0])) + break; + + bool can_remove_qualificator = true; + + for (auto & table_expression_to_check_data : scope.table_expression_node_to_data) + { + const auto & table_expression_to_check = table_expression_to_check_data.first; + if (table_expression_to_check.get() == table_expression_node.get()) + continue; + + IdentifierLookup column_identifier_lookup{qualified_identifier_with_removed_part, IdentifierLookupContext::EXPRESSION}; + bool can_bind_identifier_to_table_expression = tryBindIdentifierToTableExpression(column_identifier_lookup, table_expression_to_check, scope); + + if (can_bind_identifier_to_table_expression) + { + can_remove_qualificator = false; + break; + } + } + + if (!can_remove_qualificator) + break; + + qualified_identifier = std::move(qualified_identifier_with_removed_part); + } + + auto qualified_identifier_full_name = qualified_identifier.getFullName(); + node_to_projection_name.emplace(result_expression, std::move(qualified_identifier_full_name)); + + return result_expression; + }; + + /** If identifier first part binds to some column start or table has full identifier name. Then we can try to find whole identifier in table. + * 1. Try to bind identifier first part to column in table, if true get full identifier from table or throw exception. + * 2. Try to bind identifier first part to table name or storage alias, if true remove first part and try to get full identifier from table or throw exception. + * Storage alias works for subquery, table function as well. + * 3. 
Try to bind identifier first parts to database name and table name, if true remove first two parts and try to get full identifier from table or throw exception. + */ + if (table_expression_data.hasFullIdentifierName(IdentifierView(identifier))) + return resolve_identifier_from_storage_or_throw(0 /*identifier_column_qualifier_parts*/); + + if (table_expression_data.canBindIdentifier(IdentifierView(identifier))) + return resolve_identifier_from_storage_or_throw(0 /*identifier_column_qualifier_parts*/); + + if (identifier.getPartsSize() == 1) + return {}; + + const auto & table_name = table_expression_data.table_name; + if ((!table_name.empty() && path_start == table_name) || (table_expression_node->hasAlias() && path_start == table_expression_node->getAlias())) + return resolve_identifier_from_storage_or_throw(1 /*identifier_column_qualifier_parts*/); + + if (identifier.getPartsSize() == 2) + return {}; + + const auto & database_name = table_expression_data.database_name; + if (!database_name.empty() && path_start == database_name && identifier[1] == table_name) + return resolve_identifier_from_storage_or_throw(2 /*identifier_column_qualifier_parts*/); + + return {}; +} + +QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLookup & identifier_lookup, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope) +{ + const auto & from_join_node = table_expression_node->as(); + auto left_resolved_identifier = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, from_join_node.getLeftTableExpression(), scope); + auto right_resolved_identifier = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, from_join_node.getRightTableExpression(), scope); + + if (!identifier_lookup.isExpressionLookup()) + { + if (left_resolved_identifier && right_resolved_identifier) + throw Exception(ErrorCodes::AMBIGUOUS_IDENTIFIER, + "JOIN {} ambiguous identifier {}. In scope {}", + table_expression_node->formatASTForErrorMessage(), + identifier_lookup.dump(), + scope.scope_node->formatASTForErrorMessage()); + + return left_resolved_identifier ? left_resolved_identifier : right_resolved_identifier; + } + + bool join_node_in_resolve_process = scope.table_expressions_in_resolve_process.contains(table_expression_node.get()); + + std::unordered_map join_using_column_name_to_column_node; + + if (!join_node_in_resolve_process && from_join_node.isUsingJoinExpression()) + { + auto & join_using_list = from_join_node.getJoinExpression()->as(); + + for (auto & join_using_node : join_using_list.getNodes()) + { + auto & column_node = join_using_node->as(); + join_using_column_name_to_column_node.emplace(column_node.getName(), std::static_pointer_cast(join_using_node)); + } + } + + std::optional resolved_side; + QueryTreeNodePtr resolved_identifier; + + JoinKind join_kind = from_join_node.getKind(); + + if (left_resolved_identifier && right_resolved_identifier) + { + auto & left_resolved_column = left_resolved_identifier->as(); + auto & right_resolved_column = right_resolved_identifier->as(); + + auto using_column_node_it = join_using_column_name_to_column_node.find(left_resolved_column.getColumnName()); + if (using_column_node_it != join_using_column_name_to_column_node.end() + && left_resolved_column.getColumnName() == right_resolved_column.getColumnName()) + { + JoinTableSide using_column_inner_column_table_side = isRight(join_kind) ? 
JoinTableSide::Right : JoinTableSide::Left; + auto & using_column_node = using_column_node_it->second->as(); + auto & using_expression_list = using_column_node.getExpression()->as(); + + size_t inner_column_node_index = using_column_inner_column_table_side == JoinTableSide::Left ? 0 : 1; + const auto & inner_column_node = using_expression_list.getNodes().at(inner_column_node_index); + + auto result_column_node = inner_column_node->clone(); + auto & result_column = result_column_node->as(); + result_column.setColumnType(using_column_node.getColumnType()); + + resolved_identifier = std::move(result_column_node); + } + else + { + throw Exception(ErrorCodes::AMBIGUOUS_IDENTIFIER, + "JOIN {} ambiguous identifier '{}'. In scope {}", + table_expression_node->formatASTForErrorMessage(), + identifier_lookup.identifier.getFullName(), + scope.scope_node->formatASTForErrorMessage()); + } + } + else if (left_resolved_identifier) + { + resolved_side = JoinTableSide::Left; + auto & left_resolved_column = left_resolved_identifier->as(); + + resolved_identifier = left_resolved_identifier; + + auto using_column_node_it = join_using_column_name_to_column_node.find(left_resolved_column.getColumnName()); + if (using_column_node_it != join_using_column_name_to_column_node.end() && + !using_column_node_it->second->getColumnType()->equals(*left_resolved_column.getColumnType())) + { + auto left_resolved_column_clone = std::static_pointer_cast(left_resolved_column.clone()); + left_resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType()); + resolved_identifier = std::move(left_resolved_column_clone); + } + else + { + resolved_identifier = left_resolved_identifier; + } + } + else if (right_resolved_identifier) + { + resolved_side = JoinTableSide::Right; + auto & right_resolved_column = right_resolved_identifier->as(); + + auto using_column_node_it = join_using_column_name_to_column_node.find(right_resolved_column.getColumnName()); + if (using_column_node_it != join_using_column_name_to_column_node.end() && + !using_column_node_it->second->getColumnType()->equals(*right_resolved_column.getColumnType())) + { + auto right_resolved_column_clone = std::static_pointer_cast(right_resolved_column.clone()); + right_resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType()); + resolved_identifier = std::move(right_resolved_column_clone); + } + else + { + resolved_identifier = right_resolved_identifier; + } + } + + if (join_node_in_resolve_process || !resolved_identifier) + return resolved_identifier; + + bool join_use_nulls = scope.context->getSettingsRef().join_use_nulls; + + if (join_use_nulls + && (isFull(join_kind) || + (isLeft(join_kind) && resolved_side && *resolved_side == JoinTableSide::Right) || + (isRight(join_kind) && resolved_side && *resolved_side == JoinTableSide::Left))) + { + resolved_identifier = resolved_identifier->clone(); + auto & resolved_column = resolved_identifier->as(); + resolved_column.setColumnType(makeNullable(resolved_column.getColumnType())); + } + + return resolved_identifier; +} + +QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const IdentifierLookup & identifier_lookup, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope) +{ + const auto & from_array_join_node = table_expression_node->as(); + auto resolved_identifier = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, from_array_join_node.getTableExpression(), scope); + + /** Special case when qualified or unqualified identifier point to 
array join expression without alias. + * + * CREATE TABLE test_table (id UInt64, value String, value_array Array(UInt8)) ENGINE=TinyLog; + * SELECT id, value, value_array, test_table.value_array, default.test_table.value_array FROM test_table ARRAY JOIN value_array; + * + * value_array, test_table.value_array, default.test_table.value_array must be resolved into array join expression. + */ + if (!scope.table_expressions_in_resolve_process.contains(table_expression_node.get()) && resolved_identifier) + { + for (const auto & array_join_expression : from_array_join_node.getJoinExpressions().getNodes()) + { + auto & array_join_column_expression = array_join_expression->as(); + if (array_join_column_expression.hasAlias()) + continue; + + auto & array_join_column_inner_expression = array_join_column_expression.getExpressionOrThrow(); + if (array_join_column_inner_expression.get() == resolved_identifier.get() || + array_join_column_inner_expression->isEqual(*resolved_identifier)) + { + auto array_join_column = array_join_column_expression.getColumn(); + auto result = std::make_shared(array_join_column, table_expression_node); + + return result; + } + } + } + + return resolved_identifier; +} + +QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoinTreeNode(const IdentifierLookup & identifier_lookup, const QueryTreeNodePtr & join_tree_node, IdentifierResolveScope & scope) +{ + auto join_tree_node_type = join_tree_node->getNodeType(); + + switch (join_tree_node_type) + { + case QueryTreeNodeType::JOIN: + return tryResolveIdentifierFromJoin(identifier_lookup, join_tree_node, scope); + case QueryTreeNodeType::ARRAY_JOIN: + return tryResolveIdentifierFromArrayJoin(identifier_lookup, join_tree_node, scope); + case QueryTreeNodeType::QUERY: + [[fallthrough]]; + case QueryTreeNodeType::UNION: + [[fallthrough]]; + case QueryTreeNodeType::TABLE: + [[fallthrough]]; + case QueryTreeNodeType::TABLE_FUNCTION: + { + /** Edge case scenario when subquery in FROM node try to resolve identifier from parent scopes, when FROM is not resolved. + * SELECT subquery.b AS value FROM (SELECT value, 1 AS b) AS subquery; + * TODO: This can be supported + */ + if (scope.table_expressions_in_resolve_process.contains(join_tree_node.get())) + return {}; + + return tryResolveIdentifierFromTableExpression(identifier_lookup, join_tree_node, scope); + } + default: + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Scope FROM section expected table, table function, query, union, join or array join. Actual {}. In scope {}", + join_tree_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + } + } +} + +/** Resolve identifier from scope join tree. + * + * 1. If identifier is in function lookup context return nullptr. + * 2. Try to resolve identifier from table columns. + * 3. If there is no FROM section return nullptr. + * 4. If identifier is in table lookup context, check if it has 1 or 2 parts, otherwise throw exception. + * If identifier has 2 parts try to match it with database_name and table_name. + * If identifier has 1 part try to match it with table_name, then try to match it with table alias. + * 5. If identifier is in expression lookup context, we first need to bind identifier to some table column using identifier first part. + * Start with identifier first part, if it match some column name in table try to get column with full identifier name. + * TODO: Need to check if it is okay to throw exception if compound identifier first part bind to column but column is not valid. 
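+ * + * Illustrative queries for the steps above (database, table and column names are assumed): + * SELECT id, test_table.id, default.test_table.id FROM default.test_table; + * Here 'id' binds directly to a table column, 'test_table.id' drops the matching table name qualifier, + * and 'default.test_table.id' drops the matching database and table name qualifiers before the column lookup.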
+ */ +QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoinTree(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope) +{ + if (identifier_lookup.isFunctionLookup()) + return {}; + + /// Try to resolve identifier from table columns + if (auto resolved_identifier = tryResolveIdentifierFromTableColumns(identifier_lookup, scope)) + return resolved_identifier; + + if (scope.expression_join_tree_node) + return tryResolveIdentifierFromJoinTreeNode(identifier_lookup, scope.expression_join_tree_node, scope); + + auto * query_scope_node = scope.scope_node->as(); + if (!query_scope_node || !query_scope_node->getJoinTree()) + return {}; + + const auto & join_tree_node = query_scope_node->getJoinTree(); + return tryResolveIdentifierFromJoinTreeNode(identifier_lookup, join_tree_node, scope); +} + +/** Try resolve identifier in current scope parent scopes. + * + * TODO: If column is matched, throw exception that nested subqueries are not supported. + * + * If initial scope is expression. Then try to resolve identifier in parent scopes until query scope is hit. + * For query scope resolve strategy is same as if initial scope if query. + */ +IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierInParentScopes(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope) +{ + bool initial_scope_is_query = scope.scope_node->getNodeType() == QueryTreeNodeType::QUERY; + bool initial_scope_is_expression = !initial_scope_is_query; + + IdentifierResolveSettings identifier_resolve_settings; + identifier_resolve_settings.allow_to_check_parent_scopes = false; + identifier_resolve_settings.allow_to_check_database_catalog = false; + + IdentifierResolveScope * scope_to_check = scope.parent_scope; + + if (initial_scope_is_expression) + { + while (scope_to_check != nullptr) + { + auto resolve_result = tryResolveIdentifier(identifier_lookup, *scope_to_check, identifier_resolve_settings); + if (resolve_result.resolved_identifier) + return resolve_result; + + bool scope_was_query = scope_to_check->scope_node->getNodeType() == QueryTreeNodeType::QUERY; + scope_to_check = scope_to_check->parent_scope; + + if (scope_was_query) + break; + } + } + + while (scope_to_check != nullptr) + { + auto lookup_result = tryResolveIdentifier(identifier_lookup, *scope_to_check, identifier_resolve_settings); + const auto & resolved_identifier = lookup_result.resolved_identifier; + + scope_to_check = scope_to_check->parent_scope; + + if (resolved_identifier) + { + bool is_cte = resolved_identifier->as() && resolved_identifier->as()->isCTE(); + + /** From parent scopes we can resolve table identifiers only as CTE. + * Example: SELECT (SELECT 1 FROM a) FROM test_table AS a; + * + * During child scope table identifier resolve a, table node test_table with alias a from parent scope + * is invalid. + */ + if (identifier_lookup.isTableExpressionLookup() && !is_cte) + continue; + + if (is_cte) + { + return lookup_result; + } + else if (const auto constant_value = resolved_identifier->getConstantValueOrNull()) + { + lookup_result.resolved_identifier = std::make_shared(constant_value); + return lookup_result; + } + + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Resolve identifier '{}' from parent scope only supported for constants and CTE. Actual {} node type {}. 
In scope {}", + identifier_lookup.identifier.getFullName(), + resolved_identifier->formatASTForErrorMessage(), + resolved_identifier->getNodeTypeName(), + scope.scope_node->formatASTForErrorMessage()); + } + } + + return {}; +} + +/** Resolve identifier in scope. + * + * If identifier was resolved resolve identified lookup status will be updated. + * + * Steps: + * 1. Register identifier lookup in scope identifier lookup to resolve status table. + * If entry is already registered and is not resolved, that means that we have cyclic aliases for identifier. + * Example: SELECT a AS b, b AS a; + * Try resolve identifier in current scope: + * 3. Try resolve identifier from expression arguments. + * + * If prefer_column_name_to_alias = true. + * 4. Try to resolve identifier from join tree. + * 5. Try to resolve identifier from aliases. + * Otherwise. + * 4. Try to resolve identifier from aliases. + * 5. Try to resolve identifier from join tree. + * + * 6. If it is table identifier lookup try to lookup identifier in current scope CTEs. + * + * 7. If identifier is not resolved in current scope, try to resolve it in parent scopes. + * 8. If identifier is not resolved from parent scopes and it is table identifier lookup try to lookup identifier + * in database catalog. + * + * Same is not done for functions because function resolution is more complex, and in case of aggregate functions requires not only name + * but also argument types, it is responsibility of resolve function method to handle resolution of function name. + * + * 9. If identifier was not resolved, or identifier caching was disabled remove it from identifier lookup to resolve status table. + * + * It is okay for identifier to be not resolved, in case we want first try to lookup identifier in one context, + * then if there is no identifier in this context, try to lookup in another context. + * Example: Try to lookup identifier as expression, if it is not found, lookup as function. + * Example: Try to lookup identifier as expression, if it is not found, lookup as table. + */ +IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope, IdentifierResolveSettings identifier_resolve_settings) +{ + auto it = scope.identifier_lookup_to_result.find(identifier_lookup); + if (it != scope.identifier_lookup_to_result.end()) + { + if (!it->second.resolved_identifier) + throw Exception(ErrorCodes::CYCLIC_ALIASES, + "Cyclic aliases for identifier '{}'. 
In scope {}", + identifier_lookup.identifier.getFullName(), + scope.scope_node->formatASTForErrorMessage()); + + if (scope.use_identifier_lookup_to_result_cache && !scope.non_cached_identifier_lookups_during_expression_resolve.contains(identifier_lookup)) + return it->second; + } + + auto [insert_it, _] = scope.identifier_lookup_to_result.insert({identifier_lookup, IdentifierResolveResult()}); + it = insert_it; + + /// Resolve identifier from current scope + + IdentifierResolveResult resolve_result; + resolve_result.resolved_identifier = tryResolveIdentifierFromExpressionArguments(identifier_lookup, scope); + if (resolve_result.resolved_identifier) + resolve_result.resolve_place = IdentifierResolvePlace::EXPRESSION_ARGUMENTS; + + if (!resolve_result.resolved_identifier) + { + bool prefer_column_name_to_alias = scope.context->getSettingsRef().prefer_column_name_to_alias; + + if (unlikely(prefer_column_name_to_alias)) + { + if (identifier_resolve_settings.allow_to_check_join_tree) + { + resolve_result.resolved_identifier = tryResolveIdentifierFromJoinTree(identifier_lookup, scope); + + if (resolve_result.resolved_identifier) + resolve_result.resolve_place = IdentifierResolvePlace::JOIN_TREE; + } + + if (!resolve_result.resolved_identifier) + { + resolve_result.resolved_identifier = tryResolveIdentifierFromAliases(identifier_lookup, scope, identifier_resolve_settings); + + if (resolve_result.resolved_identifier) + resolve_result.resolve_place = IdentifierResolvePlace::ALIASES; + } + } + else + { + resolve_result.resolved_identifier = tryResolveIdentifierFromAliases(identifier_lookup, scope, identifier_resolve_settings); + + if (resolve_result.resolved_identifier) + { + resolve_result.resolve_place = IdentifierResolvePlace::ALIASES; + } + else if (identifier_resolve_settings.allow_to_check_join_tree) + { + resolve_result.resolved_identifier = tryResolveIdentifierFromJoinTree(identifier_lookup, scope); + + if (resolve_result.resolved_identifier) + resolve_result.resolve_place = IdentifierResolvePlace::JOIN_TREE; + } + } + } + + if (!resolve_result.resolved_identifier && identifier_lookup.isTableExpressionLookup()) + { + auto cte_query_node_it = scope.cte_name_to_query_node.find(identifier_lookup.identifier.getFullName()); + if (cte_query_node_it != scope.cte_name_to_query_node.end()) + { + resolve_result.resolved_identifier = cte_query_node_it->second; + resolve_result.resolve_place = IdentifierResolvePlace::CTE; + } + } + + /// Try to resolve identifier from parent scopes + + if (!resolve_result.resolved_identifier && identifier_resolve_settings.allow_to_check_parent_scopes) + { + resolve_result = tryResolveIdentifierInParentScopes(identifier_lookup, scope); + + if (resolve_result.resolved_identifier) + resolve_result.resolved_from_parent_scopes = true; + } + + /// Try to resolve table identifier from database catalog + + if (!resolve_result.resolved_identifier && identifier_resolve_settings.allow_to_check_database_catalog && identifier_lookup.isTableExpressionLookup()) + { + resolve_result.resolved_identifier = tryResolveTableIdentifierFromDatabaseCatalog(identifier_lookup.identifier, scope.context); + + if (resolve_result.resolved_identifier) + resolve_result.resolve_place = IdentifierResolvePlace::DATABASE_CATALOG; + } + + it->second = resolve_result; + + /** If identifier was not resolved, or during expression resolution identifier was explicitly added into non cached set, + * or identifier caching was disabled in resolve scope we remove identifier lookup result from identifier lookup to 
result table. + */ + if (!resolve_result.resolved_identifier || + scope.non_cached_identifier_lookups_during_expression_resolve.contains(identifier_lookup) || + !scope.use_identifier_lookup_to_result_cache) + scope.identifier_lookup_to_result.erase(it); + + return resolve_result; +} + +/// Resolve query tree nodes functions implementation + +/** Qualify matched columns projection names for unqualified matcher or qualified matcher resolved nodes + * + * Example: SELECT * FROM test_table AS t1, test_table AS t2; + */ +void QueryAnalyzer::qualifyMatchedColumnsProjectionNamesIfNeeded(QueryTreeNodesWithNames & matched_nodes_with_column_names, + const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope) +{ + /// Build additional column qualification parts array + std::vector additional_column_qualification_parts; + + if (table_expression_node->hasAlias()) + additional_column_qualification_parts = {table_expression_node->getAlias()}; + else if (auto * table_node = table_expression_node->as()) + additional_column_qualification_parts = {table_node->getStorageID().getDatabaseName(), table_node->getStorageID().getTableName()}; + + size_t additional_column_qualification_parts_size = additional_column_qualification_parts.size(); + + /** For each matched column node iterate over additional column qualifications and apply them if column needs to be qualified. + * To check if column needs to be qualified we check if column name can bind to any other table expression in scope or to scope aliases. + */ + std::vector column_qualified_identifier_parts; + + for (auto & [column_node, column_name] : matched_nodes_with_column_names) + { + column_qualified_identifier_parts = Identifier(column_name).getParts(); + + /// Iterate over additional column qualifications and apply them if needed + for (size_t i = 0; i < additional_column_qualification_parts_size; ++i) + { + bool need_to_qualify = false; + auto identifier_to_check = Identifier(column_qualified_identifier_parts); + IdentifierLookup lookup{identifier_to_check, IdentifierLookupContext::EXPRESSION}; + + for (auto & table_expression_data : scope.table_expression_node_to_data) + { + if (table_expression_data.first.get() == table_expression_node.get()) + continue; + + if (tryBindIdentifierToTableExpression(lookup, table_expression_data.first, scope)) + { + need_to_qualify = true; + break; + } + } + + if (tryBindIdentifierToAliases(lookup, scope)) + need_to_qualify = true; + + if (need_to_qualify) + { + /** Add last qualification part that was not used into column qualified identifier. + * If additional column qualification parts consists from [database_name, table_name]. + * On first iteration if column is needed to be qualified to qualify it with table_name. + * On second iteration if column is needed to be qualified to qualify it with database_name. + */ + size_t part_index_to_use_for_qualification = additional_column_qualification_parts_size - i - 1; + const auto & part_to_use = additional_column_qualification_parts[part_index_to_use_for_qualification]; + column_qualified_identifier_parts.insert(column_qualified_identifier_parts.begin(), part_to_use); + } + else + { + break; + } + } + + node_to_projection_name.emplace(column_node, Identifier(column_qualified_identifier_parts).getFullName()); + } +} + +/** Resolve qualified tree matcher. + * + * First try to match qualified identifier to expression. 
If qualified identifier matched expression node then + * if expression is compound match it column names using matcher `isMatchingColumn` method, if expression is not compound, throw exception. + * If qualified identifier did not match expression in query tree, try to lookup qualified identifier in table context. + */ +QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveQualifiedMatcher(QueryTreeNodePtr & matcher_node, IdentifierResolveScope & scope) +{ + auto & matcher_node_typed = matcher_node->as(); + assert(matcher_node_typed.isQualified()); + + QueryTreeNodesWithNames matched_expression_nodes_with_column_names; + + auto expression_identifier_lookup = IdentifierLookup{matcher_node_typed.getQualifiedIdentifier(), IdentifierLookupContext::EXPRESSION}; + auto expression_identifier_resolve_result = tryResolveIdentifier(expression_identifier_lookup, scope); + auto expression_query_tree_node = expression_identifier_resolve_result.resolved_identifier; + + /// Try to resolve unqualified matcher for query expression + + if (expression_query_tree_node) + { + auto result_type = expression_query_tree_node->getResultType(); + + while (const auto * array_type = typeid_cast(result_type.get())) + result_type = array_type->getNestedType(); + + const auto * tuple_data_type = typeid_cast(result_type.get()); + if (!tuple_data_type) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Qualified matcher {} find non compound expression {} with type {}. Expected tuple or array of tuples. In scope {}", + matcher_node->formatASTForErrorMessage(), + expression_query_tree_node->formatASTForErrorMessage(), + expression_query_tree_node->getResultType()->getName(), + scope.scope_node->formatASTForErrorMessage()); + + const auto & element_names = tuple_data_type->getElementNames(); + + auto qualified_matcher_element_identifier = matcher_node_typed.getQualifiedIdentifier(); + for (const auto & element_name : element_names) + { + if (!matcher_node_typed.isMatchingColumn(element_name)) + continue; + + auto tuple_element_function = std::make_shared("tupleElement"); + tuple_element_function->getArguments().getNodes().push_back(expression_query_tree_node); + tuple_element_function->getArguments().getNodes().push_back(std::make_shared(element_name)); + + QueryTreeNodePtr function_query_node = tuple_element_function; + resolveFunction(function_query_node, scope); + + qualified_matcher_element_identifier.push_back(element_name); + node_to_projection_name.emplace(function_query_node, qualified_matcher_element_identifier.getFullName()); + qualified_matcher_element_identifier.pop_back(); + + matched_expression_nodes_with_column_names.emplace_back(std::move(function_query_node), element_name); + } + + return matched_expression_nodes_with_column_names; + } + + /// Try to resolve qualified matcher for table expression + + IdentifierResolveSettings identifier_resolve_settings; + identifier_resolve_settings.allow_to_check_cte = false; + identifier_resolve_settings.allow_to_check_database_catalog = false; + + auto table_identifier_lookup = IdentifierLookup{matcher_node_typed.getQualifiedIdentifier(), IdentifierLookupContext::TABLE_EXPRESSION}; + auto table_identifier_resolve_result = tryResolveIdentifier(table_identifier_lookup, scope, identifier_resolve_settings); + auto table_expression_node = table_identifier_resolve_result.resolved_identifier; + + if (!table_expression_node) + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Qualified matcher {} does not find table. 
In scope {}", + matcher_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + } + + NamesAndTypes initial_matcher_columns; + + auto * table_expression_query_node = table_expression_node->as(); + auto * table_expression_union_node = table_expression_node->as(); + auto * table_expression_table_node = table_expression_node->as(); + auto * table_expression_table_function_node = table_expression_node->as(); + + if (table_expression_query_node || table_expression_union_node) + { + initial_matcher_columns = table_expression_query_node ? table_expression_query_node->getProjectionColumns() + : table_expression_union_node->computeProjectionColumns(); + } + else if (table_expression_table_node || table_expression_table_function_node) + { + const auto & storage_snapshot = table_expression_table_node ? table_expression_table_node->getStorageSnapshot() + : table_expression_table_function_node->getStorageSnapshot(); + auto storage_columns_list = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All)); + initial_matcher_columns = NamesAndTypes(storage_columns_list.begin(), storage_columns_list.end()); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Invalid table expression node {}. In scope {}", + table_expression_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + } + + for (auto & column : initial_matcher_columns) + { + const auto & column_name = column.name; + if (matcher_node_typed.isMatchingColumn(column_name)) + matched_expression_nodes_with_column_names.emplace_back(std::make_shared(column, table_expression_node), column_name); + } + + qualifyMatchedColumnsProjectionNamesIfNeeded(matched_expression_nodes_with_column_names, table_expression_node, scope); + + return matched_expression_nodes_with_column_names; +} + + +/// Resolve non qualified matcher, using scope join tree node. +QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher(QueryTreeNodePtr & matcher_node, IdentifierResolveScope & scope) +{ + auto & matcher_node_typed = matcher_node->as(); + assert(matcher_node_typed.isUnqualified()); + + /** There can be edge case if matcher is inside lambda expression. + * Try to find parent query expression using parent scopes. + */ + auto * nearest_query_scope = scope.getNearestQueryScope(); + auto * nearest_query_scope_query_node = nearest_query_scope ? nearest_query_scope->scope_node->as() : nullptr; + + /// If there are no parent query scope or query scope does not have join tree + if (!nearest_query_scope_query_node || !nearest_query_scope_query_node->getJoinTree()) + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Unqualified matcher {} cannot be resolved. There are no table sources. In scope {}", + matcher_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + } + + /** For unqualifited matcher resolve we build table expressions stack from JOIN tree and then process it. + * For table, table function, query, union table expressions add matched columns into table expressions columns stack. + * For array join continue processing. + * For join node combine last left and right table expressions columns on stack together. It is important that if JOIN has USING + * we must add USING columns before combining left and right table expressions columns. Columns from left and right table + * expressions that have same names as columns in USING clause must be skipped. 
+ */ + + auto table_expressions_stack = buildTableExpressionsStack(nearest_query_scope_query_node->getJoinTree()); + std::vector table_expressions_column_nodes_with_names_stack; + + for (auto & table_expression : table_expressions_stack) + { + QueryTreeNodesWithNames matched_expression_nodes_with_column_names; + + if (auto * array_join_node = table_expression->as()) + continue; + + bool table_expression_in_resolve_process = scope.table_expressions_in_resolve_process.contains(table_expression.get()); + + auto * join_node = table_expression->as(); + + if (join_node) + { + size_t table_expressions_column_nodes_with_names_stack_size = table_expressions_column_nodes_with_names_stack.size(); + if (table_expressions_column_nodes_with_names_stack_size < 2) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected at least 2 table expressions on stack before JOIN processing. Actual {}", + table_expressions_column_nodes_with_names_stack_size); + + auto right_table_expression_columns = std::move(table_expressions_column_nodes_with_names_stack.back()); + table_expressions_column_nodes_with_names_stack.pop_back(); + + auto left_table_expression_columns = std::move(table_expressions_column_nodes_with_names_stack.back()); + table_expressions_column_nodes_with_names_stack.pop_back(); + + std::unordered_set column_names_to_skip; + + if (!table_expression_in_resolve_process && join_node->isUsingJoinExpression()) + { + auto & join_using_list = join_node->getJoinExpression()->as(); + + for (auto & join_using_node : join_using_list.getNodes()) + { + auto & column_node = join_using_node->as(); + const auto & column_name = column_node.getColumnName(); + + if (!matcher_node_typed.isMatchingColumn(column_name)) + continue; + + column_names_to_skip.insert(column_name); + + QueryTreeNodePtr column_source = getColumnSourceForJoinNodeWithUsing(table_expression); + auto matched_column_node = std::make_shared(column_node.getColumn(), column_source); + matched_expression_nodes_with_column_names.emplace_back(std::move(matched_column_node), column_name); + } + } + + for (auto && left_table_column : left_table_expression_columns) + { + if (column_names_to_skip.contains(left_table_column.second)) + continue; + + matched_expression_nodes_with_column_names.push_back(std::move(left_table_column)); + } + + for (auto && right_table_column : right_table_expression_columns) + { + if (column_names_to_skip.contains(right_table_column.second)) + continue; + + matched_expression_nodes_with_column_names.push_back(std::move(right_table_column)); + } + + table_expressions_column_nodes_with_names_stack.push_back(std::move(matched_expression_nodes_with_column_names)); + continue; + } + + auto * table_node = table_expression->as(); + auto * table_function_node = table_expression->as(); + auto * query_node = table_expression->as(); + auto * union_node = table_expression->as(); + + if (table_expression_in_resolve_process) + { + table_expressions_column_nodes_with_names_stack.emplace_back(); + continue; + } + + NamesAndTypes table_expression_columns; + + if (query_node || union_node) + { + table_expression_columns = query_node ? query_node->getProjectionColumns() : union_node->computeProjectionColumns(); + } + else if (table_node || table_function_node) + { + const auto & storage_snapshot + = table_node ? 
table_node->getStorageSnapshot() : table_function_node->getStorageSnapshot(); + + UInt8 get_column_options_kind = 0; + + if (matcher_node_typed.isAsteriskMatcher()) + { + get_column_options_kind = GetColumnsOptions::Ordinary; + const auto & settings = scope.context->getSettingsRef(); + + if (settings.asterisk_include_alias_columns) + get_column_options_kind |= GetColumnsOptions::Kind::Aliases; + + if (settings.asterisk_include_materialized_columns) + get_column_options_kind |= GetColumnsOptions::Kind::Materialized; + } + else + { + /// TODO: Check if COLUMNS select aliases column by default + get_column_options_kind = GetColumnsOptions::All; + } + + auto get_columns_options = GetColumnsOptions(static_cast(get_column_options_kind)); + auto storage_columns_list = storage_snapshot->getColumns(get_columns_options); + table_expression_columns = NamesAndTypes(storage_columns_list.begin(), storage_columns_list.end()); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unqualified matcher {} resolve unexpected table expression. In scope {}", + matcher_node_typed.formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + } + + for (auto & table_expression_column : table_expression_columns) + { + if (!matcher_node_typed.isMatchingColumn(table_expression_column.name)) + continue; + + auto matched_column_node = std::make_shared(table_expression_column, table_expression); + matched_expression_nodes_with_column_names.emplace_back(std::move(matched_column_node), table_expression_column.name); + } + + qualifyMatchedColumnsProjectionNamesIfNeeded(matched_expression_nodes_with_column_names, table_expression, scope); + + for (auto & [matched_node, column_name] : matched_expression_nodes_with_column_names) + { + auto node_projection_name_it = node_to_projection_name.find(matcher_node); + if (node_projection_name_it != node_to_projection_name.end()) + column_name = node_projection_name_it->second; + } + + table_expressions_column_nodes_with_names_stack.push_back(std::move(matched_expression_nodes_with_column_names)); + } + + QueryTreeNodesWithNames result; + + for (auto & table_expression_column_nodes_with_names : table_expressions_column_nodes_with_names_stack) + { + for (auto && table_expression_column_node_with_name : table_expression_column_nodes_with_names) + result.push_back(std::move(table_expression_column_node_with_name)); + } + + return result; +} + + +/** Resolve query tree matcher. Check MatcherNode.h for detailed matcher description. Check ColumnTransformers.h for detailed transformers description. + * + * 1. Populate matched expression nodes resolving qualified or unqualified matcher. + * 2. Apply column transformers to matched expression nodes. For strict column transformers save used column names. + * 3. Validate strict column transformers. 
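+ * + * Illustrative examples (table and column names are assumed): + * SELECT * EXCEPT (value) FROM test_table; + * SELECT * REPLACE (id + 1 AS id) FROM test_table; + * For strict EXCEPT and REPLACE transformers the used column names are collected, and transformer columns + * that did not match any column trigger the validation error below.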
+ */ +ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, IdentifierResolveScope & scope) +{ + auto & matcher_node_typed = matcher_node->as(); + + QueryTreeNodesWithNames matched_expression_nodes_with_names; + + if (matcher_node_typed.isQualified()) + matched_expression_nodes_with_names = resolveQualifiedMatcher(matcher_node, scope); + else + matched_expression_nodes_with_names = resolveUnqualifiedMatcher(matcher_node, scope); + + std::unordered_map> strict_transformer_to_used_column_names; + auto add_strict_transformer_column_name = [&](const IColumnTransformerNode * transformer, const std::string & column_name) + { + auto it = strict_transformer_to_used_column_names.find(transformer); + if (it == strict_transformer_to_used_column_names.end()) + { + auto [inserted_it, _] = strict_transformer_to_used_column_names.emplace(transformer, std::unordered_set()); + it = inserted_it; + } + + it->second.insert(column_name); + }; + + ListNodePtr list = std::make_shared(); + ProjectionNames result_projection_names; + ProjectionNames node_projection_names; + + for (auto & [node, column_name] : matched_expression_nodes_with_names) + { + bool apply_transformer_was_used = false; + bool replace_transformer_was_used = false; + bool execute_apply_transformer = false; + bool execute_replace_transformer = false; + + auto projection_name_it = node_to_projection_name.find(node); + if (projection_name_it != node_to_projection_name.end()) + result_projection_names.push_back(projection_name_it->second); + else + result_projection_names.push_back(column_name); + + for (const auto & transformer : matcher_node_typed.getColumnTransformers().getNodes()) + { + if (auto * apply_transformer = transformer->as()) + { + const auto & expression_node = apply_transformer->getExpressionNode(); + apply_transformer_was_used = true; + + if (apply_transformer->getApplyTransformerType() == ApplyColumnTransformerType::LAMBDA) + { + auto lambda_expression_to_resolve = expression_node->clone(); + IdentifierResolveScope lambda_scope(expression_node, &scope /*parent_scope*/); + node_projection_names = resolveLambda(expression_node, lambda_expression_to_resolve, {node}, lambda_scope); + auto & lambda_expression_to_resolve_typed = lambda_expression_to_resolve->as(); + node = lambda_expression_to_resolve_typed.getExpression(); + } + else if (apply_transformer->getApplyTransformerType() == ApplyColumnTransformerType::FUNCTION) + { + auto function_to_resolve_untyped = expression_node->clone(); + auto & function_to_resolve_typed = function_to_resolve_untyped->as(); + function_to_resolve_typed.getArguments().getNodes().push_back(node); + node_projection_names = resolveFunction(function_to_resolve_untyped, scope); + node = function_to_resolve_untyped; + } + else + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Unsupported apply matcher expression type. Expected lambda or function apply transformer. Actual {}. 
In scope {}", + transformer->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + } + + execute_apply_transformer = true; + } + else if (auto * except_transformer = transformer->as()) + { + if (apply_transformer_was_used || replace_transformer_was_used) + break; + + if (except_transformer->isColumnMatching(column_name)) + { + if (except_transformer->isStrict()) + add_strict_transformer_column_name(except_transformer, column_name); + + node = {}; + break; + } + } + else if (auto * replace_transformer = transformer->as()) + { + if (apply_transformer_was_used || replace_transformer_was_used) + break; + + replace_transformer_was_used = true; + + auto replace_expression = replace_transformer->findReplacementExpression(column_name); + if (!replace_expression) + continue; + + if (replace_transformer->isStrict()) + add_strict_transformer_column_name(replace_transformer, column_name); + + node = replace_expression->clone(); + node_projection_names = resolveExpressionNode(node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + execute_replace_transformer = true; + } + + if (execute_apply_transformer || execute_replace_transformer) + { + if (auto * node_list = node->as()) + { + auto & node_list_nodes = node_list->getNodes(); + size_t node_list_nodes_size = node_list_nodes.size(); + + if (node_list_nodes_size != 1) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "{} transformer {} resolved as list node with size {}. Expected 1. In scope {}", + execute_apply_transformer ? "APPLY" : "REPLACE", + transformer->formatASTForErrorMessage(), + node_list_nodes_size, + scope.scope_node->formatASTForErrorMessage()); + + node = node_list_nodes[0]; + } + + if (node_projection_names.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Matcher node expected 1 projection name. Actual {}", node_projection_names.size()); + + result_projection_names.back() = std::move(node_projection_names[0]); + node_to_projection_name.emplace(node, result_projection_names.back()); + node_projection_names.clear(); + } + } + + if (node) + list->getNodes().push_back(node); + else + result_projection_names.pop_back(); + } + + for (auto & [strict_transformer, used_column_names] : strict_transformer_to_used_column_names) + { + auto strict_transformer_type = strict_transformer->getTransformerType(); + const Names * strict_transformer_column_names = nullptr; + + switch (strict_transformer_type) + { + case ColumnTransfomerType::EXCEPT: + { + const auto * except_transformer = static_cast(strict_transformer); + const auto & except_names = except_transformer->getExceptColumnNames(); + + if (except_names.size() != used_column_names.size()) + strict_transformer_column_names = &except_transformer->getExceptColumnNames(); + + break; + } + case ColumnTransfomerType::REPLACE: + { + const auto * replace_transformer = static_cast(strict_transformer); + const auto & replacement_names = replace_transformer->getReplacementsNames(); + + if (replacement_names.size() != used_column_names.size()) + strict_transformer_column_names = &replace_transformer->getReplacementsNames(); + + break; + } + default: + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected strict EXCEPT or REPLACE column transformer. Actual type {}. 
In scope {}", + toString(strict_transformer_type), + scope.scope_node->formatASTForErrorMessage()); + } + } + + if (!strict_transformer_column_names) + continue; + + Names non_matched_column_names; + size_t strict_transformer_column_names_size = strict_transformer_column_names->size(); + for (size_t i = 0; i < strict_transformer_column_names_size; ++i) + { + const auto & column_name = (*strict_transformer_column_names)[i]; + if (used_column_names.find(column_name) == used_column_names.end()) + non_matched_column_names.push_back(column_name); + } + + WriteBufferFromOwnString non_matched_column_names_buffer; + size_t non_matched_column_names_size = non_matched_column_names.size(); + for (size_t i = 0; i < non_matched_column_names_size; ++i) + { + const auto & column_name = non_matched_column_names[i]; + + non_matched_column_names_buffer << column_name; + if (i + 1 != non_matched_column_names_size) + non_matched_column_names_buffer << ", "; + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Strict {} column transformer {} expects following column(s) {}", + toString(strict_transformer_type), + strict_transformer->formatASTForErrorMessage(), + non_matched_column_names_buffer.str()); + } + + matcher_node = std::move(list); + + return result_projection_names; +} + +/** Resolve window function window node. + * + * Node can be identifier or window node. + * Example: SELECT count(*) OVER w FROM test_table WINDOW w AS (PARTITION BY id); + * Example: SELECT count(*) OVER (PARTITION BY id); + * + * If node has parent window name specified, then parent window definition is searched in nearest query scope WINDOW section. + * If node is identifier, than node is replaced with window definition. + * If node is window, that window node is merged with parent window node. + * + * Window node PARTITION BY and ORDER BY parts are resolved. + * If window node has frame begin OFFSET or frame end OFFSET specified, they are resolved, and window node frame constants are updated. + * Window node frame is validated. + */ +ProjectionName QueryAnalyzer::resolveWindow(QueryTreeNodePtr & node, IdentifierResolveScope & scope) +{ + std::string parent_window_name; + auto * identifier_node = node->as(); + + ProjectionName result_projection_name; + QueryTreeNodePtr parent_window_node; + + if (identifier_node) + parent_window_name = identifier_node->getIdentifier().getFullName(); + else if (auto * window_node = node->as()) + parent_window_name = window_node->getParentWindowName(); + + if (!parent_window_name.empty()) + { + auto * nearest_query_scope = scope.getNearestQueryScope(); + + if (!nearest_query_scope) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window '{}' does not exists.", parent_window_name); + + auto & scope_window_name_to_window_node = nearest_query_scope->window_name_to_window_node; + + auto window_node_it = scope_window_name_to_window_node.find(parent_window_name); + if (window_node_it == scope_window_name_to_window_node.end()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Window '{}' does not exists. 
In scope {}", + parent_window_name, + nearest_query_scope->scope_node->formatASTForErrorMessage()); + + parent_window_node = window_node_it->second; + + if (identifier_node) + { + node = parent_window_node->clone(); + result_projection_name = parent_window_name; + } + else + { + mergeWindowWithParentWindow(node, parent_window_node, scope); + } + } + + auto & window_node = node->as(); + window_node.setParentWindowName({}); + + ProjectionNames partition_by_projection_names = resolveExpressionNodeList(window_node.getPartitionByNode(), + scope, + false /*allow_lambda_expression*/, + false /*allow_table_expression*/); + + ProjectionNames order_by_projection_names = resolveSortNodeList(window_node.getOrderByNode(), scope); + + ProjectionNames frame_begin_offset_projection_names; + ProjectionNames frame_end_offset_projection_names; + + if (window_node.hasFrameBeginOffset()) + { + frame_begin_offset_projection_names = resolveExpressionNode(window_node.getFrameBeginOffsetNode(), + scope, + false /*allow_lambda_expression*/, + false /*allow_table_expression*/); + + const auto window_frame_begin_constant_value = window_node.getFrameBeginOffsetNode()->getConstantValueOrNull(); + if (!window_frame_begin_constant_value || !isNativeNumber(removeNullable(window_frame_begin_constant_value->getType()))) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Window frame begin OFFSET expression must be constant with numeric type. Actual {}. In scope {}", + window_node.getFrameBeginOffsetNode()->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + + window_node.getWindowFrame().begin_offset = window_frame_begin_constant_value->getValue(); + if (frame_begin_offset_projection_names.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Window FRAME begin offset expected 1 projection name. Actual {}", + frame_begin_offset_projection_names.size()); + } + + if (window_node.hasFrameEndOffset()) + { + frame_end_offset_projection_names = resolveExpressionNode(window_node.getFrameEndOffsetNode(), + scope, + false /*allow_lambda_expression*/, + false /*allow_table_expression*/); + + const auto window_frame_end_constant_value = window_node.getFrameEndOffsetNode()->getConstantValueOrNull(); + if (!window_frame_end_constant_value || !isNativeNumber(removeNullable(window_frame_end_constant_value->getType()))) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Window frame begin OFFSET expression must be constant with numeric type. Actual {}. In scope {}", + window_node.getFrameEndOffsetNode()->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + + window_node.getWindowFrame().end_offset = window_frame_end_constant_value->getValue(); + if (frame_end_offset_projection_names.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Window FRAME begin offset expected 1 projection name. Actual {}", + frame_end_offset_projection_names.size()); + } + + window_node.getWindowFrame().checkValid(); + + if (result_projection_name.empty()) + { + result_projection_name = calculateWindowProjectionName(node, + parent_window_node, + parent_window_name, + partition_by_projection_names, + order_by_projection_names, + frame_begin_offset_projection_names.empty() ? "" : frame_begin_offset_projection_names.front(), + frame_end_offset_projection_names.empty() ? "" : frame_end_offset_projection_names.front()); + } + + return result_projection_name; +} + +/** Resolve lambda function. + * This function modified lambda_node during resolve. 
It is caller responsibility to clone lambda before resolve + * if it is needed for later use. + * + * Lambda body expression result projection names is used as lambda projection names. + * + * Lambda expression can be resolved into list node. It is caller responsibility to handle it properly. + * + * lambda_node - node that must have LambdaNode type. + * lambda_node_to_resolve - lambda node to resolve that must have LambdaNode type. + * arguments - lambda arguments. + * scope - lambda scope. It is client responsibility to create it. + * + * Resolve steps: + * 1. Validate arguments. + * 2. Register lambda node in lambdas in resolve process. This is necessary to prevent recursive lambda resolving. + * 3. Initialize scope with lambda aliases. + * 4. Validate lambda argument names, and scope expressions. + * 5. Resolve lambda body expression. + * 6. Deregister lambda node from lambdas in resolve process. + */ +ProjectionNames QueryAnalyzer::resolveLambda(const QueryTreeNodePtr & lambda_node, + const QueryTreeNodePtr & lambda_node_to_resolve, + const QueryTreeNodes & lambda_arguments, + IdentifierResolveScope & scope) +{ + auto & lambda_to_resolve = lambda_node_to_resolve->as(); + auto & lambda_arguments_nodes = lambda_to_resolve.getArguments().getNodes(); + size_t lambda_arguments_nodes_size = lambda_arguments_nodes.size(); + + /** Register lambda as being resolved, to prevent recursive lambdas resolution. + * Example: WITH (x -> x + lambda_2(x)) AS lambda_1, (x -> x + lambda_1(x)) AS lambda_2 SELECT 1; + */ + auto it = lambdas_in_resolve_process.find(lambda_node.get()); + if (it != lambdas_in_resolve_process.end()) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Recursive lambda {}. In scope {}", + lambda_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + lambdas_in_resolve_process.emplace(lambda_node.get()); + + size_t arguments_size = lambda_arguments.size(); + if (lambda_arguments_nodes_size != arguments_size) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Lambda {} expect {} arguments. Actual {}. In scope {}", + lambda_to_resolve.formatASTForErrorMessage(), + arguments_size, + lambda_arguments_nodes_size, + scope.scope_node->formatASTForErrorMessage()); + + /// Initialize aliases in lambda scope + QueryExpressionsAliasVisitor visitor(scope); + visitor.visit(lambda_to_resolve.getExpression()); + + /** Replace lambda arguments with new arguments. + * Additionally validate that there are no aliases with same name as lambda arguments. + * Arguments are registered in current scope expression_argument_name_to_node map. + */ + QueryTreeNodes lambda_new_arguments_nodes; + lambda_new_arguments_nodes.reserve(lambda_arguments_nodes_size); + + for (size_t i = 0; i < lambda_arguments_nodes_size; ++i) + { + auto & lambda_argument_node = lambda_arguments_nodes[i]; + auto & lambda_argument_node_typed = lambda_argument_node->as(); + const auto & lambda_argument_name = lambda_argument_node_typed.getIdentifier().getFullName(); + + bool has_expression_node = scope.alias_name_to_expression_node.contains(lambda_argument_name); + bool has_alias_node = scope.alias_name_to_lambda_node.contains(lambda_argument_name); + + if (has_expression_node || has_alias_node) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Alias name '{}' inside lambda {} cannot have same name as lambda argument. 
In scope {}", + lambda_argument_name, + lambda_argument_node_typed.formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + } + + scope.expression_argument_name_to_node.emplace(lambda_argument_name, lambda_arguments[i]); + lambda_new_arguments_nodes.push_back(lambda_arguments[i]); + } + + lambda_to_resolve.getArguments().getNodes() = std::move(lambda_new_arguments_nodes); + + /// Lambda body expression is resolved as standard query expression node. + auto result_projection_names = resolveExpressionNode(lambda_to_resolve.getExpression(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + + lambdas_in_resolve_process.erase(lambda_node.get()); + + return result_projection_names; +} + +/** Resolve function node in scope. + * During function node resolve, function node can be replaced with another expression (if it match lambda or sql user defined function), + * with constant (if it allow constant folding), or with expression list. It is caller responsibility to handle such cases appropriately. + * + * Steps: + * 1. Resolve function parameters. Validate that each function parameter must be constant node. + * 2. Try to lookup function as lambda in current scope. If it is lambda we can skip `in` and `count` special handling. + * 3. If function is count function, that take unqualified ASTERISK matcher, remove it from its arguments. Example: SELECT count(*) FROM test_table; + * 4. If function is `IN` function, then right part of `IN` function is replaced as subquery. + * 5. Resolve function arguments list, lambda expressions are allowed as function arguments. + * For `IN` function table expressions are allowed as function arguments. + * 6. Initialize argument_columns, argument_types, function_lambda_arguments_indexes arrays from function arguments. + * 7. If function name identifier was not resolved as function in current scope, try to lookup lambda from sql user defined functions factory. + * 8. If function was resolve as lambda from step 2 or 7, then resolve lambda using function arguments and replace function node with lambda result. + * After than function node is resolved. + * 9. If function was not resolved during step 6 as lambda, then try to resolve function as window function or executable user defined function + * or ordinary function or aggregate function. + * + * If function is resolved as window function or executable user defined function or aggregate function, function node is resolved + * no additional special handling is required. + * + * 8. If function was resolved as non aggregate function. Then if some of function arguments are lambda expressions, their result types need to be initialized and + * they must be resolved. + * 9. If function is suitable for constant folding, try to perform constant folding for function node. 
+ */ +ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, IdentifierResolveScope & scope) +{ + FunctionNodePtr function_node_ptr = std::static_pointer_cast(node); + auto function_name = function_node_ptr->getFunctionName(); + + /// Resolve function parameters + + auto parameters_projection_names = resolveExpressionNodeList(function_node_ptr->getParametersNode(), + scope, + false /*allow_lambda_expression*/, + false /*allow_table_expression*/); + + /// Convert function parameters into constant parameters array + + Array parameters; + + auto & parameters_nodes = function_node_ptr->getParameters().getNodes(); + parameters.reserve(parameters_nodes.size()); + + for (auto & parameter_node : parameters_nodes) + { + auto constant_value = parameter_node->getConstantValueOrNull(); + + if (!constant_value) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Parameter for function {} expected to have constant value. Actual {}. In scope {}", + function_name, + parameter_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + + parameters.push_back(constant_value->getValue()); + } + + //// If function node is not window function try to lookup function node name as lambda identifier. + QueryTreeNodePtr lambda_expression_untyped; + if (!function_node_ptr->isWindowFunction()) + { + auto function_lookup_result = tryResolveIdentifier({Identifier{function_name}, IdentifierLookupContext::FUNCTION}, scope); + lambda_expression_untyped = function_lookup_result.resolved_identifier; + } + + bool is_special_function_in = false; + bool is_special_function_dict_get_or_join_get = false; + bool is_special_function_exists = false; + + if (!lambda_expression_untyped) + { + is_special_function_in = isNameOfInFunction(function_name); + is_special_function_dict_get_or_join_get = functionIsJoinGet(function_name) || functionIsDictGet(function_name); + is_special_function_exists = function_name == "exists"; + + /// Handle SELECT count(*) FROM test_table + if (function_name == "count" && function_node_ptr->getArguments().getNodes().size() == 1) + { + auto * matcher_node = function_node_ptr->getArguments().getNodes().front()->as(); + if (matcher_node && matcher_node->isUnqualified()) + function_node_ptr->getArguments().getNodes().clear(); + } + } + + /** Special functions dictGet and its variations and joinGet can be executed when first argument is identifier. + * Example: SELECT dictGet(identifier, 'value', toUInt64(0)); + * + * Try to resolve identifier as expression identifier and if it is resolved use it. + * Example: WITH 'dict_name' AS identifier SELECT dictGet(identifier, 'value', toUInt64(0)); + * + * Otherwise replace identifier with identifier full name constant. + * Validation that dictionary exists or table exists will be performed during function `getReturnType` method call. 
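+ * + * Illustration (not part of the original comment): assuming a dictionary dict_db.dict_name exists, in + * SELECT dictGet(dict_db.dict_name, 'value', toUInt64(0)); + * the first argument does not resolve as an expression, so it is replaced with the string constant 'dict_db.dict_name' and validated later during `getReturnType`.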
+ */ + if (is_special_function_dict_get_or_join_get && + !function_node_ptr->getArguments().getNodes().empty() && + function_node_ptr->getArguments().getNodes()[0]->getNodeType() == QueryTreeNodeType::IDENTIFIER) + { + auto & first_argument = function_node_ptr->getArguments().getNodes()[0]; + auto & identifier_node = first_argument->as(); + IdentifierLookup identifier_lookup{identifier_node.getIdentifier(), IdentifierLookupContext::EXPRESSION}; + auto resolve_result = tryResolveIdentifier(identifier_lookup, scope); + + if (resolve_result.isResolved()) + first_argument = std::move(resolve_result.resolved_identifier); + else + first_argument = std::make_shared(identifier_node.getIdentifier().getFullName()); + } + + /// Resolve function arguments + + bool allow_table_expressions = is_special_function_in || is_special_function_exists; + auto arguments_projection_names = resolveExpressionNodeList(function_node_ptr->getArgumentsNode(), + scope, + true /*allow_lambda_expression*/, + allow_table_expressions /*allow_table_expression*/); + + if (is_special_function_exists) + { + /// Rewrite EXISTS (subquery) into 1 IN (SELECT 1 FROM (subquery) LIMIT 1). + auto & exists_subquery_argument = function_node_ptr->getArguments().getNodes().at(0); + + auto constant_data_type = std::make_shared(); + + auto in_subquery = std::make_shared(); + in_subquery->getProjection().getNodes().push_back(std::make_shared(1UL, constant_data_type)); + in_subquery->getJoinTree() = exists_subquery_argument; + in_subquery->getLimit() = std::make_shared(1UL, constant_data_type); + in_subquery->resolveProjectionColumns({NameAndTypePair("1", constant_data_type)}); + + function_node_ptr = std::make_shared("in"); + function_node_ptr->getArguments().getNodes() = {std::make_shared(1UL, constant_data_type), in_subquery}; + node = function_node_ptr; + function_name = "in"; + + is_special_function_in = true; + } + + auto & function_node = *function_node_ptr; + + /// Replace right IN function argument if it is table or table function with subquery that read ordinary columns + if (is_special_function_in) + { + auto & function_in_arguments_nodes = function_node.getArguments().getNodes(); + if (function_in_arguments_nodes.size() != 2) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} expects 2 arguments", function_name); + + auto & in_second_argument = function_in_arguments_nodes[1]; + auto * table_node = in_second_argument->as(); + auto * table_function_node = in_second_argument->as(); + auto * query_node = in_second_argument->as(); + auto * union_node = in_second_argument->as(); + + if (table_node && dynamic_cast(table_node->getStorage().get()) != nullptr) + { + /// If table is already prepared set, we do not replace it with subquery + } + else if (table_node || table_function_node) + { + const auto & storage_snapshot = table_node ? 
table_node->getStorageSnapshot() : table_function_node->getStorageSnapshot(); + auto columns_to_select = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::Ordinary)); + + size_t columns_to_select_size = columns_to_select.size(); + + auto column_nodes_to_select = std::make_shared(); + column_nodes_to_select->getNodes().reserve(columns_to_select_size); + + NamesAndTypes projection_columns; + projection_columns.reserve(columns_to_select_size); + + for (auto & column : columns_to_select) + { + column_nodes_to_select->getNodes().emplace_back(std::make_shared(column, in_second_argument)); + projection_columns.emplace_back(column.name, column.type); + } + + auto in_second_argument_query_node = std::make_shared(); + in_second_argument_query_node->setIsSubquery(true); + in_second_argument_query_node->getProjectionNode() = std::move(column_nodes_to_select); + in_second_argument_query_node->getJoinTree() = std::move(in_second_argument); + in_second_argument_query_node->resolveProjectionColumns(std::move(projection_columns)); + + in_second_argument = std::move(in_second_argument_query_node); + } + else if (query_node || union_node) + { + IdentifierResolveScope subquery_scope(in_second_argument, &scope /*parent_scope*/); + subquery_scope.subquery_depth = scope.subquery_depth + 1; + + if (query_node) + resolveQuery(in_second_argument, subquery_scope); + else if (union_node) + resolveUnion(in_second_argument, subquery_scope); + } + } + + /// Initialize function argument columns + + ColumnsWithTypeAndName argument_columns; + DataTypes argument_types; + bool all_arguments_constants = true; + std::vector function_lambda_arguments_indexes; + + auto & function_arguments = function_node.getArguments().getNodes(); + size_t function_arguments_size = function_arguments.size(); + + for (size_t function_argument_index = 0; function_argument_index < function_arguments_size; ++function_argument_index) + { + auto & function_argument = function_arguments[function_argument_index]; + + ColumnWithTypeAndName argument_column; + bool argument_is_lambda = false; + + /** If function argument is lambda, save lambda argument index and initialize argument type as DataTypeFunction + * where function argument types are initialized with empty array of lambda arguments size. + */ + if (const auto * lambda_node = function_argument->as()) + { + argument_is_lambda = true; + size_t lambda_arguments_size = lambda_node->getArguments().getNodes().size(); + argument_column.type = std::make_shared(DataTypes(lambda_arguments_size, nullptr), nullptr); + function_lambda_arguments_indexes.push_back(function_argument_index); + } + else if (is_special_function_in && + (function_argument->getNodeType() == QueryTreeNodeType::TABLE || + function_argument->getNodeType() == QueryTreeNodeType::QUERY || + function_argument->getNodeType() == QueryTreeNodeType::UNION)) + { + argument_column.type = std::make_shared(); + } + else + { + argument_column.type = function_argument->getResultType(); + } + + if (!argument_column.type) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Function {} argument is not resolved. 
In scope {}", + function_node.getFunctionName(), + scope.scope_node->formatASTForErrorMessage()); + + const auto constant_value = function_argument->getConstantValueOrNull(); + if (!argument_is_lambda && constant_value) + { + argument_column.column = constant_value->getType()->createColumnConst(1, constant_value->getValue()); + argument_column.type = constant_value->getType(); + } + else + { + all_arguments_constants = false; + } + + argument_types.push_back(argument_column.type); + argument_columns.emplace_back(std::move(argument_column)); + } + + /// Calculate function projection name + ProjectionNames result_projection_names = {calculateFunctionProjectionName(node, parameters_projection_names, arguments_projection_names)}; + + /** Try to resolve function as + * 1. Lambda function in current scope. Example: WITH (x -> x + 1) AS lambda SELECT lambda(1); + * 2. Lambda function from sql user defined functions. + * 3. Special `untuple` function. + * 4. Special `grouping` function. + * 5. Window function. + * 6. Executable user defined function. + * 7. Ordinary function. + * 8. Aggregate function. + * + * TODO: Provide better error hints. + */ + if (!function_node.isWindowFunction()) + { + if (!lambda_expression_untyped) + lambda_expression_untyped = tryGetLambdaFromSQLUserDefinedFunctions(function_node.getFunctionName(), scope.context); + + /** If function is resolved as lambda. + * Clone lambda before resolve. + * Initialize lambda arguments as function arguments. + * Resolve lambda and then replace function node with resolved lambda expression body. + * Example: WITH (x -> x + 1) AS lambda SELECT lambda(value) FROM test_table; + * Result: SELECT value + 1 FROM test_table; + */ + if (lambda_expression_untyped) + { + auto * lambda_expression = lambda_expression_untyped->as(); + if (!lambda_expression) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Function identifier {} must be resolved as lambda. Actual {}. In scope {}", + function_node.getFunctionName(), + lambda_expression_untyped->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + + auto lambda_expression_clone = lambda_expression_untyped->clone(); + + IdentifierResolveScope lambda_scope(lambda_expression_clone, &scope /*parent_scope*/); + ProjectionNames lambda_projection_names = resolveLambda(lambda_expression_untyped, lambda_expression_clone, function_arguments, lambda_scope); + + auto & resolved_lambda = lambda_expression_clone->as(); + node = resolved_lambda.getExpression(); + + if (node->getNodeType() == QueryTreeNodeType::LIST) + result_projection_names = std::move(lambda_projection_names); + + return result_projection_names; + } + + if (function_name == "untuple") + { + /// Special handling of `untuple` function + + if (function_arguments.size() != 1) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Function 'untuple' must have 1 argument. In scope {}", + scope.scope_node->formatASTForErrorMessage()); + + const auto & untuple_argument = function_arguments[0]; + auto result_type = untuple_argument->getResultType(); + const auto * tuple_data_type = typeid_cast(result_type.get()); + if (!tuple_data_type) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Function untuple argument must be have compound type. Actual type {}. 
In scope {}", + result_type->getName(), + scope.scope_node->formatASTForErrorMessage()); + + const auto & element_names = tuple_data_type->getElementNames(); + + auto result_list = std::make_shared(); + result_list->getNodes().reserve(element_names.size()); + + for (const auto & element_name : element_names) + { + auto tuple_element_function = std::make_shared("tupleElement"); + tuple_element_function->getArguments().getNodes().push_back(untuple_argument); + tuple_element_function->getArguments().getNodes().push_back(std::make_shared(element_name)); + + QueryTreeNodePtr function_query_node = tuple_element_function; + resolveFunction(function_query_node, scope); + + result_list->getNodes().push_back(std::move(function_query_node)); + } + + auto untuple_argument_projection_name = arguments_projection_names.at(0); + result_projection_names.clear(); + + for (const auto & element_name : element_names) + { + if (node->hasAlias()) + result_projection_names.push_back(node->getAlias() + '.' + element_name); + else + result_projection_names.push_back(fmt::format("tupleElement({}, '{}')", untuple_argument_projection_name, element_name)); + } + + node = std::move(result_list); + return result_projection_names; + } + else if (function_name == "grouping") + { + /// It is responsibility of planner to perform additional handling of grouping function + if (function_arguments_size == 0) + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, + "Function GROUPING expects at least one argument"); + else if (function_arguments_size > 64) + throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, + "Function GROUPING can have up to 64 arguments, but {} provided", + function_arguments_size); + + bool force_grouping_standard_compatibility = scope.context->getSettingsRef().force_grouping_standard_compatibility; + auto grouping_function = std::make_shared(force_grouping_standard_compatibility); + auto grouping_function_adaptor = std::make_shared(std::move(grouping_function)); + function_node.resolveAsFunction(std::move(grouping_function_adaptor), std::make_shared()); + return result_projection_names; + } + } + + if (function_node.isWindowFunction()) + { + if (!AggregateFunctionFactory::instance().isAggregateFunctionName(function_name)) + throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION, + "Aggregate function with name {} does not exists. 
In scope {}", + function_name, + scope.scope_node->formatASTForErrorMessage()); + + AggregateFunctionProperties properties; + auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties); + + function_node.resolveAsWindowFunction(aggregate_function, aggregate_function->getReturnType()); + + bool window_node_is_identifier = function_node.getWindowNode()->getNodeType() == QueryTreeNodeType::IDENTIFIER; + ProjectionName window_projection_name = resolveWindow(function_node.getWindowNode(), scope); + + if (window_node_is_identifier) + result_projection_names[0] += " OVER " + window_projection_name; + else + result_projection_names[0] += " OVER (" + window_projection_name + ')'; + + return result_projection_names; + } + + FunctionOverloadResolverPtr function = UserDefinedExecutableFunctionFactory::instance().tryGet(function_name, scope.context, parameters); + + if (!function) + function = FunctionFactory::instance().tryGet(function_name, scope.context); + + if (!function) + { + if (!AggregateFunctionFactory::instance().isAggregateFunctionName(function_name)) + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, + "Function with name {} does not exists. In scope {}", + function_name, + scope.scope_node->formatASTForErrorMessage()); + + AggregateFunctionProperties properties; + auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties); + function_node.resolveAsAggregateFunction(aggregate_function, aggregate_function->getReturnType()); + return result_projection_names; + } + + /** For lambda arguments we need to initialize lambda argument types DataTypeFunction using `getLambdaArgumentTypes` function. + * Then each lambda arguments are initialized with columns, where column source is lambda. + * This information is important for later steps of query processing. + * Example: SELECT arrayMap(x -> x + 1, [1, 2, 3]). + * lambda node x -> x + 1 identifier x is resolved as column where source is lambda node. + */ + bool has_lambda_arguments = !function_lambda_arguments_indexes.empty(); + if (has_lambda_arguments) + { + function->getLambdaArgumentTypes(argument_types); + + ProjectionNames lambda_projection_names; + for (auto & function_lambda_argument_index : function_lambda_arguments_indexes) + { + auto & lambda_argument = function_arguments[function_lambda_argument_index]; + auto lambda_to_resolve = lambda_argument->clone(); + auto & lambda_to_resolve_typed = lambda_to_resolve->as(); + + const auto & lambda_argument_names = lambda_to_resolve_typed.getArgumentNames(); + size_t lambda_arguments_size = lambda_to_resolve_typed.getArguments().getNodes().size(); + + const auto * function_data_type = typeid_cast(argument_types[function_lambda_argument_index].get()); + if (!function_data_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Function {} expected function data type for lambda argument with index {}. Actual {}. In scope {}", + function_name, + function_lambda_argument_index, + argument_types[function_lambda_argument_index]->getName(), + scope.scope_node->formatASTForErrorMessage()); + + const auto & function_data_type_argument_types = function_data_type->getArgumentTypes(); + size_t function_data_type_arguments_size = function_data_type_argument_types.size(); + if (function_data_type_arguments_size != lambda_arguments_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Function {} function data type for lambda argument with index {} arguments size mismatch. Actual {}. 
Expected {}. In scope {}", + function_name, + function_data_type_arguments_size, + lambda_arguments_size, + argument_types[function_lambda_argument_index]->getName(), + scope.scope_node->formatASTForErrorMessage()); + + QueryTreeNodes lambda_arguments; + lambda_arguments.reserve(lambda_arguments_size); + + for (size_t i = 0; i < lambda_arguments_size; ++i) + { + const auto & argument_type = function_data_type_argument_types[i]; + auto column_name_and_type = NameAndTypePair{lambda_argument_names[i], argument_type}; + lambda_arguments.push_back(std::make_shared(std::move(column_name_and_type), lambda_to_resolve)); + } + + IdentifierResolveScope lambda_scope(lambda_to_resolve, &scope /*parent_scope*/); + lambda_projection_names = resolveLambda(lambda_argument, lambda_to_resolve, lambda_arguments, lambda_scope); + + if (auto * lambda_list_node_result = lambda_to_resolve_typed.getExpression()->as()) + { + size_t lambda_list_node_result_nodes_size = lambda_list_node_result->getNodes().size(); + + if (lambda_list_node_result_nodes_size != 1) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Lambda as function argument resolved as list node with size {}. Expected 1. In scope {}", + lambda_list_node_result_nodes_size, + lambda_to_resolve->formatASTForErrorMessage()); + + lambda_to_resolve_typed.getExpression() = lambda_list_node_result->getNodes().front(); + } + + if (arguments_projection_names.at(function_lambda_argument_index) == PROJECTION_NAME_PLACEHOLDER) + { + size_t lambda_projection_names_size =lambda_projection_names.size(); + if (lambda_projection_names_size != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Lambda argument inside function expected to have 1 projection name. Actual {}", + lambda_projection_names_size); + + WriteBufferFromOwnString lambda_argument_projection_name_buffer; + lambda_argument_projection_name_buffer << "lambda("; + lambda_argument_projection_name_buffer << "tuple("; + + size_t lambda_argument_names_size = lambda_argument_names.size(); + + for (size_t i = 0; i < lambda_argument_names_size; ++i) + { + const auto & lambda_argument_name = lambda_argument_names[i]; + lambda_argument_projection_name_buffer << lambda_argument_name; + + if (i + 1 != lambda_argument_names_size) + lambda_argument_projection_name_buffer << ", "; + } + + lambda_argument_projection_name_buffer << "), "; + lambda_argument_projection_name_buffer << lambda_projection_names[0]; + lambda_argument_projection_name_buffer << ")"; + + lambda_projection_names.clear(); + + arguments_projection_names[function_lambda_argument_index] = lambda_argument_projection_name_buffer.str(); + } + + argument_types[function_lambda_argument_index] = std::make_shared(function_data_type_argument_types, lambda_to_resolve->getResultType()); + argument_columns[function_lambda_argument_index].type = argument_types[function_lambda_argument_index]; + function_arguments[function_lambda_argument_index] = std::move(lambda_to_resolve); + } + + /// Recalculate function projection name after lambda resolution + result_projection_names = {calculateFunctionProjectionName(node, parameters_projection_names, arguments_projection_names)}; + } + + /** Create SET column for special function IN to allow constant folding + * if left and right arguments are constants. 
+ * + * Example: SELECT * FROM test_table LIMIT 1 IN 1; + */ + if (is_special_function_in && + function_arguments.at(0)->hasConstantValue() && + function_arguments.at(1)->hasConstantValue()) + { + const auto & first_argument_constant_value = function_arguments[0]->getConstantValue(); + const auto & second_argument_constant_value = function_arguments[1]->getConstantValue(); + + const auto & first_argument_constant_type = first_argument_constant_value.getType(); + const auto & second_argument_constant_literal = second_argument_constant_value.getValue(); + const auto & second_argument_constant_type = second_argument_constant_value.getType(); + + auto set = makeSetForConstantValue(first_argument_constant_type, second_argument_constant_literal, second_argument_constant_type, scope.context->getSettingsRef()); + + /// Create constant set column for constant folding + + auto column_set = ColumnSet::create(1, std::move(set)); + argument_columns[1].column = ColumnConst::create(std::move(column_set), 1); + } + + DataTypePtr result_type; + + try + { + auto function_base = function->build(argument_columns); + result_type = function_base->getResultType(); + + /** If function is suitable for constant folding try to convert it to constant. + * Example: SELECT plus(1, 1); + * Result: SELECT 2; + */ + if (function_base->isSuitableForConstantFolding()) + { + auto executable_function = function_base->prepare(argument_columns); + + ColumnPtr column; + + if (all_arguments_constants) + { + size_t num_rows = function_arguments.empty() ? 0 : argument_columns.front().column->size(); + column = executable_function->execute(argument_columns, result_type, num_rows, true); + } + else + { + column = function_base->getConstantResultForNonConstArguments(argument_columns, result_type); + } + + if (column && isColumnConst(*column)) + { + /// Replace function node with result constant node + Field constant_value; + column->get(0, constant_value); + + function_node.performConstantFolding(std::make_shared(std::move(constant_value), result_type)); + } + } + } + catch (Exception & e) + { + e.addMessage("In scope {}", scope.scope_node->formatASTForErrorMessage()); + throw; + } + + function_node.resolveAsFunction(std::move(function), std::move(result_type)); + + return result_projection_names; +} + +/** Resolve expression node. + * Argument node can be replaced with different node, or even with list node in case of matcher resolution. + * Example: SELECT * FROM test_table; + * * - is matcher node, and it can be resolved into ListNode. + * + * Steps: + * 1. If node has alias, replace node with its value in scope alias map. Register alias in expression_aliases_in_resolve_process, to prevent resolving identifier + * which can bind to expression alias name. Check tryResolveIdentifierFromAliases documentation for additional explanation. + * Example: + * SELECT id AS id FROM test_table; + * SELECT value.value1 AS value FROM test_table; + * + * 2. Call specific resolve method depending on node type. + * + * If allow_table_expression = true and node is query node, then it is not evaluated as scalar subquery. + * Although if node is identifier that is resolved into query node that query is evaluated as scalar subquery. + * SELECT id, (SELECT 1) AS c FROM test_table WHERE a IN c; + * SELECT id, FROM test_table WHERE a IN (SELECT 1); + * + * 3. Special case identifier node. + * Try resolve it as expression identifier. + * Then if allow_lambda_expression = true try to resolve it as function. 
+ * Then if allow_table_expression = true try to resolve it as table expression. + * + * 4. If node has alias, update its value in scope alias map. Deregister alias from expression_aliases_in_resolve_process. + */ +ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression) +{ + auto resolved_expression_it = resolved_expressions.find(node); + if (resolved_expression_it != resolved_expressions.end()) + { + /** There can be edge case, when subquery for IN function is resolved multiple times in different context. + * SELECT id IN (subquery AS value), value FROM test_table; + * When we start to resolve `value` identifier, subquery is already resolved but constant folding is not performed. + */ + auto node_type = node->getNodeType(); + if (!allow_table_expression && (node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION)) + { + IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/); + subquery_scope.subquery_depth = scope.subquery_depth + 1; + + evaluateScalarSubqueryIfNeeded(node, subquery_scope.subquery_depth, subquery_scope.context); + } + + return resolved_expression_it->second; + } + + String node_alias = node->getAlias(); + ProjectionNames result_projection_names; + + if (node_alias.empty()) + { + auto projection_name_it = node_to_projection_name.find(node); + if (projection_name_it != node_to_projection_name.end()) + result_projection_names.push_back(projection_name_it->second); + } + else + { + result_projection_names.push_back(node_alias); + } + + /** Do not use alias table if node has alias same as some other node. + * Example: WITH x -> x + 1 AS lambda SELECT 1 AS lambda; + * During 1 AS lambda resolve if we use alias table we replace node with x -> x + 1 AS lambda. + * + * Do not use alias table if allow_table_expression = true and we resolve query node directly. + * Example: SELECT a FROM test_table WHERE id IN (SELECT 1) AS a; + * To support both (SELECT 1) AS expression in projection and (SELECT 1) as subquery in IN, do not use + * alias table because in alias table subquery could be evaluated as scalar. + */ + bool use_alias_table = true; + if (scope.nodes_with_duplicated_aliases.contains(node) || (allow_table_expression && node->getNodeType() == QueryTreeNodeType::QUERY)) + use_alias_table = false; + + if (!node_alias.empty() && use_alias_table) + { + /** Node could be potentially resolved by resolving other nodes. + * SELECT b, a as b FROM test_table; + * + * To resolve b we need to resolve a. 
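+ * + * Illustration (not part of the original comment): assuming test_table has a column a, the first b in the projection binds to the alias b introduced by `a AS b`, so the aliased expression a has to be resolved before b itself can be resolved.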
+ */ + auto it = scope.alias_name_to_expression_node.find(node_alias); + if (it != scope.alias_name_to_expression_node.end()) + node = it->second; + + if (allow_lambda_expression) + { + it = scope.alias_name_to_lambda_node.find(node_alias); + if (it != scope.alias_name_to_lambda_node.end()) + node = it->second; + } + } + + scope.expressions_in_resolve_process_stack.pushNode(node); + + auto node_type = node->getNodeType(); + + switch (node_type) + { + case QueryTreeNodeType::IDENTIFIER: + { + auto & identifier_node = node->as(); + auto unresolved_identifier = identifier_node.getIdentifier(); + auto resolve_identifier_expression_result = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::EXPRESSION}, scope); + node = resolve_identifier_expression_result.resolved_identifier; + + if (node && result_projection_names.empty() && + (resolve_identifier_expression_result.isResolvedFromJoinTree() || resolve_identifier_expression_result.isResolvedFromExpressionArguments())) + { + auto projection_name_it = node_to_projection_name.find(node); + if (projection_name_it != node_to_projection_name.end()) + result_projection_names.push_back(projection_name_it->second); + } + + if (node && !node_alias.empty()) + scope.alias_name_to_lambda_node.erase(node_alias); + + if (!node && allow_lambda_expression) + { + node = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::FUNCTION}, scope).resolved_identifier; + + if (node && !node_alias.empty()) + scope.alias_name_to_expression_node.erase(node_alias); + } + + if (!node && allow_table_expression) + { + node = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::TABLE_EXPRESSION}, scope).resolved_identifier; + + /// If table identifier is resolved as CTE clone it + bool resolved_as_cte = node && node->as() && node->as()->isCTE(); + + if (resolved_as_cte) + { + node = node->clone(); + node->as().setIsCTE(false); + } + } + + if (!node) + { + std::string message_clarification; + if (allow_lambda_expression) + message_clarification = std::string(" or ") + toStringLowercase(IdentifierLookupContext::FUNCTION); + + if (allow_table_expression) + message_clarification = std::string(" or ") + toStringLowercase(IdentifierLookupContext::TABLE_EXPRESSION); + + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, + "Unknown {}{} identifier '{}' in scope {}", + toStringLowercase(IdentifierLookupContext::EXPRESSION), + message_clarification, + unresolved_identifier.getFullName(), + scope.scope_node->formatASTForErrorMessage()); + } + + if (node->getNodeType() == QueryTreeNodeType::LIST) + { + result_projection_names.clear(); + resolved_expression_it = resolved_expressions.find(node); + if (resolved_expression_it != resolved_expressions.end()) + return resolved_expression_it->second; + else + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Identifier '{}' resolve into list node and list node projection names are not initialized. In scope {}", + unresolved_identifier.getFullName(), + scope.scope_node->formatASTForErrorMessage()); + } + + if (result_projection_names.empty()) + result_projection_names.push_back(unresolved_identifier.getFullName()); + + break; + } + case QueryTreeNodeType::MATCHER: + { + result_projection_names = resolveMatcher(node, scope); + break; + } + case QueryTreeNodeType::LIST: + { + /** Edge case if list expression has alias. + * Matchers cannot have aliases, but `untuple` function can. 
+ * Example: SELECT a, untuple(CAST(('hello', 1) AS Tuple(name String, count UInt32))) AS a; + * During resolveFunction `untuple` function is replaced by list of 2 constants 'hello', 1. + */ + result_projection_names = resolveExpressionNodeList(node, scope, allow_lambda_expression, allow_lambda_expression); + break; + } + case QueryTreeNodeType::CONSTANT: + { + if (result_projection_names.empty()) + { + const auto & constant_node = node->as(); + result_projection_names.push_back(constant_node.getValueStringRepresentation()); + } + + /// Already resolved + break; + } + case QueryTreeNodeType::COLUMN: + { + auto & column_node = node->as(); + if (column_node.hasExpression()) + resolveExpressionNode(column_node.getExpression(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + + if (result_projection_names.empty()) + result_projection_names.push_back(column_node.getColumnName()); + + break; + } + case QueryTreeNodeType::FUNCTION: + { + auto function_projection_names = resolveFunction(node, scope); + + if (result_projection_names.empty() || node->getNodeType() == QueryTreeNodeType::LIST) + result_projection_names = std::move(function_projection_names); + + break; + } + case QueryTreeNodeType::LAMBDA: + { + if (!allow_lambda_expression) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Lambda {} is not allowed in expression context. In scope {}", + node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + + if (result_projection_names.empty()) + result_projection_names.push_back(PROJECTION_NAME_PLACEHOLDER); + + /// Lambda must be resolved by caller + break; + } + case QueryTreeNodeType::QUERY: + [[fallthrough]]; + case QueryTreeNodeType::UNION: + { + IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/); + subquery_scope.subquery_depth = scope.subquery_depth + 1; + + if (node_type == QueryTreeNodeType::QUERY) + resolveQuery(node, subquery_scope); + else + resolveUnion(node, subquery_scope); + + if (!allow_table_expression) + evaluateScalarSubqueryIfNeeded(node, subquery_scope.subquery_depth, subquery_scope.context); + + ++subquery_counter; + if (result_projection_names.empty()) + result_projection_names.push_back("_subquery_" + std::to_string(subquery_counter)); + + break; + } + case QueryTreeNodeType::TABLE: + { + if (!allow_table_expression) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Table {} is not allowed in expression context. In scope {}", + node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + + auto & table_node = node->as(); + result_projection_names.push_back(table_node.getStorageID().getFullNameNotQuoted()); + + break; + } + case QueryTreeNodeType::TRANSFORMER: + [[fallthrough]]; + case QueryTreeNodeType::SORT: + [[fallthrough]]; + case QueryTreeNodeType::INTERPOLATE: + [[fallthrough]]; + case QueryTreeNodeType::WINDOW: + [[fallthrough]]; + case QueryTreeNodeType::TABLE_FUNCTION: + [[fallthrough]]; + case QueryTreeNodeType::ARRAY_JOIN: + [[fallthrough]]; + case QueryTreeNodeType::JOIN: + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "{} {} is not allowed in expression context. In scope {}", + node->getNodeType(), + node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + } + } + + /** Update aliases after expression node was resolved. + * Do not update node in alias table if we resolve it for duplicate alias. 
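+ * + * Illustration (not part of the original comment): in WITH x -> x + 1 AS lambda SELECT 1 AS lambda; the constant 1 carries a duplicated alias, so it must not overwrite the lambda entry in the alias table.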
+ */ + if (!node_alias.empty() && use_alias_table) + { + auto it = scope.alias_name_to_expression_node.find(node_alias); + if (it != scope.alias_name_to_expression_node.end()) + it->second = node; + + if (allow_lambda_expression) + { + it = scope.alias_name_to_lambda_node.find(node_alias); + if (it != scope.alias_name_to_lambda_node.end()) + it->second = node; + } + } + + resolved_expressions.emplace(node, result_projection_names); + + scope.expressions_in_resolve_process_stack.popNode(); + bool expression_was_root = scope.expressions_in_resolve_process_stack.empty(); + if (expression_was_root) + scope.non_cached_identifier_lookups_during_expression_resolve.clear(); + + return result_projection_names; +} + +/** Resolve expression node list. + * If expression is CTE subquery node it is skipped. + * If expression is resolved in list, it is flattened into initial node list. + * + * Such examples must work: + * Example: CREATE TABLE test_table (id UInt64, value UInt64) ENGINE=TinyLog; SELECT plus(*) FROM test_table; + * Example: SELECT *** FROM system.one; + */ +ProjectionNames QueryAnalyzer::resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression) +{ + auto & node_list_typed = node_list->as(); + size_t node_list_size = node_list_typed.getNodes().size(); + + QueryTreeNodes result_nodes; + result_nodes.reserve(node_list_size); + + ProjectionNames result_projection_names; + + for (auto & node : node_list_typed.getNodes()) + { + auto node_to_resolve = node; + auto expression_node_projection_names = resolveExpressionNode(node_to_resolve, scope, allow_lambda_expression, allow_table_expression); + + size_t expected_projection_names_size = 1; + if (auto * expression_list = node_to_resolve->as()) + { + expected_projection_names_size = expression_list->getNodes().size(); + for (auto & expression_list_node : expression_list->getNodes()) + result_nodes.push_back(expression_list_node); + } + else + { + result_nodes.push_back(std::move(node_to_resolve)); + } + + if (expression_node_projection_names.size() != expected_projection_names_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expression nodes list expected {} projection names. Actual {}", + expected_projection_names_size, + expression_node_projection_names.size()); + + result_projection_names.insert(result_projection_names.end(), expression_node_projection_names.begin(), expression_node_projection_names.end()); + expression_node_projection_names.clear(); + } + + node_list_typed.getNodes() = std::move(result_nodes); + + return result_projection_names; +} + +/** Resolve sort columns nodes list. 
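+ * + * Illustration (not part of the original comment): assuming the `numbers` table function, a sort list such as + * SELECT number FROM numbers(10) ORDER BY number WITH FILL FROM 0 TO 20 STEP 2 + * exercises the FILL FROM, FILL TO and FILL STEP validation performed below.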
+ */ +ProjectionNames QueryAnalyzer::resolveSortNodeList(QueryTreeNodePtr & sort_node_list, IdentifierResolveScope & scope) +{ + ProjectionNames result_projection_names; + ProjectionNames sort_expression_projection_names; + ProjectionNames fill_from_expression_projection_names; + ProjectionNames fill_to_expression_projection_names; + ProjectionNames fill_step_expression_projection_names; + + auto & sort_node_list_typed = sort_node_list->as(); + for (auto & node : sort_node_list_typed.getNodes()) + { + auto & sort_node = node->as(); + sort_expression_projection_names = resolveExpressionNode(sort_node.getExpression(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + + if (auto * sort_column_list_node = sort_node.getExpression()->as()) + { + size_t sort_column_list_node_size = sort_column_list_node->getNodes().size(); + if (sort_column_list_node_size != 1) + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Sort column node expression resolved into list with size {}. Expected 1. In scope {}", + sort_column_list_node_size, + scope.scope_node->formatASTForErrorMessage()); + } + + sort_node.getExpression() = sort_column_list_node->getNodes().front(); + } + + size_t sort_expression_projection_names_size = sort_expression_projection_names.size(); + if (sort_expression_projection_names_size != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Sort expression expected 1 projection name. Actual {}", + sort_expression_projection_names_size); + + if (sort_node.hasFillFrom()) + { + fill_from_expression_projection_names = resolveExpressionNode(sort_node.getFillFrom(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + + const auto constant_value = sort_node.getFillFrom()->getConstantValueOrNull(); + if (!constant_value || !isColumnedAsNumber(constant_value->getType())) + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "Sort FILL FROM expression must be constant with numeric type. Actual {}. In scope {}", + sort_node.getFillFrom()->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + + size_t fill_from_expression_projection_names_size = fill_from_expression_projection_names.size(); + if (fill_from_expression_projection_names_size != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Sort node FILL FROM expression expected 1 projection name. Actual {}", + fill_from_expression_projection_names_size); + } + + if (sort_node.hasFillTo()) + { + fill_to_expression_projection_names = resolveExpressionNode(sort_node.getFillTo(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + + const auto constant_value = sort_node.getFillTo()->getConstantValueOrNull(); + if (!constant_value || !isColumnedAsNumber(constant_value->getType())) + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "Sort FILL TO expression must be constant with numeric type. Actual {}. In scope {}", + sort_node.getFillTo()->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + + size_t fill_to_expression_projection_names_size = fill_to_expression_projection_names.size(); + if (fill_to_expression_projection_names_size != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Sort node FILL TO expression expected 1 projection name.
Actual {}", + fill_to_expression_projection_names_size); + } + + if (sort_node.hasFillStep()) + { + fill_step_expression_projection_names = resolveExpressionNode(sort_node.getFillStep(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + + const auto constant_value = sort_node.getFillStep()->getConstantValueOrNull(); + if (!constant_value) + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "Sort FILL STEP expression must be constant with numeric or interval type. Actual {}. In scope {}", + sort_node.getFillStep()->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + + bool is_number = isColumnedAsNumber(constant_value->getType()); + bool is_interval = WhichDataType(constant_value->getType()).isInterval(); + if (!is_number && !is_interval) + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "Sort FILL STEP expression must be constant with numeric or interval type. Actual {}. In scope {}", + sort_node.getFillStep()->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + + size_t fill_step_expression_projection_names_size = fill_step_expression_projection_names.size(); + if (fill_step_expression_projection_names_size != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Sort FILL STEP expression expected 1 projection name. Actual {}", + fill_step_expression_projection_names_size); + } + + auto sort_column_projection_name = calculateSortColumnProjectionName(node, + sort_expression_projection_names[0], + fill_from_expression_projection_names.empty() ? "" : fill_from_expression_projection_names.front(), + fill_to_expression_projection_names.empty() ? "" : fill_to_expression_projection_names.front(), + fill_step_expression_projection_names.empty() ? "" : fill_step_expression_projection_names.front()); + + result_projection_names.push_back(std::move(sort_column_projection_name)); + + sort_expression_projection_names.clear(); + fill_from_expression_projection_names.clear(); + fill_to_expression_projection_names.clear(); + fill_step_expression_projection_names.clear(); + } + + return result_projection_names; +} + +/** Resolve interpolate columns nodes list. + */ +void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpolate_node_list, IdentifierResolveScope & scope) +{ + auto & interpolate_node_list_typed = interpolate_node_list->as(); + + for (auto & interpolate_node : interpolate_node_list_typed.getNodes()) + { + auto & interpolate_node_typed = interpolate_node->as(); + + resolveExpressionNode(interpolate_node_typed.getExpression(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + resolveExpressionNode(interpolate_node_typed.getInterpolateExpression(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + } +} + +/** Resolve window nodes list. 
+ */ +void QueryAnalyzer::resolveWindowNodeList(QueryTreeNodePtr & window_node_list, IdentifierResolveScope & scope) +{ + auto & window_node_list_typed = window_node_list->as(); + for (auto & node : window_node_list_typed.getNodes()) + resolveWindow(node, scope); +} + +NamesAndTypes QueryAnalyzer::resolveProjectionExpressionNodeList(QueryTreeNodePtr & projection_node_list, IdentifierResolveScope & scope) +{ + ProjectionNames projection_names = resolveExpressionNodeList(projection_node_list, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + + auto projection_nodes = projection_node_list->as().getNodes(); + size_t projection_nodes_size = projection_nodes.size(); + + NamesAndTypes projection_columns; + projection_columns.reserve(projection_nodes_size); + + for (size_t i = 0; i < projection_nodes_size; ++i) + { + auto projection_node = projection_nodes[i]; + + if (!isExpressionNodeType(projection_node->getNodeType())) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Projection node must be constant, function, column, query or union"); + + projection_columns.emplace_back(projection_names[i], projection_node->getResultType()); + } + + return projection_columns; +} + +/** Initialize query join tree node. + * + * 1. Resolve identifiers. + * 2. Register table, table function, query, union, join, array join nodes in scope table expressions in resolve process. + */ +void QueryAnalyzer::initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, IdentifierResolveScope & scope) +{ + std::deque join_tree_node_ptrs_to_process_queue; + join_tree_node_ptrs_to_process_queue.push_back(&join_tree_node); + + while (!join_tree_node_ptrs_to_process_queue.empty()) + { + auto * current_join_tree_node_ptr = join_tree_node_ptrs_to_process_queue.front(); + join_tree_node_ptrs_to_process_queue.pop_front(); + + auto & current_join_tree_node = *current_join_tree_node_ptr; + auto current_join_tree_node_type = current_join_tree_node->getNodeType(); + + switch (current_join_tree_node_type) + { + case QueryTreeNodeType::IDENTIFIER: + { + auto & from_table_identifier = current_join_tree_node->as(); + auto table_identifier_lookup = IdentifierLookup{from_table_identifier.getIdentifier(), IdentifierLookupContext::TABLE_EXPRESSION}; + + IdentifierResolveSettings resolve_settings; + /// In join tree initialization ignore join tree as identifier lookup source + resolve_settings.allow_to_check_join_tree = false; + /** Disable resolve of subquery during identifier resolution. + * Example: SELECT * FROM (SELECT 1) AS t1, t1; + * During `t1` identifier resolution we resolve it into subquery SELECT 1, but we want to disable + * subquery resolution at this stage, because JOIN TREE of parent query is not resolved. 
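+ * + * Illustration (not part of the original comment): in the example above the second t1 is only bound to the aliased subquery at this point; the subquery body itself is resolved later, together with the rest of the parent join tree.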
+ */ + resolve_settings.allow_to_resolve_subquery_during_identifier_resolution = false; + + auto table_identifier_resolve_result = tryResolveIdentifier(table_identifier_lookup, scope, resolve_settings); + auto resolved_identifier = table_identifier_resolve_result.resolved_identifier; + + if (!resolved_identifier) + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, + "Unknown table expression identifier '{}' in scope {}", + from_table_identifier.getIdentifier().getFullName(), + scope.scope_node->formatASTForErrorMessage()); + + resolved_identifier = resolved_identifier->clone(); + + auto table_expression_modifiers = from_table_identifier.getTableExpressionModifiers(); + + if (auto * resolved_identifier_query_node = resolved_identifier->as()) + { + resolved_identifier_query_node->setIsCTE(false); + if (table_expression_modifiers.has_value()) + resolved_identifier_query_node->setTableExpressionModifiers(*table_expression_modifiers); + } + else if (auto * resolved_identifier_union_node = resolved_identifier->as()) + { + resolved_identifier_union_node->setIsCTE(false); + if (table_expression_modifiers.has_value()) + resolved_identifier_union_node->setTableExpressionModifiers(*table_expression_modifiers); + } + else if (auto * resolved_identifier_table_node = resolved_identifier->as()) + { + if (table_expression_modifiers.has_value()) + resolved_identifier_table_node->setTableExpressionModifiers(*table_expression_modifiers); + } + else if (auto * resolved_identifier_table_function_node = resolved_identifier->as()) + { + if (table_expression_modifiers.has_value()) + resolved_identifier_table_function_node->setTableExpressionModifiers(*table_expression_modifiers); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Identifier in JOIN TREE '{}' resolved into unexpected table expression. In scope {}", + from_table_identifier.getIdentifier().getFullName(), + scope.scope_node->formatASTForErrorMessage()); + } + + auto current_join_tree_node_alias = current_join_tree_node->getAlias(); + resolved_identifier->setAlias(current_join_tree_node_alias); + current_join_tree_node = resolved_identifier; + + scope.table_expressions_in_resolve_process.insert(current_join_tree_node.get()); + break; + } + case QueryTreeNodeType::QUERY: + { + scope.table_expressions_in_resolve_process.insert(current_join_tree_node.get()); + break; + } + case QueryTreeNodeType::UNION: + { + scope.table_expressions_in_resolve_process.insert(current_join_tree_node.get()); + break; + } + case QueryTreeNodeType::TABLE_FUNCTION: + { + scope.table_expressions_in_resolve_process.insert(current_join_tree_node.get()); + break; + } + case QueryTreeNodeType::TABLE: + { + scope.table_expressions_in_resolve_process.insert(current_join_tree_node.get()); + break; + } + case QueryTreeNodeType::ARRAY_JOIN: + { + auto & array_join = current_join_tree_node->as(); + join_tree_node_ptrs_to_process_queue.push_back(&array_join.getTableExpression()); + scope.table_expressions_in_resolve_process.insert(current_join_tree_node.get()); + break; + } + case QueryTreeNodeType::JOIN: + { + auto & join = current_join_tree_node->as(); + join_tree_node_ptrs_to_process_queue.push_back(&join.getLeftTableExpression()); + join_tree_node_ptrs_to_process_queue.push_back(&join.getRightTableExpression()); + scope.table_expressions_in_resolve_process.insert(current_join_tree_node.get()); + break; + } + default: + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Query FROM section expected table, table function, query, UNION, ARRAY JOIN or JOIN. Actual {} {}. 
In scope {}", + current_join_tree_node->getNodeTypeName(), + current_join_tree_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + } + } + } +} + +/// Initialize table expression columns for table expression node +void QueryAnalyzer::initializeTableExpressionColumns(const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope) +{ + auto * table_node = table_expression_node->as(); + auto * query_node = table_expression_node->as(); + auto * union_node = table_expression_node->as(); + auto * table_function_node = table_expression_node->as(); + + if (!table_node && !table_function_node && !query_node && !union_node) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Unexpected table expression. Expected table, table function, query or union node. Actual {}. In scope {}", + table_expression_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + + auto table_expression_data_it = scope.table_expression_node_to_data.find(table_expression_node); + if (table_expression_data_it != scope.table_expression_node_to_data.end()) + return; + + TableExpressionData table_expression_data; + + if (table_node) + { + const auto & table_storage_id = table_node->getStorageID(); + table_expression_data.table_name = table_storage_id.table_name; + table_expression_data.database_name = table_storage_id.database_name; + table_expression_data.table_expression_name = table_storage_id.getFullNameNotQuoted(); + table_expression_data.table_expression_description = "table"; + } + else if (query_node || union_node) + { + table_expression_data.table_name = query_node ? query_node->getCTEName() : union_node->getCTEName(); + table_expression_data.table_expression_description = "subquery"; + + if (table_expression_node->hasAlias()) + table_expression_data.table_expression_name = table_expression_node->getAlias(); + } + else if (table_function_node) + { + table_expression_data.table_expression_description = "table_function"; + if (table_function_node->hasAlias()) + table_expression_data.table_expression_name = table_function_node->getAlias(); + } + + if (table_node || table_function_node) + { + const auto & storage_snapshot = table_node ? table_node->getStorageSnapshot() : table_function_node->getStorageSnapshot(); + + auto column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withVirtuals()); + const auto & columns_description = storage_snapshot->metadata->getColumns(); + + std::vector> alias_columns_to_resolve; + ColumnNameToColumnNodeMap column_name_to_column_node; + column_name_to_column_node.reserve(column_names_and_types.size()); + + /** For ALIAS columns in table we must additionally analyze ALIAS expressions. + * Example: CREATE TABLE test_table (id UInt64, alias_value_1 ALIAS id + 5); + * + * To do that we collect alias columns and build table column name to column node map. + * For each alias column we build identifier resolve scope, initialize it with table column name to node map + * and resolve alias column. 
+ */ + for (const auto & column_name_and_type : column_names_and_types) + { + const auto & column_default = columns_description.getDefault(column_name_and_type.name); + + if (column_default && column_default->kind == ColumnDefaultKind::Alias) + { + auto column_node = std::make_shared(column_name_and_type, buildQueryTree(column_default->expression, scope.context), table_expression_node); + column_name_to_column_node.emplace(column_name_and_type.name, column_node); + alias_columns_to_resolve.emplace_back(column_name_and_type.name, column_node); + } + else + { + auto column_node = std::make_shared(column_name_and_type, table_expression_node); + column_name_to_column_node.emplace(column_name_and_type.name, column_node); + } + } + + for (auto & [alias_column_to_resolve_name, alias_column_to_resolve] : alias_columns_to_resolve) + { + /** Alias column could be potentially resolved during resolve of other ALIAS column. + * Example: CREATE TABLE test_table (id UInt64, alias_value_1 ALIAS id + alias_value_2, alias_value_2 ALIAS id + 5) ENGINE=TinyLog; + * + * During resolve of alias_value_1, alias_value_2 column will be resolved. + */ + alias_column_to_resolve = column_name_to_column_node[alias_column_to_resolve_name]; + + IdentifierResolveScope alias_column_resolve_scope(alias_column_to_resolve, nullptr /*parent_scope*/); + alias_column_resolve_scope.column_name_to_column_node = std::move(column_name_to_column_node); + alias_column_resolve_scope.context = scope.context; + + /// Initialize aliases in alias column scope + QueryExpressionsAliasVisitor visitor(alias_column_resolve_scope); + visitor.visit(alias_column_to_resolve->getExpression()); + + resolveExpressionNode(alias_column_resolve_scope.scope_node, + alias_column_resolve_scope, + false /*allow_lambda_expression*/, + false /*allow_table_expression*/); + + column_name_to_column_node = std::move(alias_column_resolve_scope.column_name_to_column_node); + column_name_to_column_node[alias_column_to_resolve_name] = alias_column_to_resolve; + } + + table_expression_data.column_name_to_column_node = std::move(column_name_to_column_node); + } + else if (query_node || union_node) + { + auto column_names_and_types = query_node ? query_node->getProjectionColumns() : union_node->computeProjectionColumns(); + table_expression_data.column_name_to_column_node.reserve(column_names_and_types.size()); + + for (const auto & column_name_and_type : column_names_and_types) + { + auto column_node = std::make_shared(column_name_and_type, table_expression_node); + table_expression_data.column_name_to_column_node.emplace(column_name_and_type.name, column_node); + } + } + + table_expression_data.column_identifier_first_parts.reserve(table_expression_data.column_name_to_column_node.size()); + + for (auto & [column_name, _] : table_expression_data.column_name_to_column_node) + { + Identifier column_name_identifier(column_name); + table_expression_data.column_identifier_first_parts.insert(column_name_identifier.at(0)); + } + + scope.table_expression_node_to_data.emplace(table_expression_node, std::move(table_expression_data)); +} + +/** Resolve query join tree. + * + * Query join tree must be initialized before calling this function. 
+ */ +void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, IdentifierResolveScope & scope, QueryExpressionsAliasVisitor & expressions_visitor) +{ + auto add_table_expression_alias_into_scope = [&](const QueryTreeNodePtr & table_expression_node) + { + const auto & alias_name = table_expression_node->getAlias(); + if (alias_name.empty()) + return; + + auto [it, inserted] = scope.alias_name_to_table_expression_node.emplace(alias_name, table_expression_node); + if (!inserted) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Duplicate aliases {} for table expressions in FROM section are not allowed. Try to register {}. Already registered {}.", + alias_name, + table_expression_node->formatASTForErrorMessage(), + it->second->formatASTForErrorMessage()); + }; + + auto from_node_type = join_tree_node->getNodeType(); + + switch (from_node_type) + { + case QueryTreeNodeType::QUERY: + [[fallthrough]]; + case QueryTreeNodeType::UNION: + { + IdentifierResolveScope subquery_scope(join_tree_node, &scope); + subquery_scope.subquery_depth = scope.subquery_depth + 1; + + if (from_node_type == QueryTreeNodeType::QUERY) + resolveQuery(join_tree_node, subquery_scope); + else if (from_node_type == QueryTreeNodeType::UNION) + resolveUnion(join_tree_node, subquery_scope); + + break; + } + case QueryTreeNodeType::TABLE_FUNCTION: + { + auto & table_function_node = join_tree_node->as(); + expressions_visitor.visit(table_function_node.getArgumentsNode()); + + const auto & table_function_factory = TableFunctionFactory::instance(); + const auto & table_function_name = table_function_node.getTableFunctionName(); + + auto & scope_context = scope.context; + + TableFunctionPtr table_function_ptr = table_function_factory.tryGet(table_function_name, scope_context); + if (!table_function_ptr) + { + auto hints = TableFunctionFactory::instance().getHints(table_function_name); + if (!hints.empty()) + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, + "Unknown table function {}. Maybe you meant: {}", + table_function_name, + DB::toString(hints)); + else + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown table function {}", table_function_name); + } + + if (scope_context->getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) + { + const auto & insertion_table = scope_context->getInsertionTable(); + if (!insertion_table.empty()) + { + const auto & structure_hint + = DatabaseCatalog::instance().getTable(insertion_table, scope_context)->getInMemoryMetadataPtr()->columns; + table_function_ptr->setStructureHint(structure_hint); + } + } + + /// TODO: Special functions that can take query + /// TODO: Support qualified matchers for table function + + for (auto & argument_node : table_function_node.getArguments().getNodes()) + { + if (argument_node->getNodeType() == QueryTreeNodeType::MATCHER) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Matcher as table function argument is not supported {}. 
In scope {}", + join_tree_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + } + + auto * function_node = argument_node->as(); + if (function_node && table_function_factory.hasNameOrAlias(function_node->getFunctionName())) + continue; + + resolveExpressionNode(argument_node, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/); + } + + auto table_function_ast = table_function_node.toAST(); + table_function_ptr->parseArguments(table_function_ast, scope_context); + + auto table_function_storage = table_function_ptr->execute(table_function_ast, scope_context, table_function_ptr->getName()); + table_function_node.resolve(std::move(table_function_ptr), std::move(table_function_storage), scope_context); + + break; + } + case QueryTreeNodeType::TABLE: + { + break; + } + case QueryTreeNodeType::ARRAY_JOIN: + { + auto & array_join_node = join_tree_node->as(); + resolveQueryJoinTreeNode(array_join_node.getTableExpression(), scope, expressions_visitor); + validateJoinTableExpressionWithoutAlias(join_tree_node, array_join_node.getTableExpression(), scope); + + /// Wrap array join expressions into column nodes, where array join expression is inner expression. + + for (auto & array_join_expression : array_join_node.getJoinExpressions().getNodes()) + { + auto array_join_expression_alias = array_join_expression->getAlias(); + if (!array_join_expression_alias.empty() && scope.alias_name_to_expression_node.contains(array_join_expression_alias)) + throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, + "ARRAY JOIN expression {} with duplicate alias {}. In scope {}", + array_join_expression->formatASTForErrorMessage(), + array_join_expression_alias, + scope.scope_node->formatASTForErrorMessage()); + + /// Add array join expression into scope + expressions_visitor.visit(array_join_expression); + + resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + + auto result_type = array_join_expression->getResultType(); + + if (!isArray(result_type)) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "ARRAY JOIN {} requires expression with Array type. Actual {}. 
In scope {}", + array_join_node.formatASTForErrorMessage(), + result_type->getName(), + scope.scope_node->formatASTForErrorMessage()); + + result_type = assert_cast(*result_type).getNestedType(); + + auto array_join_expression_name = "__array_join_expression_" + std::to_string(array_join_expressions_counter); + ++array_join_expressions_counter; + + auto array_join_column = std::make_shared(NameAndTypePair{array_join_expression_name, result_type}, array_join_expression, join_tree_node); + array_join_expression = std::move(array_join_column); + array_join_expression->setAlias(array_join_expression_alias); + + auto it = scope.alias_name_to_expression_node.find(array_join_expression_alias); + if (it != scope.alias_name_to_expression_node.end()) + it->second = std::make_shared(NameAndTypePair{array_join_expression_name, result_type}, join_tree_node); + } + + break; + } + case QueryTreeNodeType::JOIN: + { + auto & join_node = join_tree_node->as(); + + resolveQueryJoinTreeNode(join_node.getLeftTableExpression(), scope, expressions_visitor); + validateJoinTableExpressionWithoutAlias(join_tree_node, join_node.getLeftTableExpression(), scope); + + resolveQueryJoinTreeNode(join_node.getRightTableExpression(), scope, expressions_visitor); + validateJoinTableExpressionWithoutAlias(join_tree_node, join_node.getRightTableExpression(), scope); + + if (join_node.isUsingJoinExpression()) + { + auto & join_using_list = join_node.getJoinExpression()->as(); + std::unordered_set join_using_identifiers; + + for (auto & join_using_node : join_using_list.getNodes()) + { + auto * identifier_node = join_using_node->as(); + if (!identifier_node) + continue; + + const auto & identifier_full_name = identifier_node->getIdentifier().getFullName(); + + if (join_using_identifiers.contains(identifier_full_name)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "JOIN {} identifier '{}' appears more than once in USING clause", + join_node.formatASTForErrorMessage(), + identifier_full_name); + + join_using_identifiers.insert(identifier_full_name); + + IdentifierLookup identifier_lookup {identifier_node->getIdentifier(), IdentifierLookupContext::EXPRESSION}; + auto result_left_table_expression = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, join_node.getLeftTableExpression(), scope); + if (!result_left_table_expression) + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "JOIN {} using identifier '{}' cannot be resolved from left table expression. In scope {}", + join_node.formatASTForErrorMessage(), + identifier_full_name, + scope.scope_node->formatASTForErrorMessage()); + + auto result_right_table_expression = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, join_node.getRightTableExpression(), scope); + if (!result_right_table_expression) + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "JOIN {} using identifier '{}' cannot be resolved from right table expression. In scope {}", + join_node.formatASTForErrorMessage(), + identifier_full_name, + scope.scope_node->formatASTForErrorMessage()); + + DataTypePtr common_type = tryGetLeastSupertype(DataTypes{result_left_table_expression->getResultType(), result_right_table_expression->getResultType()}); + + if (!common_type) + throw Exception(ErrorCodes::NO_COMMON_TYPE, + "JOIN {} cannot infer common type in USING for identifier '{}'. 
In scope {}", + join_node.formatASTForErrorMessage(), + identifier_full_name, + scope.scope_node->formatASTForErrorMessage()); + + NameAndTypePair join_using_columns_common_name_and_type(identifier_full_name, common_type); + ListNodePtr join_using_expression = std::make_shared(QueryTreeNodes{result_left_table_expression, result_right_table_expression}); + auto join_using_column = std::make_shared(join_using_columns_common_name_and_type, std::move(join_using_expression), join_tree_node); + + join_using_node = std::move(join_using_column); + } + } + else if (join_node.getJoinExpression()) + { + expressions_visitor.visit(join_node.getJoinExpression()); + auto join_expression = join_node.getJoinExpression(); + resolveExpressionNode(join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + join_node.getJoinExpression() = std::move(join_expression); + } + + break; + } + case QueryTreeNodeType::IDENTIFIER: + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Identifiers in FROM section must be already resolved. In scope {}", + join_tree_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + } + default: + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Query FROM section expected table, table function, query, ARRAY JOIN or JOIN. Actual {}. In scope {}", + join_tree_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + } + } + + auto join_tree_node_type = join_tree_node->getNodeType(); + if (isTableExpressionNodeType(join_tree_node_type)) + { + validateTableExpressionModifiers(join_tree_node, scope); + initializeTableExpressionColumns(join_tree_node, scope); + } + + add_table_expression_alias_into_scope(join_tree_node); + scope.table_expressions_in_resolve_process.erase(join_tree_node.get()); +} + +class ValidateGroupByColumnsVisitor : public ConstInDepthQueryTreeVisitor +{ +public: + ValidateGroupByColumnsVisitor(const QueryTreeNodes & group_by_keys_nodes_, const IdentifierResolveScope & scope_) + : group_by_keys_nodes(group_by_keys_nodes_) + , scope(scope_) + {} + + void visitImpl(const QueryTreeNodePtr & node) + { + auto query_tree_node_type = node->getNodeType(); + if (query_tree_node_type == QueryTreeNodeType::CONSTANT || + query_tree_node_type == QueryTreeNodeType::SORT || + query_tree_node_type == QueryTreeNodeType::INTERPOLATE) + return; + + auto * function_node = node->as(); + if (function_node && function_node->getFunctionName() == "grouping") + { + auto & grouping_function_arguments_nodes = function_node->getArguments().getNodes(); + for (auto & grouping_function_arguments_node : grouping_function_arguments_nodes) + { + bool found_argument_in_group_by_keys = false; + + for (const auto & group_by_key_node : group_by_keys_nodes) + { + if (grouping_function_arguments_node->isEqual(*group_by_key_node)) + { + found_argument_in_group_by_keys = true; + break; + } + } + + if (!found_argument_in_group_by_keys) + throw Exception(ErrorCodes::NOT_AN_AGGREGATE, + "GROUPING function argument {} is not in GROUP BY. 
In scope {}", + grouping_function_arguments_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + } + + return; + } + + auto * column_node = node->as(); + if (!column_node) + return; + + auto column_node_source = column_node->getColumnSource(); + if (column_node_source->getNodeType() == QueryTreeNodeType::LAMBDA) + return; + + for (const auto & group_by_key_node : group_by_keys_nodes) + { + if (node->isEqual(*group_by_key_node)) + return; + } + + std::string column_name; + + if (column_node_source->hasAlias()) + column_name = column_node_source->getAlias(); + else if (auto * table_node = column_node_source->as()) + column_name = table_node->getStorageID().getFullTableName(); + + column_name += '.' + column_node->getColumnName(); + + throw Exception(ErrorCodes::NOT_AN_AGGREGATE, + "Column {} is not under aggregate function and not in GROUP BY. In scope {}", + column_name, + scope.scope_node->formatASTForErrorMessage()); + } + + bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node) + { + auto * child_function_node = child_node->as(); + if (child_function_node) + { + if (child_function_node->isAggregateFunction()) + return false; + + for (const auto & group_by_key_node : group_by_keys_nodes) + { + if (child_node->isEqual(*group_by_key_node)) + return false; + } + } + + return !(child_node->getNodeType() == QueryTreeNodeType::QUERY || child_node->getNodeType() == QueryTreeNodeType::UNION); + } + +private: + const QueryTreeNodes & group_by_keys_nodes; + const IdentifierResolveScope & scope; +}; + +/** Resolve query. + * This function modifies query node during resolve. It is caller responsibility to clone query node before resolve + * if it is needed for later use. + * + * query_node - query_tree_node that must have QueryNode type. + * scope - query scope. It is caller responsibility to create it. + * + * Resolve steps: + * 1. Validate subqueries depth, perform GROUP BY validation that does not depend on information about aggregate functions. + * 2. Initialize query scope with aliases. + * 3. Register CTE subqueries from WITH section in scope and remove them from WITH section. + * 4. Resolve JOIN TREE. + * 5. Resolve projection columns. + * 6. Resolve expressions in other query parts. + * 7. Validate nodes with duplicate aliases. + * 8. Validate aggregate functions, GROUPING function, window functions. + * 9. Remove WITH and WINDOW sections from query. + * 10. Remove aliases from expression and lambda nodes. + * 11. Resolve query tree node with projection columns. + */ +void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, IdentifierResolveScope & scope) +{ + size_t max_subquery_depth = scope.context->getSettingsRef().max_subquery_depth; + if (max_subquery_depth && scope.subquery_depth > max_subquery_depth) + throw Exception(ErrorCodes::TOO_DEEP_SUBQUERIES, + "Too deep subqueries. 
Maximum: {}", + max_subquery_depth); + + auto & query_node_typed = query_node->as(); + + if (query_node_typed.hasSettingsChanges()) + { + auto updated_scope_context = Context::createCopy(scope.context); + updated_scope_context->applySettingsChanges(query_node_typed.getSettingsChanges()); + scope.context = std::move(updated_scope_context); + } + + const auto & settings = scope.context->getSettingsRef(); + + if (settings.group_by_use_nulls) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "GROUP BY use nulls is not supported"); + + bool is_rollup_or_cube = query_node_typed.isGroupByWithRollup() || query_node_typed.isGroupByWithCube(); + + if (query_node_typed.isGroupByWithGroupingSets() && query_node_typed.isGroupByWithTotals()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS and GROUPING SETS are not supported together"); + + if (query_node_typed.isGroupByWithGroupingSets() && is_rollup_or_cube) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "GROUPING SETS are not supported together with ROLLUP and CUBE"); + + if (query_node_typed.isGroupByWithRollup() && (query_node_typed.isGroupByWithGroupingSets() || query_node_typed.isGroupByWithCube())) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ROLLUP is not supported together with GROUPING SETS and CUBE"); + + if (query_node_typed.isGroupByWithCube() && (query_node_typed.isGroupByWithGroupingSets() || query_node_typed.isGroupByWithRollup())) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "CUBE is not supported together with GROUPING SETS and ROLLUP"); + + if (query_node_typed.hasHaving() && query_node_typed.isGroupByWithTotals() && is_rollup_or_cube) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of HAVING"); + + /// Initialize aliases in query node scope + QueryExpressionsAliasVisitor visitor(scope); + + if (query_node_typed.hasWith()) + visitor.visit(query_node_typed.getWithNode()); + + if (!query_node_typed.getProjection().getNodes().empty()) + visitor.visit(query_node_typed.getProjectionNode()); + + if (query_node_typed.getPrewhere()) + visitor.visit(query_node_typed.getPrewhere()); + + if (query_node_typed.getWhere()) + visitor.visit(query_node_typed.getWhere()); + + if (query_node_typed.hasGroupBy()) + visitor.visit(query_node_typed.getGroupByNode()); + + if (query_node_typed.hasHaving()) + visitor.visit(query_node_typed.getHaving()); + + if (query_node_typed.hasWindow()) + visitor.visit(query_node_typed.getWindowNode()); + + if (query_node_typed.hasOrderBy()) + visitor.visit(query_node_typed.getOrderByNode()); + + if (query_node_typed.hasInterpolate()) + visitor.visit(query_node_typed.getInterpolate()); + + if (query_node_typed.hasLimitByLimit()) + visitor.visit(query_node_typed.getLimitByLimit()); + + if (query_node_typed.hasLimitByOffset()) + visitor.visit(query_node_typed.getLimitByOffset()); + + if (query_node_typed.hasLimitBy()) + visitor.visit(query_node_typed.getLimitByNode()); + + if (query_node_typed.hasLimit()) + visitor.visit(query_node_typed.getLimit()); + + if (query_node_typed.hasOffset()) + visitor.visit(query_node_typed.getOffset()); + + /// Register CTE subqueries and remove them from WITH section + + auto & with_nodes = query_node_typed.getWith().getNodes(); + + for (auto & node : with_nodes) + { + auto * subquery_node = node->as(); + auto * union_node = node->as(); + + bool subquery_is_cte = (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE()); + + if (!subquery_is_cte) + continue; + + const auto & 
cte_name = subquery_node ? subquery_node->getCTEName() : union_node->getCTEName(); + + auto [_, inserted] = scope.cte_name_to_query_node.emplace(cte_name, node); + if (!inserted) + throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, + "CTE with name {} already exists. In scope {}", + cte_name, + scope.scope_node->formatASTForErrorMessage()); + } + + std::erase_if(with_nodes, [](const QueryTreeNodePtr & node) + { + auto * subquery_node = node->as(); + auto * union_node = node->as(); + + return (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE()); + }); + + for (auto & window_node : query_node_typed.getWindow().getNodes()) + { + auto & window_node_typed = window_node->as(); + auto parent_window_name = window_node_typed.getParentWindowName(); + if (!parent_window_name.empty()) + { + auto window_node_it = scope.window_name_to_window_node.find(parent_window_name); + if (window_node_it == scope.window_name_to_window_node.end()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Window '{}' does not exists. In scope {}", + parent_window_name, + scope.scope_node->formatASTForErrorMessage()); + + mergeWindowWithParentWindow(window_node, window_node_it->second, scope); + window_node_typed.setParentWindowName({}); + } + + scope.window_name_to_window_node.emplace(window_node_typed.getAlias(), window_node); + } + + /** Disable identifier cache during JOIN TREE resolve. + * Depending on JOIN expression section, identifier with same name + * can be resolved in different columns. + * + * Example: SELECT id FROM test_table AS t1 INNER JOIN test_table AS t2 ON t1.id = t2.id INNER JOIN test_table AS t3 ON t1.id = t3.id + * In first join expression ON t1.id = t2.id t1.id is resolved into test_table.id column. + * In second join expression ON t1.id = t3.id t1.id must be resolved into test_table.id column after first JOIN. + */ + scope.use_identifier_lookup_to_result_cache = false; + + if (query_node_typed.getJoinTree()) + { + TableExpressionsAliasVisitor table_expressions_visitor(scope); + table_expressions_visitor.visit(query_node_typed.getJoinTree()); + + initializeQueryJoinTreeNode(query_node_typed.getJoinTree(), scope); + scope.alias_name_to_table_expression_node.clear(); + + resolveQueryJoinTreeNode(query_node_typed.getJoinTree(), scope, visitor); + } + + scope.use_identifier_lookup_to_result_cache = true; + + /// Resolve query node sections. + + auto projection_columns = resolveProjectionExpressionNodeList(query_node_typed.getProjectionNode(), scope); + if (query_node_typed.getProjection().getNodes().empty()) + throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED, + "Empty list of columns in projection. 
In scope {}", + scope.scope_node->formatASTForErrorMessage()); + + if (query_node_typed.hasWith()) + resolveExpressionNodeList(query_node_typed.getWithNode(), scope, true /*allow_lambda_expression*/, false /*allow_table_expression*/); + + if (query_node_typed.getPrewhere()) + resolveExpressionNode(query_node_typed.getPrewhere(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + + if (query_node_typed.getWhere()) + resolveExpressionNode(query_node_typed.getWhere(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + + if (query_node_typed.hasGroupBy()) + { + if (query_node_typed.isGroupByWithGroupingSets()) + { + for (auto & grouping_sets_keys_list_node : query_node_typed.getGroupBy().getNodes()) + { + if (settings.enable_positional_arguments) + replaceNodesWithPositionalArguments(grouping_sets_keys_list_node, query_node_typed.getProjection().getNodes(), scope); + + resolveExpressionNodeList(grouping_sets_keys_list_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + } + } + else + { + if (settings.enable_positional_arguments) + replaceNodesWithPositionalArguments(query_node_typed.getGroupByNode(), query_node_typed.getProjection().getNodes(), scope); + + resolveExpressionNodeList(query_node_typed.getGroupByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + } + } + + if (query_node_typed.hasHaving()) + resolveExpressionNode(query_node_typed.getHaving(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + + if (query_node_typed.hasWindow()) + resolveWindowNodeList(query_node_typed.getWindowNode(), scope); + + if (query_node_typed.hasOrderBy()) + { + if (settings.enable_positional_arguments) + replaceNodesWithPositionalArguments(query_node_typed.getOrderByNode(), query_node_typed.getProjection().getNodes(), scope); + + resolveSortNodeList(query_node_typed.getOrderByNode(), scope); + } + + if (query_node_typed.hasInterpolate()) + resolveInterpolateColumnsNodeList(query_node_typed.getInterpolate(), scope); + + if (query_node_typed.hasLimitByLimit()) + { + resolveExpressionNode(query_node_typed.getLimitByLimit(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + validateLimitOffsetExpression(query_node_typed.getLimitByLimit(), "LIMIT BY LIMIT", scope); + } + + if (query_node_typed.hasLimitByOffset()) + { + resolveExpressionNode(query_node_typed.getLimitByOffset(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + validateLimitOffsetExpression(query_node_typed.getLimitByOffset(), "LIMIT BY OFFSET", scope); + } + + if (query_node_typed.hasLimitBy()) + { + if (settings.enable_positional_arguments) + replaceNodesWithPositionalArguments(query_node_typed.getLimitByNode(), query_node_typed.getProjection().getNodes(), scope); + + resolveExpressionNodeList(query_node_typed.getLimitByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + } + + if (query_node_typed.hasLimit()) + { + resolveExpressionNode(query_node_typed.getLimit(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + validateLimitOffsetExpression(query_node_typed.getLimit(), "LIMIT", scope); + } + + if (query_node_typed.hasOffset()) + { + resolveExpressionNode(query_node_typed.getOffset(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + validateLimitOffsetExpression(query_node_typed.getOffset(), "OFFSET", scope); + } + + /** Resolve nodes with 
duplicate aliases. + * Table expressions cannot have duplicate aliases. + * + * Such nodes during scope aliases collection are placed into duplicated array. + * After scope nodes are resolved, we can compare node with duplicate alias with + * node from scope alias table. + */ + for (const auto & node_with_duplicated_alias : scope.nodes_with_duplicated_aliases) + { + auto node = node_with_duplicated_alias; + auto node_alias = node->getAlias(); + resolveExpressionNode(node, scope, true /*allow_lambda_expression*/, false /*allow_table_expression*/); + + bool has_node_in_alias_table = false; + + auto it = scope.alias_name_to_expression_node.find(node_alias); + if (it != scope.alias_name_to_expression_node.end()) + { + has_node_in_alias_table = true; + + if (!it->second->isEqual(*node)) + throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, + "Multiple expressions {} and {} for alias {}. In scope {}", + node->formatASTForErrorMessage(), + it->second->formatASTForErrorMessage(), + node_alias, + scope.scope_node->formatASTForErrorMessage()); + } + + it = scope.alias_name_to_lambda_node.find(node_alias); + if (it != scope.alias_name_to_lambda_node.end()) + { + has_node_in_alias_table = true; + + if (!it->second->isEqual(*node)) + throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, + "Multiple expressions {} and {} for alias {}. In scope {}", + node->formatASTForErrorMessage(), + it->second->formatASTForErrorMessage(), + node_alias, + scope.scope_node->formatASTForErrorMessage()); + } + + if (!has_node_in_alias_table) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Node {} with duplicate alias {} does not exists in alias table. In scope {}", + node->formatASTForErrorMessage(), + node_alias, + scope.scope_node->formatASTForErrorMessage()); + + node->removeAlias(); + } + + /** Validate aggregates + * + * 1. Check that there are no aggregate functions and GROUPING function in JOIN TREE, WHERE, PREWHERE, in another aggregate functions. + * 2. Check that there are no window functions in JOIN TREE, WHERE, PREWHERE, HAVING, WINDOW, inside another aggregate function, + * inside window function arguments, inside window function window definition. + * 3. Check that there are no columns that are not specified in GROUP BY keys. + * 4. Validate GROUP BY modifiers. 
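+ *
+ * Example of a query rejected by check 3: SELECT id, value FROM test_table GROUP BY id;
+ * value is neither a GROUP BY key nor an argument of an aggregate function, so NOT_AN_AGGREGATE is thrown.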
+ */ + assertNoAggregateFunctionNodes(query_node_typed.getJoinTree(), "in JOIN TREE"); + assertNoGroupingFunction(query_node_typed.getJoinTree(), "in JOIN TREE"); + assertNoWindowFunctionNodes(query_node_typed.getJoinTree(), "in JOIN TREE"); + + if (query_node_typed.hasWhere()) + { + assertNoAggregateFunctionNodes(query_node_typed.getWhere(), "in WHERE"); + assertNoGroupingFunction(query_node_typed.getWhere(), "in WHERE"); + assertNoWindowFunctionNodes(query_node_typed.getWhere(), "in WHERE"); + } + + if (query_node_typed.hasPrewhere()) + { + assertNoAggregateFunctionNodes(query_node_typed.getPrewhere(), "in PREWHERE"); + assertNoGroupingFunction(query_node_typed.getPrewhere(), "in PREWHERE"); + assertNoWindowFunctionNodes(query_node_typed.getPrewhere(), "in PREWHERE"); + } + + if (query_node_typed.hasHaving()) + assertNoWindowFunctionNodes(query_node_typed.getHaving(), "in HAVING"); + + if (query_node_typed.hasWindow()) + assertNoWindowFunctionNodes(query_node_typed.getWindowNode(), "in WINDOW"); + + QueryTreeNodes aggregate_function_nodes; + QueryTreeNodes window_function_nodes; + + collectAggregateFunctionNodes(query_node, aggregate_function_nodes); + collectWindowFunctionNodes(query_node, window_function_nodes); + + if (query_node_typed.hasGroupBy()) + assertNoAggregateFunctionNodes(query_node_typed.getGroupByNode(), "in GROUP BY"); + + for (auto & aggregate_function_node : aggregate_function_nodes) + { + auto & aggregate_function_node_typed = aggregate_function_node->as(); + + assertNoAggregateFunctionNodes(aggregate_function_node_typed.getArgumentsNode(), "inside another aggregate function"); + assertNoGroupingFunction(aggregate_function_node_typed.getArgumentsNode(), "inside another aggregate function"); + assertNoWindowFunctionNodes(aggregate_function_node_typed.getArgumentsNode(), "inside an aggregate function"); + } + + for (auto & window_function_node : window_function_nodes) + { + auto & window_function_node_typed = window_function_node->as(); + assertNoWindowFunctionNodes(window_function_node_typed.getArgumentsNode(), "inside another window function"); + + if (query_node_typed.hasWindow()) + assertNoWindowFunctionNodes(window_function_node_typed.getWindowNode(), "inside window definition"); + } + + QueryTreeNodes group_by_keys_nodes; + group_by_keys_nodes.reserve(query_node_typed.getGroupBy().getNodes().size()); + + for (auto & node : query_node_typed.getGroupBy().getNodes()) + { + if (query_node_typed.isGroupByWithGroupingSets()) + { + auto & grouping_set_keys = node->as(); + for (auto & grouping_set_key : grouping_set_keys.getNodes()) + { + if (grouping_set_key->hasConstantValue()) + continue; + + group_by_keys_nodes.push_back(grouping_set_key); + } + } + else + { + if (node->hasConstantValue()) + continue; + + group_by_keys_nodes.push_back(node); + } + } + + if (query_node_typed.getGroupBy().getNodes().empty()) + { + if (query_node_typed.hasHaving()) + assertNoGroupingFunction(query_node_typed.getHaving(), "in HAVING without GROUP BY"); + + if (query_node_typed.hasOrderBy()) + assertNoGroupingFunction(query_node_typed.getOrderByNode(), "in ORDER BY without GROUP BY"); + + assertNoGroupingFunction(query_node_typed.getProjectionNode(), "in SELECT without GROUP BY"); + } + + bool has_aggregation = !query_node_typed.getGroupBy().getNodes().empty() || !aggregate_function_nodes.empty(); + + if (has_aggregation) + { + ValidateGroupByColumnsVisitor validate_group_by_columns_visitor(group_by_keys_nodes, scope); + + if (query_node_typed.hasHaving()) + 
validate_group_by_columns_visitor.visit(query_node_typed.getHaving()); + + if (query_node_typed.hasOrderBy()) + validate_group_by_columns_visitor.visit(query_node_typed.getOrderByNode()); + + validate_group_by_columns_visitor.visit(query_node_typed.getProjectionNode()); + } + + if (!has_aggregation && (query_node_typed.isGroupByWithGroupingSets() || is_rollup_or_cube)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS, ROLLUP, CUBE or GROUPING SETS are not supported without aggregation"); + + /** WITH section can be safely removed, because WITH section only can provide aliases to query expressions + * and CTE for other sections to use. + * + * Example: WITH 1 AS constant, (x -> x + 1) AS lambda, a AS (SELECT * FROM test_table); + */ + query_node_typed.getWith().getNodes().clear(); + + /** WINDOW section can be safely removed, because WINDOW section can only provide window definition to window functions. + * + * Example: SELECT count(*) OVER w FROM test_table WINDOW w AS (PARTITION BY id); + */ + query_node_typed.getWindow().getNodes().clear(); + + /// Remove aliases from expression and lambda nodes + + for (auto & [_, node] : scope.alias_name_to_expression_node) + node->removeAlias(); + + for (auto & [_, node] : scope.alias_name_to_lambda_node) + node->removeAlias(); + + query_node_typed.resolveProjectionColumns(std::move(projection_columns)); +} + +void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, IdentifierResolveScope & scope) +{ + auto & union_node_typed = union_node->as(); + auto & queries_nodes = union_node_typed.getQueries().getNodes(); + + for (auto & query_node : queries_nodes) + { + IdentifierResolveScope subquery_scope(query_node, &scope /*parent_scope*/); + auto query_node_type = query_node->getNodeType(); + + if (query_node_type == QueryTreeNodeType::QUERY) + { + resolveQuery(query_node, subquery_scope); + } + else if (query_node_type == QueryTreeNodeType::UNION) + { + resolveUnion(query_node, subquery_scope); + } + else + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "UNION unsupported node {}. In scope {}", + query_node->formatASTForErrorMessage(), + scope.scope_node->formatASTForErrorMessage()); + } + } +} + +} + +QueryAnalysisPass::QueryAnalysisPass(QueryTreeNodePtr table_expression_) + : table_expression(std::move(table_expression_)) +{} + +void QueryAnalysisPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +{ + QueryAnalyzer analyzer; + analyzer.resolve(query_tree_node, table_expression, context); +} + +} diff --git a/src/Analyzer/Passes/QueryAnalysisPass.h b/src/Analyzer/Passes/QueryAnalysisPass.h new file mode 100644 index 00000000000..677a13044f2 --- /dev/null +++ b/src/Analyzer/Passes/QueryAnalysisPass.h @@ -0,0 +1,96 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +/** This pass make initial query analysis. + * + * 1. All identifiers are resolved. Next passes can expect that there will be no IdentifierNode in query tree. + * 2. All matchers are resolved. Next passes can expect that there will be no MatcherNode in query tree. + * 3. All functions are resolved. Next passes can expect that for each FunctionNode its result type will be set, and it will be resolved + * as aggregate or non aggregate function. + * 4. All lambda expressions that are function arguments are resolved. Next passes can expect that LambaNode expression is resolved, and lambda has concrete arguments. + * 5. All standalone lambda expressions are resolved. 
Next passes can expect that there will be no standalone LambaNode expressions in query. + * 6. Constants are folded. Example: SELECT plus(1, 1). + * Motivation for this, there are places in query tree that must contain constant: + * Function parameters. Example: SELECT quantile(0.5)(x). + * Functions in which result type depends on constant expression argument. Example: cast(x, 'type_name'). + * Expressions that are part of LIMIT BY LIMIT, LIMIT BY OFFSET, LIMIT, OFFSET. Example: SELECT * FROM test_table LIMIT expr. + * Window function window frame OFFSET begin and OFFSET end. + * + * 7. All scalar subqueries are evaluated. + * TODO: Scalar subqueries must be evaluated only if they are part of query tree where we must have constant. This is currently not done + * because execution layer does not support scalar subqueries execution. + * + * 8. For query node. + * + * Projection columns are calculated. Later passes cannot change type, display name of projection column, and cannot add or remove + * columns in projection section. + * WITH and WINDOW sections are removed. + * + * 9. Query is validated. Parts that are validated: + * + * Constness of function parameters. + * Constness of LIMIT and OFFSET. + * Window functions frame. Constness of window functions frame begin OFFSET, end OFFSET. + * In query only columns that are specified in GROUP BY keys after GROUP BY are used. + * GROUPING function arguments are specified in GROUP BY keys. + * No GROUPING function if there is no GROUP BY. + * No aggregate functions in JOIN TREE, WHERE, PREWHERE, GROUP BY and inside another aggregate functions. + * GROUP BY modifiers CUBE, ROLLUP, GROUPING SETS and WITH TOTALS. + * Table expression modifiers are validated for table and table function nodes in JOIN TREE. + * Table expression modifiers are disabled for subqueries in JOIN TREE. + * For JOIN, ARRAY JOIN subqueries and table functions must have alias (Can be changed using joined_subquery_requires_alias setting). + * + * 10. Special functions handling: + * Function `untuple` is handled properly. + * Function `arrayJoin` is handled properly. + * For functions `dictGet` and its variations and for function `joinGet` identifier as first argument is handled properly. + * Function `exists` is converted into `in`. + * + * For function `grouping` arguments are resolved, but it is planner responsibility to initialize it with concrete grouping function + * based on group by kind and group by keys positions. + * + * For function `in` and its variations arguments are resolved, but sets are not build. + * If left and right arguments are constants constant folding is performed. + * If right argument resolved as table, and table is not of type Set, it is replaced with query that read only ordinary columns from underlying + * storage. + * Example: SELECT id FROM test_table WHERE id IN test_table_other; + * Result: SELECT id FROM test_table WHERE id IN (SELECT test_table_column FROM test_table_other); + */ +class QueryAnalysisPass final : public IQueryTreePass +{ +public: + /** Construct query analysis pass for query or union analysis. + * Available columns are extracted from query node join tree. + */ + QueryAnalysisPass() = default; + + /** Construct query analysis pass for expression or list of expressions analysis. + * Available expression columns are extracted from table expression. + * Table expression node must have query, union, table, table function type. 
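+ *
+ * A minimal usage sketch (variable names here are illustrative):
+ * QueryAnalysisPass pass(table_expression_node);
+ * pass.run(expression_node_to_resolve, context);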
+ */ + explicit QueryAnalysisPass(QueryTreeNodePtr table_expression_); + + String getName() override + { + return "QueryAnalysis"; + } + + String getDescription() override + { + return "Resolve type for each query expression. Replace identifiers, matchers with query expressions. Perform constant folding. Evaluate scalar subqueries."; + } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + +private: + QueryTreeNodePtr table_expression; +}; + +} diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp new file mode 100644 index 00000000000..f43c90e10eb --- /dev/null +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -0,0 +1,157 @@ +#include + +#include +#include + +#include +#include + +#include + +#include + +#include +#include + +namespace DB +{ + +namespace +{ + +class SumIfToCountIfVisitor : public InDepthQueryTreeVisitor +{ +public: + explicit SumIfToCountIfVisitor(ContextPtr & context_) + : context(context_) + {} + + void visitImpl(QueryTreeNodePtr & node) + { + auto * function_node = node->as(); + if (!function_node || !function_node->isAggregateFunction()) + return; + + auto function_name = function_node->getFunctionName(); + auto lower_function_name = Poco::toLower(function_name); + + /// sumIf, SumIf or sUMIf are valid function names, but sumIF or sumiF are not + if (lower_function_name != "sum" && (lower_function_name != "sumif" || !function_name.ends_with("If"))) + return; + + auto & function_node_arguments_nodes = function_node->getArguments().getNodes(); + + /// Rewrite `sumIf(1, cond)` into `countIf(cond)` + if (lower_function_name == "sumif") + { + if (function_node_arguments_nodes.size() != 2) + return; + + auto constant_value = function_node_arguments_nodes[0]->getConstantValueOrNull(); + if (!constant_value) + return; + + const auto & constant_value_literal = constant_value->getValue(); + if (!isInt64OrUInt64FieldType(constant_value_literal.getType())) + return; + + if (constant_value_literal.get() != 1) + return; + + function_node_arguments_nodes[0] = std::move(function_node_arguments_nodes[1]); + function_node_arguments_nodes.resize(1); + + resolveAggregateFunctionNode(*function_node, "countIf"); + return; + } + + /** Rewrite `sum(if(cond, 1, 0))` into `countIf(cond)`. + * Rewrite `sum(if(cond, 0, 1))` into `countIf(not(cond))`. 
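+ *
+ * In the `countIf(not(cond))` case the result type of the added not(cond) node is made Nullable
+ * when cond itself is Nullable (see the branch below).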
+ */ + if (function_node_arguments_nodes.size() != 1) + return; + + auto & nested_argument = function_node_arguments_nodes[0]; + auto * nested_function = nested_argument->as(); + if (!nested_function || nested_function->getFunctionName() != "if") + return; + + auto & nested_if_function_arguments_nodes = nested_function->getArguments().getNodes(); + if (nested_if_function_arguments_nodes.size() != 3) + return; + + auto if_true_condition_constant_value = nested_if_function_arguments_nodes[1]->getConstantValueOrNull(); + auto if_false_condition_constant_value = nested_if_function_arguments_nodes[2]->getConstantValueOrNull(); + + if (!if_true_condition_constant_value || !if_false_condition_constant_value) + return; + + const auto & if_true_condition_constant_value_literal = if_true_condition_constant_value->getValue(); + const auto & if_false_condition_constant_value_literal = if_false_condition_constant_value->getValue(); + + if (!isInt64OrUInt64FieldType(if_true_condition_constant_value_literal.getType()) || + !isInt64OrUInt64FieldType(if_false_condition_constant_value_literal.getType())) + return; + + auto if_true_condition_value = if_true_condition_constant_value_literal.get(); + auto if_false_condition_value = if_false_condition_constant_value_literal.get(); + + /// Rewrite `sum(if(cond, 1, 0))` into `countIf(cond)`. + if (if_true_condition_value == 1 && if_false_condition_value == 0) + { + function_node_arguments_nodes[0] = std::move(nested_if_function_arguments_nodes[0]); + function_node_arguments_nodes.resize(1); + + resolveAggregateFunctionNode(*function_node, "countIf"); + return; + } + + /// Rewrite `sum(if(cond, 0, 1))` into `countIf(not(cond))`. + if (if_true_condition_value == 0 && if_false_condition_value == 1) + { + auto condition_result_type = nested_if_function_arguments_nodes[0]->getResultType(); + DataTypePtr not_function_result_type = std::make_shared(); + if (condition_result_type->isNullable()) + not_function_result_type = makeNullable(not_function_result_type); + + auto not_function = std::make_shared("not"); + not_function->resolveAsFunction(FunctionFactory::instance().get("not", context), std::move(not_function_result_type)); + + auto & not_function_arguments = not_function->getArguments().getNodes(); + not_function_arguments.push_back(std::move(nested_if_function_arguments_nodes[0])); + + function_node_arguments_nodes[0] = std::move(not_function); + function_node_arguments_nodes.resize(1); + + resolveAggregateFunctionNode(*function_node, "countIf"); + return; + } + } + +private: + static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const String & aggregate_function_name) + { + auto function_result_type = function_node.getResultType(); + auto function_aggregate_function = function_node.getAggregateFunction(); + + AggregateFunctionProperties properties; + auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, + function_aggregate_function->getArgumentTypes(), + function_aggregate_function->getParameters(), + properties); + + function_node.resolveAsAggregateFunction(std::move(aggregate_function), std::move(function_result_type)); + } + + ContextPtr & context; +}; + +} + +void SumIfToCountIfPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +{ + SumIfToCountIfVisitor visitor(context); + visitor.visit(query_tree_node); +} + +} diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.h b/src/Analyzer/Passes/SumIfToCountIfPass.h new file mode 100644 index 00000000000..f3ba47f1c2c --- /dev/null +++ 
b/src/Analyzer/Passes/SumIfToCountIfPass.h @@ -0,0 +1,30 @@ +#pragma once + +#include + +namespace DB +{ + +/** Rewrite `sum(if(cond, value_1, value_2))` and `sumIf` functions to `countIf`. + * + * Example: SELECT sumIf(1, cond); + * Result: SELECT countIf(cond); + * + * Example: SELECT sum(if(cond, 1, 0)); + * Result: SELECT countIf(cond); + * + * Example: SELECT sum(if(cond, 0, 1)); + * Result: SELECT countIf(not(cond)); + */ +class SumIfToCountIfPass final : public IQueryTreePass +{ +public: + String getName() override { return "SumIfToCountIf"; } + + String getDescription() override { return "Rewrite sum(if) and sumIf into countIf"; } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + +}; + +} diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp new file mode 100644 index 00000000000..6520cb0717d --- /dev/null +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -0,0 +1,64 @@ +#include + +#include + +#include +#include + +namespace DB +{ + +namespace +{ + +bool isUniqFunction(const String & function_name) +{ + return function_name == "uniq" || + function_name == "uniqExact" || + function_name == "uniqHLL12" || + function_name == "uniqCombined" || + function_name == "uniqCombined64" || + function_name == "uniqTheta"; +} + +class UniqInjectiveFunctionsEliminationVisitor : public InDepthQueryTreeVisitor +{ +public: + static void visitImpl(QueryTreeNodePtr & node) + { + auto * function_node = node->as(); + if (!function_node || !function_node->isAggregateFunction() || !isUniqFunction(function_node->getFunctionName())) + return; + + auto & uniq_function_arguments_nodes = function_node->getArguments().getNodes(); + for (auto & uniq_function_argument_node : uniq_function_arguments_nodes) + { + auto * uniq_function_argument_node_typed = uniq_function_argument_node->as(); + if (!uniq_function_argument_node_typed || !uniq_function_argument_node_typed->isOrdinaryFunction()) + continue; + + auto & uniq_function_argument_node_argument_nodes = uniq_function_argument_node_typed->getArguments().getNodes(); + + /// Do not apply optimization if injective function contains multiple arguments + if (uniq_function_argument_node_argument_nodes.size() != 1) + continue; + + const auto & uniq_function_argument_node_function = uniq_function_argument_node_typed->getFunction(); + if (!uniq_function_argument_node_function->isInjective({})) + continue; + + /// Replace injective function with its single argument + uniq_function_argument_node = uniq_function_argument_node_argument_nodes[0]; + } + } +}; + +} + +void UniqInjectiveFunctionsEliminationPass::run(QueryTreeNodePtr query_tree_node, ContextPtr) +{ + UniqInjectiveFunctionsEliminationVisitor visitor; + visitor.visit(query_tree_node); +} + +} diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.h b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.h new file mode 100644 index 00000000000..a0f07dfb7b5 --- /dev/null +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.h @@ -0,0 +1,24 @@ +#pragma once + +#include + +namespace DB +{ + +/** Remove injective functions from `uniq*` functions arguments. 
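+ * Only injective functions with a single argument are eliminated; multi-argument injective functions are kept as is.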
+ * + * Example: SELECT uniq(injectiveFunction(argument)); + * Result: SELECT uniq(argument); + */ +class UniqInjectiveFunctionsEliminationPass final : public IQueryTreePass +{ +public: + String getName() override { return "UniqInjectiveFunctionsElimination"; } + + String getDescription() override { return "Remove injective functions from uniq functions arguments."; } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + +}; + +} diff --git a/src/Analyzer/QueryNode.cpp b/src/Analyzer/QueryNode.cpp new file mode 100644 index 00000000000..378bd7259bf --- /dev/null +++ b/src/Analyzer/QueryNode.cpp @@ -0,0 +1,456 @@ +#include + +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +QueryNode::QueryNode() + : IQueryTreeNode(children_size) +{ + children[with_child_index] = std::make_shared(); + children[projection_child_index] = std::make_shared(); + children[group_by_child_index] = std::make_shared(); + children[window_child_index] = std::make_shared(); + children[order_by_child_index] = std::make_shared(); + children[limit_by_child_index] = std::make_shared(); +} + +String QueryNode::getName() const +{ + WriteBufferFromOwnString buffer; + + if (hasWith()) + { + buffer << getWith().getName(); + buffer << ' '; + } + + buffer << "SELECT "; + buffer << getProjection().getName(); + + if (getJoinTree()) + { + buffer << " FROM "; + buffer << getJoinTree()->getName(); + } + + if (getPrewhere()) + { + buffer << " PREWHERE "; + buffer << getPrewhere()->getName(); + } + + if (getWhere()) + { + buffer << " WHERE "; + buffer << getWhere()->getName(); + } + + if (hasGroupBy()) + { + buffer << " GROUP BY "; + buffer << getGroupBy().getName(); + } + + if (hasHaving()) + { + buffer << " HAVING "; + buffer << getHaving()->getName(); + } + + if (hasWindow()) + { + buffer << " WINDOW "; + buffer << getWindow().getName(); + } + + if (hasOrderBy()) + { + buffer << " ORDER BY "; + buffer << getOrderByNode()->getName(); + } + + if (hasInterpolate()) + { + buffer << " INTERPOLATE "; + buffer << getInterpolate()->getName(); + } + + if (hasLimitByLimit()) + { + buffer << "LIMIT "; + buffer << getLimitByLimit()->getName(); + } + + if (hasLimitByOffset()) + { + buffer << "OFFSET "; + buffer << getLimitByOffset()->getName(); + } + + if (hasLimitBy()) + { + buffer << " BY "; + buffer << getLimitBy().getName(); + } + + if (hasLimit()) + { + buffer << " LIMIT "; + buffer << getLimit()->getName(); + } + + if (hasOffset()) + { + buffer << " OFFSET "; + buffer << getOffset()->getName(); + } + + return buffer.str(); +} + +void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "QUERY id: " << format_state.getNodeId(this); + + if (hasAlias()) + buffer << ", alias: " << getAlias(); + + if (is_subquery) + buffer << ", is_subquery: " << is_subquery; + + if (is_cte) + buffer << ", is_cte: " << is_cte; + + if (is_distinct) + buffer << ", is_distinct: " << is_distinct; + + if (is_limit_with_ties) + buffer << ", is_limit_with_ties: " << is_limit_with_ties; + + if (is_group_by_with_totals) + buffer << ", is_group_by_with_totals: " << is_group_by_with_totals; + + std::string group_by_type; + if (is_group_by_with_rollup) + group_by_type = "rollup"; + else if (is_group_by_with_cube) + group_by_type = "cube"; + else if (is_group_by_with_grouping_sets) + group_by_type = "grouping_sets"; + + if (!group_by_type.empty()) + buffer << 
", group_by_type: " << group_by_type; + + if (!cte_name.empty()) + buffer << ", cte_name: " << cte_name; + + if (constant_value) + { + buffer << ", constant_value: " << constant_value->getValue().dump(); + buffer << ", constant_value_type: " << constant_value->getType()->getName(); + } + + if (table_expression_modifiers) + { + buffer << ", "; + table_expression_modifiers->dump(buffer); + } + + if (hasWith()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "WITH\n"; + getWith().dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (!projection_columns.empty()) + { + buffer << '\n'; + buffer << std::string(indent + 2, ' ') << "PROJECTION COLUMNS\n"; + + size_t projection_columns_size = projection_columns.size(); + for (size_t i = 0; i < projection_columns_size; ++i) + { + const auto & projection_column = projection_columns[i]; + buffer << std::string(indent + 4, ' ') << projection_column.name << " " << projection_column.type->getName(); + if (i + 1 != projection_columns_size) + buffer << '\n'; + } + } + + buffer << '\n'; + buffer << std::string(indent + 2, ' ') << "PROJECTION\n"; + getProjection().dumpTreeImpl(buffer, format_state, indent + 4); + + if (getJoinTree()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "JOIN TREE\n"; + getJoinTree()->dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (getPrewhere()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "PREWHERE\n"; + getPrewhere()->dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (getWhere()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "WHERE\n"; + getWhere()->dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (hasGroupBy()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "GROUP BY\n"; + getGroupBy().dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (hasHaving()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "HAVING\n"; + getHaving()->dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (hasWindow()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "WINDOW\n"; + getWindow().dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (hasOrderBy()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "ORDER BY\n"; + getOrderBy().dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (hasInterpolate()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "INTERPOLATE\n"; + getInterpolate()->dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (hasLimitByLimit()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "LIMIT BY LIMIT\n"; + getLimitByLimit()->dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (hasLimitByOffset()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "LIMIT BY OFFSET\n"; + getLimitByOffset()->dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (hasLimitBy()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "LIMIT BY\n"; + getLimitBy().dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (hasLimit()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "LIMIT\n"; + getLimit()->dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (hasOffset()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "OFFSET\n"; + getOffset()->dumpTreeImpl(buffer, format_state, indent + 4); + } +} + +bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + + if (constant_value && rhs_typed.constant_value && *constant_value != *rhs_typed.constant_value) + return false; + else if (constant_value && 
!rhs_typed.constant_value) + return false; + else if (!constant_value && rhs_typed.constant_value) + return false; + + if (table_expression_modifiers && rhs_typed.table_expression_modifiers && table_expression_modifiers != rhs_typed.table_expression_modifiers) + return false; + else if (table_expression_modifiers && !rhs_typed.table_expression_modifiers) + return false; + else if (!table_expression_modifiers && rhs_typed.table_expression_modifiers) + return false; + + return is_subquery == rhs_typed.is_subquery && + is_cte == rhs_typed.is_cte && + cte_name == rhs_typed.cte_name && + projection_columns == rhs_typed.projection_columns && + is_distinct == rhs_typed.is_distinct && + is_limit_with_ties == rhs_typed.is_limit_with_ties && + is_group_by_with_totals == rhs_typed.is_group_by_with_totals && + is_group_by_with_rollup == rhs_typed.is_group_by_with_rollup && + is_group_by_with_cube == rhs_typed.is_group_by_with_cube && + is_group_by_with_grouping_sets == rhs_typed.is_group_by_with_grouping_sets; +} + +void QueryNode::updateTreeHashImpl(HashState & state) const +{ + state.update(is_subquery); + state.update(is_cte); + + state.update(cte_name.size()); + state.update(cte_name); + + state.update(projection_columns.size()); + for (const auto & projection_column : projection_columns) + { + state.update(projection_column.name.size()); + state.update(projection_column.name); + + auto projection_column_type_name = projection_column.type->getName(); + state.update(projection_column_type_name.size()); + state.update(projection_column_type_name); + } + + state.update(is_distinct); + state.update(is_limit_with_ties); + state.update(is_group_by_with_totals); + state.update(is_group_by_with_rollup); + state.update(is_group_by_with_cube); + state.update(is_group_by_with_grouping_sets); + + if (constant_value) + { + auto constant_dump = applyVisitor(FieldVisitorToString(), constant_value->getValue()); + state.update(constant_dump.size()); + state.update(constant_dump); + + auto constant_value_type_name = constant_value->getType()->getName(); + state.update(constant_value_type_name.size()); + state.update(constant_value_type_name); + } + + if (table_expression_modifiers) + table_expression_modifiers->updateTreeHash(state); +} + +QueryTreeNodePtr QueryNode::cloneImpl() const +{ + auto result_query_node = std::make_shared(); + + result_query_node->is_subquery = is_subquery; + result_query_node->is_cte = is_cte; + result_query_node->is_distinct = is_distinct; + result_query_node->is_limit_with_ties = is_limit_with_ties; + result_query_node->is_group_by_with_totals = is_group_by_with_totals; + result_query_node->is_group_by_with_rollup = is_group_by_with_rollup; + result_query_node->is_group_by_with_cube = is_group_by_with_cube; + result_query_node->is_group_by_with_grouping_sets = is_group_by_with_grouping_sets; + result_query_node->cte_name = cte_name; + result_query_node->projection_columns = projection_columns; + result_query_node->constant_value = constant_value; + result_query_node->table_expression_modifiers = table_expression_modifiers; + + return result_query_node; +} + +ASTPtr QueryNode::toASTImpl() const +{ + auto select_query = std::make_shared(); + select_query->distinct = is_distinct; + select_query->limit_with_ties = is_limit_with_ties; + select_query->group_by_with_totals = is_group_by_with_totals; + select_query->group_by_with_rollup = is_group_by_with_rollup; + select_query->group_by_with_cube = is_group_by_with_cube; + select_query->group_by_with_grouping_sets = 
is_group_by_with_grouping_sets; + + if (hasWith()) + select_query->setExpression(ASTSelectQuery::Expression::WITH, getWith().toAST()); + + select_query->setExpression(ASTSelectQuery::Expression::SELECT, getProjection().toAST()); + + ASTPtr tables_in_select_query_ast = std::make_shared(); + addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, getJoinTree()); + select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables_in_select_query_ast)); + + if (getPrewhere()) + select_query->setExpression(ASTSelectQuery::Expression::PREWHERE, getPrewhere()->toAST()); + + if (getWhere()) + select_query->setExpression(ASTSelectQuery::Expression::WHERE, getWhere()->toAST()); + + if (hasGroupBy()) + select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, getGroupBy().toAST()); + + if (hasHaving()) + select_query->setExpression(ASTSelectQuery::Expression::HAVING, getHaving()->toAST()); + + if (hasWindow()) + select_query->setExpression(ASTSelectQuery::Expression::WINDOW, getWindow().toAST()); + + if (hasOrderBy()) + select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, getOrderBy().toAST()); + + if (hasInterpolate()) + select_query->setExpression(ASTSelectQuery::Expression::INTERPOLATE, getInterpolate()->toAST()); + + if (hasLimitByLimit()) + select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY_LENGTH, getLimitByLimit()->toAST()); + + if (hasLimitByOffset()) + select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY_OFFSET, getLimitByOffset()->toAST()); + + if (hasLimitBy()) + select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY, getLimitBy().toAST()); + + if (hasLimit()) + select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, getLimit()->toAST()); + + if (hasOffset()) + select_query->setExpression(ASTSelectQuery::Expression::LIMIT_OFFSET, getOffset()->toAST()); + + if (hasSettingsChanges()) + { + auto settings_query = std::make_shared(); + settings_query->changes = settings_changes; + select_query->setExpression(ASTSelectQuery::Expression::SETTINGS, std::move(settings_query)); + } + + auto result_select_query = std::make_shared(); + result_select_query->union_mode = SelectUnionMode::UNION_DEFAULT; + + auto list_of_selects = std::make_shared(); + list_of_selects->children.push_back(std::move(select_query)); + + result_select_query->children.push_back(std::move(list_of_selects)); + result_select_query->list_of_selects = result_select_query->children.back(); + + if (is_subquery) + { + auto subquery = std::make_shared(); + + subquery->cte_name = cte_name; + subquery->children.push_back(std::move(result_select_query)); + + return subquery; + } + + return result_select_query; +} + +} diff --git a/src/Analyzer/QueryNode.h b/src/Analyzer/QueryNode.h new file mode 100644 index 00000000000..05d393b4212 --- /dev/null +++ b/src/Analyzer/QueryNode.h @@ -0,0 +1,628 @@ +#pragma once + +#include + +#include +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; +} + +/** Query node represents query in query tree. + * + * Example: SELECT * FROM test_table WHERE id == 0; + * Example: SELECT * FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 ON t1.id = t2.id; + * + * Query node consists of following sections. + * 1. WITH section. + * 2. PROJECTION section. + * 3. JOIN TREE section. 
+ * Example: SELECT * FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 ON t1.id = t2.id; + * test_table_1 AS t1 INNER JOIN test_table_2 AS t2 ON t1.id = t2.id - JOIN TREE section. + * 4. PREWHERE section. + * 5. WHERE section. + * 6. GROUP BY section. + * 7. HAVING section. + * 8. WINDOW section. + * Example: SELECT * FROM test_table WINDOW window AS (PARTITION BY id); + * 9. ORDER BY section. + * 10. INTERPOLATE section. + * Example: SELECT * FROM test_table ORDER BY id WITH FILL INTERPOLATE (value AS value + 1); + * value AS value + 1 - INTERPOLATE section. + * 11. LIMIT BY limit section. + * 12. LIMIT BY offset section. + * 13. LIMIT BY section. + * Example: SELECT * FROM test_table LIMIT 1 AS a OFFSET 5 AS b BY id, value; + * 1 AS a - LIMIT BY limit section. + * 5 AS b - LIMIT BY offset section. + * id, value - LIMIT BY section. + * 14. LIMIT section. + * 15. OFFSET section. + * + * Query node contains settings changes that must be applied before query analysis or execution. + * Example: SELECT * FROM test_table SETTINGS prefer_column_name_to_alias = 1, join_use_nulls = 1; + * + * Query node can be used as CTE. + * Example: WITH cte_subquery AS (SELECT 1) SELECT * FROM cte_subquery; + * + * Query node can be used as scalar subquery. + * Example: SELECT (SELECT 1) AS scalar_subquery. + * + * During query analysis pass query node must be resolved with projection columns. + */ +class QueryNode; +using QueryNodePtr = std::shared_ptr; + +class QueryNode final : public IQueryTreeNode +{ +public: + explicit QueryNode(); + + /// Returns true if query node is subquery, false otherwise + bool isSubquery() const + { + return is_subquery; + } + + /// Set query node is subquery value + void setIsSubquery(bool is_subquery_value) + { + is_subquery = is_subquery_value; + } + + /// Returns true if query node is CTE, false otherwise + bool isCTE() const + { + return is_cte; + } + + /// Set query node is CTE + void setIsCTE(bool is_cte_value) + { + is_cte = is_cte_value; + } + + /// Get query node CTE name + const std::string & getCTEName() const + { + return cte_name; + } + + /// Set query node CTE name + void setCTEName(std::string cte_name_value) + { + cte_name = std::move(cte_name_value); + } + + /// Returns true if query node has DISTINCT, false otherwise + bool isDistinct() const + { + return is_distinct; + } + + /// Set query node DISTINCT value + void setIsDistinct(bool is_distinct_value) + { + is_distinct = is_distinct_value; + } + + /// Returns true if query node has LIMIT WITH TIES, false otherwise + bool isLimitWithTies() const + { + return is_limit_with_ties; + } + + /// Set query node LIMIT WITH TIES value + void setIsLimitWithTies(bool is_limit_with_ties_value) + { + is_limit_with_ties = is_limit_with_ties_value; + } + + /// Returns true, if query node has GROUP BY WITH TOTALS, false otherwise + bool isGroupByWithTotals() const + { + return is_group_by_with_totals; + } + + /// Set query node GROUP BY WITH TOTALS value + void setIsGroupByWithTotals(bool is_group_by_with_totals_value) + { + is_group_by_with_totals = is_group_by_with_totals_value; + } + + /// Returns true, if query node has GROUP BY with ROLLUP modifier, false otherwise + bool isGroupByWithRollup() const + { + return is_group_by_with_rollup; + } + + /// Set query node GROUP BY with ROLLUP modifier value + void setIsGroupByWithRollup(bool is_group_by_with_rollup_value) + { + is_group_by_with_rollup = is_group_by_with_rollup_value; + } + + /// Returns true, if query node has GROUP BY with CUBE modifier, false otherwise + 
bool isGroupByWithCube() const + { + return is_group_by_with_cube; + } + + /// Set query node GROUP BY with CUBE modifier value + void setIsGroupByWithCube(bool is_group_by_with_cube_value) + { + is_group_by_with_cube = is_group_by_with_cube_value; + } + + /// Returns true, if query node has GROUP BY with GROUPING SETS modifier, false otherwise + bool isGroupByWithGroupingSets() const + { + return is_group_by_with_grouping_sets; + } + + /// Set query node GROUP BY with GROUPING SETS modifier value + void setIsGroupByWithGroupingSets(bool is_group_by_with_grouping_sets_value) + { + is_group_by_with_grouping_sets = is_group_by_with_grouping_sets_value; + } + + /// Return true if query node has table expression modifiers, false otherwise + bool hasTableExpressionModifiers() const + { + return table_expression_modifiers.has_value(); + } + + /// Get table expression modifiers + const std::optional & getTableExpressionModifiers() const + { + return table_expression_modifiers; + } + + /// Set table expression modifiers + void setTableExpressionModifiers(TableExpressionModifiers table_expression_modifiers_value) + { + table_expression_modifiers = std::move(table_expression_modifiers_value); + } + + /// Returns true if query node WITH section is not empty, false otherwise + bool hasWith() const + { + return !getWith().getNodes().empty(); + } + + /// Get WITH section + const ListNode & getWith() const + { + return children[with_child_index]->as(); + } + + /// Get WITH section + ListNode & getWith() + { + return children[with_child_index]->as(); + } + + /// Get WITH section node + const QueryTreeNodePtr & getWithNode() const + { + return children[with_child_index]; + } + + /// Get WITH section node + QueryTreeNodePtr & getWithNode() + { + return children[with_child_index]; + } + + /// Get PROJECTION section + const ListNode & getProjection() const + { + return children[projection_child_index]->as(); + } + + /// Get PROJECTION section + ListNode & getProjection() + { + return children[projection_child_index]->as(); + } + + /// Get PROJECTION section node + const QueryTreeNodePtr & getProjectionNode() const + { + return children[projection_child_index]; + } + + /// Get PROJECTION section node + QueryTreeNodePtr & getProjectionNode() + { + return children[projection_child_index]; + } + + /// Get JOIN TREE section node + const QueryTreeNodePtr & getJoinTree() const + { + return children[join_tree_child_index]; + } + + /// Get JOIN TREE section node + QueryTreeNodePtr & getJoinTree() + { + return children[join_tree_child_index]; + } + + /// Returns true if query node PREWHERE section is not empty, false otherwise + bool hasPrewhere() const + { + return children[prewhere_child_index] != nullptr; + } + + /// Get PREWHERE section node + const QueryTreeNodePtr & getPrewhere() const + { + return children[prewhere_child_index]; + } + + /// Get PREWHERE section node + QueryTreeNodePtr & getPrewhere() + { + return children[prewhere_child_index]; + } + + /// Returns true if query node WHERE section is not empty, false otherwise + bool hasWhere() const + { + return children[where_child_index] != nullptr; + } + + /// Get WHERE section node + const QueryTreeNodePtr & getWhere() const + { + return children[where_child_index]; + } + + /// Get WHERE section node + QueryTreeNodePtr & getWhere() + { + return children[where_child_index]; + } + + /// Returns true if query node GROUP BY section is not empty, false otherwise + bool hasGroupBy() const + { + return !getGroupBy().getNodes().empty(); + } + + /// Get GROUP BY 
section + const ListNode & getGroupBy() const + { + return children[group_by_child_index]->as(); + } + + /// Get GROUP BY section + ListNode & getGroupBy() + { + return children[group_by_child_index]->as(); + } + + /// Get GROUP BY section node + const QueryTreeNodePtr & getGroupByNode() const + { + return children[group_by_child_index]; + } + + /// Get GROUP BY section node + QueryTreeNodePtr & getGroupByNode() + { + return children[group_by_child_index]; + } + + /// Returns true if query node HAVING section is not empty, false otherwise + bool hasHaving() const + { + return getHaving() != nullptr; + } + + /// Get HAVING section node + const QueryTreeNodePtr & getHaving() const + { + return children[having_child_index]; + } + + /// Get HAVING section node + QueryTreeNodePtr & getHaving() + { + return children[having_child_index]; + } + + /// Returns true if query node WINDOW section is not empty, false otherwise + bool hasWindow() const + { + return !getWindow().getNodes().empty(); + } + + /// Get WINDOW section + const ListNode & getWindow() const + { + return children[window_child_index]->as(); + } + + /// Get WINDOW section + ListNode & getWindow() + { + return children[window_child_index]->as(); + } + + /// Get WINDOW section node + const QueryTreeNodePtr & getWindowNode() const + { + return children[window_child_index]; + } + + /// Get WINDOW section node + QueryTreeNodePtr & getWindowNode() + { + return children[window_child_index]; + } + + /// Returns true if query node ORDER BY section is not empty, false otherwise + bool hasOrderBy() const + { + return !getOrderBy().getNodes().empty(); + } + + /// Get ORDER BY section + const ListNode & getOrderBy() const + { + return children[order_by_child_index]->as(); + } + + /// Get ORDER BY section + ListNode & getOrderBy() + { + return children[order_by_child_index]->as(); + } + + /// Get ORDER BY section node + const QueryTreeNodePtr & getOrderByNode() const + { + return children[order_by_child_index]; + } + + /// Get ORDER BY section node + QueryTreeNodePtr & getOrderByNode() + { + return children[order_by_child_index]; + } + + /// Returns true if query node INTERPOLATE section is not empty, false otherwise + bool hasInterpolate() const + { + return getInterpolate() != nullptr; + } + + /// Get INTERPOLATE section node + const QueryTreeNodePtr & getInterpolate() const + { + return children[interpolate_child_index]; + } + + /// Get INTERPOLATE section node + QueryTreeNodePtr & getInterpolate() + { + return children[interpolate_child_index]; + } + + /// Returns true if query node LIMIT BY LIMIT section is not empty, false otherwise + bool hasLimitByLimit() const + { + return children[limit_by_limit_child_index] != nullptr; + } + + /// Get LIMIT BY LIMIT section node + const QueryTreeNodePtr & getLimitByLimit() const + { + return children[limit_by_limit_child_index]; + } + + /// Get LIMIT BY LIMIT section node + QueryTreeNodePtr & getLimitByLimit() + { + return children[limit_by_limit_child_index]; + } + + /// Returns true if query node LIMIT BY OFFSET section is not empty, false otherwise + bool hasLimitByOffset() const + { + return children[limit_by_offset_child_index] != nullptr; + } + + /// Get LIMIT BY OFFSET section node + const QueryTreeNodePtr & getLimitByOffset() const + { + return children[limit_by_offset_child_index]; + } + + /// Get LIMIT BY OFFSET section node + QueryTreeNodePtr & getLimitByOffset() + { + return children[limit_by_offset_child_index]; + } + + /// Returns true if query node LIMIT BY section is not empty, false 
otherwise + bool hasLimitBy() const + { + return !getLimitBy().getNodes().empty(); + } + + /// Get LIMIT BY section + const ListNode & getLimitBy() const + { + return children[limit_by_child_index]->as(); + } + + /// Get LIMIT BY section + ListNode & getLimitBy() + { + return children[limit_by_child_index]->as(); + } + + /// Get LIMIT BY section node + const QueryTreeNodePtr & getLimitByNode() const + { + return children[limit_by_child_index]; + } + + /// Get LIMIT BY section node + QueryTreeNodePtr & getLimitByNode() + { + return children[limit_by_child_index]; + } + + /// Returns true if query node LIMIT section is not empty, false otherwise + bool hasLimit() const + { + return children[limit_child_index] != nullptr; + } + + /// Get LIMIT section node + const QueryTreeNodePtr & getLimit() const + { + return children[limit_child_index]; + } + + /// Get LIMIT section node + QueryTreeNodePtr & getLimit() + { + return children[limit_child_index]; + } + + /// Returns true if query node OFFSET section is not empty, false otherwise + bool hasOffset() const + { + return children[offset_child_index] != nullptr; + } + + /// Get OFFSET section node + const QueryTreeNodePtr & getOffset() const + { + return children[offset_child_index]; + } + + /// Get OFFSET section node + QueryTreeNodePtr & getOffset() + { + return children[offset_child_index]; + } + + /// Returns true if query node has settings changes specified, false otherwise + bool hasSettingsChanges() const + { + return !settings_changes.empty(); + } + + /// Get query node settings changes + const SettingsChanges & getSettingsChanges() const + { + return settings_changes; + } + + /// Set query node settings changes value + void setSettingsChanges(SettingsChanges settings_changes_value) + { + settings_changes = std::move(settings_changes_value); + } + + /// Get query node projection columns + const NamesAndTypes & getProjectionColumns() const + { + return projection_columns; + } + + /// Resolve query node projection columns + void resolveProjectionColumns(NamesAndTypes projection_columns_value) + { + projection_columns = std::move(projection_columns_value); + } + + QueryTreeNodeType getNodeType() const override + { + return QueryTreeNodeType::QUERY; + } + + String getName() const override; + + DataTypePtr getResultType() const override + { + if (constant_value) + return constant_value->getType(); + + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Method getResultType is not supported for non scalar query node"); + } + + /// Perform constant folding for scalar subquery node + void performConstantFolding(ConstantValuePtr constant_folded_value) + { + constant_value = std::move(constant_folded_value); + } + + ConstantValuePtr getConstantValueOrNull() const override + { + return constant_value; + } + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + +protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(HashState &) const override; + + QueryTreeNodePtr cloneImpl() const override; + + ASTPtr toASTImpl() const override; + +private: + bool is_subquery = false; + bool is_cte = false; + bool is_distinct = false; + bool is_limit_with_ties = false; + bool is_group_by_with_totals = false; + bool is_group_by_with_rollup = false; + bool is_group_by_with_cube = false; + bool is_group_by_with_grouping_sets = false; + + std::string cte_name; + NamesAndTypes projection_columns; + ConstantValuePtr constant_value; + std::optional table_expression_modifiers; + 
SettingsChanges settings_changes; + + static constexpr size_t with_child_index = 0; + static constexpr size_t projection_child_index = 1; + static constexpr size_t join_tree_child_index = 2; + static constexpr size_t prewhere_child_index = 3; + static constexpr size_t where_child_index = 4; + static constexpr size_t group_by_child_index = 5; + static constexpr size_t having_child_index = 6; + static constexpr size_t window_child_index = 7; + static constexpr size_t order_by_child_index = 8; + static constexpr size_t interpolate_child_index = 9; + static constexpr size_t limit_by_limit_child_index = 10; + static constexpr size_t limit_by_offset_child_index = 11; + static constexpr size_t limit_by_child_index = 12; + static constexpr size_t limit_child_index = 13; + static constexpr size_t offset_child_index = 14; + static constexpr size_t children_size = offset_child_index + 1; +}; + +} diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp new file mode 100644 index 00000000000..890aa2b01a2 --- /dev/null +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -0,0 +1,887 @@ +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; + extern const int LOGICAL_ERROR; + extern const int EXPECTED_ALL_OR_ANY; + extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +class QueryTreeBuilder +{ +public: + explicit QueryTreeBuilder(ASTPtr query_, ContextPtr context_); + + QueryTreeNodePtr getQueryTreeNode() + { + return query_tree_node; + } + +private: + QueryTreeNodePtr buildSelectOrUnionExpression(const ASTPtr & select_or_union_query, bool is_subquery, const std::string & cte_name) const; + + QueryTreeNodePtr buildSelectWithUnionExpression(const ASTPtr & select_with_union_query, bool is_subquery, const std::string & cte_name) const; + + QueryTreeNodePtr buildSelectIntersectExceptQuery(const ASTPtr & select_intersect_except_query, bool is_subquery, const std::string & cte_name) const; + + QueryTreeNodePtr buildSelectExpression(const ASTPtr & select_query, bool is_subquery, const std::string & cte_name) const; + + QueryTreeNodePtr buildSortList(const ASTPtr & order_by_expression_list) const; + + QueryTreeNodePtr buildInterpolateList(const ASTPtr & interpolate_expression_list) const; + + QueryTreeNodePtr buildWindowList(const ASTPtr & window_definition_list) const; + + QueryTreeNodePtr buildExpressionList(const ASTPtr & expression_list) const; + + QueryTreeNodePtr buildExpression(const ASTPtr & expression) const; + + QueryTreeNodePtr buildWindow(const ASTPtr & window_definition) const; + + QueryTreeNodePtr buildJoinTree(const ASTPtr & tables_in_select_query) const; + + ColumnTransformersNodes buildColumnTransformers(const ASTPtr & matcher_expression, size_t start_child_index) const; + + ASTPtr query; + ContextPtr context; + QueryTreeNodePtr query_tree_node; + +}; + +QueryTreeBuilder::QueryTreeBuilder(ASTPtr query_, ContextPtr context_) + : query(query_->clone()) + , context(std::move(context_)) +{ + if (query->as() || + query->as() || + query->as()) + 
query_tree_node = buildSelectOrUnionExpression(query, false /*is_subquery*/, {} /*cte_name*/); + else if (query->as()) + query_tree_node = buildExpressionList(query); + else + query_tree_node = buildExpression(query); +} + +QueryTreeNodePtr QueryTreeBuilder::buildSelectOrUnionExpression(const ASTPtr & select_or_union_query, bool is_subquery, const std::string & cte_name) const +{ + QueryTreeNodePtr query_node; + + if (select_or_union_query->as()) + query_node = buildSelectWithUnionExpression(select_or_union_query, is_subquery /*is_subquery*/, cte_name /*cte_name*/); + else if (select_or_union_query->as()) + query_node = buildSelectIntersectExceptQuery(select_or_union_query, is_subquery /*is_subquery*/, cte_name /*cte_name*/); + else if (select_or_union_query->as()) + query_node = buildSelectExpression(select_or_union_query, is_subquery /*is_subquery*/, cte_name /*cte_name*/); + else + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "SELECT or UNION query {} is not supported", select_or_union_query->formatForErrorMessage()); + + return query_node; +} + +QueryTreeNodePtr QueryTreeBuilder::buildSelectWithUnionExpression(const ASTPtr & select_with_union_query, bool is_subquery, const std::string & cte_name) const +{ + auto & select_with_union_query_typed = select_with_union_query->as(); + auto & select_lists = select_with_union_query_typed.list_of_selects->as(); + + if (select_lists.children.size() == 1) + return buildSelectOrUnionExpression(select_lists.children[0], is_subquery, cte_name); + + auto union_node = std::make_shared(); + union_node->setIsSubquery(is_subquery); + union_node->setIsCTE(!cte_name.empty()); + union_node->setCTEName(cte_name); + union_node->setUnionMode(select_with_union_query_typed.union_mode); + union_node->setUnionModes(select_with_union_query_typed.list_of_modes); + union_node->setOriginalAST(select_with_union_query); + + size_t select_lists_children_size = select_lists.children.size(); + + for (size_t i = 0; i < select_lists_children_size; ++i) + { + auto & select_list_node = select_lists.children[i]; + QueryTreeNodePtr query_node = buildSelectOrUnionExpression(select_list_node, false /*is_subquery*/, {} /*cte_name*/); + union_node->getQueries().getNodes().push_back(std::move(query_node)); + } + + return union_node; +} + +QueryTreeNodePtr QueryTreeBuilder::buildSelectIntersectExceptQuery(const ASTPtr & select_intersect_except_query, bool is_subquery, const std::string & cte_name) const +{ + auto & select_intersect_except_query_typed = select_intersect_except_query->as(); + auto select_lists = select_intersect_except_query_typed.getListOfSelects(); + + if (select_lists.size() == 1) + return buildSelectExpression(select_lists[0], is_subquery, cte_name); + + auto union_node = std::make_shared(); + union_node->setIsSubquery(is_subquery); + union_node->setIsCTE(!cte_name.empty()); + union_node->setCTEName(cte_name); + + if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::INTERSECT_ALL) + union_node->setUnionMode(SelectUnionMode::INTERSECT_ALL); + else if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::INTERSECT_DISTINCT) + union_node->setUnionMode(SelectUnionMode::INTERSECT_DISTINCT); + else if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::EXCEPT_ALL) + union_node->setUnionMode(SelectUnionMode::EXCEPT_ALL); + else if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::EXCEPT_DISTINCT) 
+ union_node->setUnionMode(SelectUnionMode::EXCEPT_DISTINCT); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "UNION type is not initialized"); + + union_node->setUnionModes(SelectUnionModes(select_lists.size() - 1, union_node->getUnionMode())); + union_node->setOriginalAST(select_intersect_except_query); + + size_t select_lists_size = select_lists.size(); + + for (size_t i = 0; i < select_lists_size; ++i) + { + auto & select_list_node = select_lists[i]; + QueryTreeNodePtr query_node = buildSelectOrUnionExpression(select_list_node, false /*is_subquery*/, {} /*cte_name*/); + union_node->getQueries().getNodes().push_back(std::move(query_node)); + } + + return union_node; +} + +QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_query, bool is_subquery, const std::string & cte_name) const +{ + const auto & select_query_typed = select_query->as(); + auto current_query_tree = std::make_shared(); + + current_query_tree->setIsSubquery(is_subquery); + current_query_tree->setIsCTE(!cte_name.empty()); + current_query_tree->setCTEName(cte_name); + current_query_tree->setIsDistinct(select_query_typed.distinct); + current_query_tree->setIsLimitWithTies(select_query_typed.limit_with_ties); + current_query_tree->setIsGroupByWithTotals(select_query_typed.group_by_with_totals); + current_query_tree->setIsGroupByWithCube(select_query_typed.group_by_with_cube); + current_query_tree->setIsGroupByWithRollup(select_query_typed.group_by_with_rollup); + current_query_tree->setIsGroupByWithGroupingSets(select_query_typed.group_by_with_grouping_sets); + current_query_tree->setOriginalAST(select_query); + + auto select_settings = select_query_typed.settings(); + if (select_settings) + { + auto & set_query = select_settings->as(); + current_query_tree->setSettingsChanges(set_query.changes); + } + + current_query_tree->getJoinTree() = buildJoinTree(select_query_typed.tables()); + + auto select_with_list = select_query_typed.with(); + if (select_with_list) + current_query_tree->getWithNode() = buildExpressionList(select_with_list); + + auto select_expression_list = select_query_typed.select(); + if (select_expression_list) + current_query_tree->getProjectionNode() = buildExpressionList(select_expression_list); + + auto prewhere_expression = select_query_typed.prewhere(); + if (prewhere_expression) + current_query_tree->getPrewhere() = buildExpression(prewhere_expression); + + auto where_expression = select_query_typed.where(); + if (where_expression) + current_query_tree->getWhere() = buildExpression(where_expression); + + auto group_by_list = select_query_typed.groupBy(); + if (group_by_list) + { + auto & group_by_children = group_by_list->children; + + if (current_query_tree->isGroupByWithGroupingSets()) + { + auto grouping_sets_list_node = std::make_shared(); + + for (auto & grouping_sets_keys : group_by_children) + { + auto grouping_sets_keys_list_node = buildExpressionList(grouping_sets_keys); + current_query_tree->getGroupBy().getNodes().emplace_back(std::move(grouping_sets_keys_list_node)); + } + } + else + { + current_query_tree->getGroupByNode() = buildExpressionList(group_by_list); + } + } + + auto having_expression = select_query_typed.having(); + if (having_expression) + current_query_tree->getHaving() = buildExpression(having_expression); + + auto window_list = select_query_typed.window(); + if (window_list) + current_query_tree->getWindowNode() = buildWindowList(window_list); + + auto select_order_by_list = select_query_typed.orderBy(); + if (select_order_by_list) + 
current_query_tree->getOrderByNode() = buildSortList(select_order_by_list); + + auto interpolate_list = select_query_typed.interpolate(); + if (interpolate_list) + current_query_tree->getInterpolate() = buildInterpolateList(interpolate_list); + + auto select_limit_by_limit = select_query_typed.limitByLength(); + if (select_limit_by_limit) + current_query_tree->getLimitByLimit() = buildExpression(select_limit_by_limit); + + auto select_limit_by_offset = select_query_typed.limitOffset(); + if (select_limit_by_offset) + current_query_tree->getLimitByOffset() = buildExpression(select_limit_by_offset); + + auto select_limit_by = select_query_typed.limitBy(); + if (select_limit_by) + current_query_tree->getLimitByNode() = buildExpressionList(select_limit_by); + + auto select_limit = select_query_typed.limitLength(); + if (select_limit) + current_query_tree->getLimit() = buildExpression(select_limit); + + auto select_offset = select_query_typed.limitOffset(); + if (select_offset) + current_query_tree->getOffset() = buildExpression(select_offset); + + return current_query_tree; +} + +QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_expression_list) const +{ + auto list_node = std::make_shared(); + + auto & expression_list_typed = order_by_expression_list->as(); + list_node->getNodes().reserve(expression_list_typed.children.size()); + + for (auto & expression : expression_list_typed.children) + { + const auto & order_by_element = expression->as(); + + auto sort_direction = order_by_element.direction == 1 ? SortDirection::ASCENDING : SortDirection::DESCENDING; + std::optional nulls_sort_direction; + if (order_by_element.nulls_direction_was_explicitly_specified) + nulls_sort_direction = order_by_element.nulls_direction == 1 ? SortDirection::ASCENDING : SortDirection::DESCENDING; + + std::shared_ptr collator; + if (order_by_element.collation) + collator = std::make_shared(order_by_element.collation->as().value.get()); + + const auto & sort_expression_ast = order_by_element.children.at(0); + auto sort_expression = buildExpression(sort_expression_ast); + auto sort_node = std::make_shared(std::move(sort_expression), + sort_direction, + nulls_sort_direction, + std::move(collator), + order_by_element.with_fill); + + if (order_by_element.fill_from) + sort_node->getFillFrom() = buildExpression(order_by_element.fill_from); + if (order_by_element.fill_to) + sort_node->getFillTo() = buildExpression(order_by_element.fill_to); + if (order_by_element.fill_step) + sort_node->getFillStep() = buildExpression(order_by_element.fill_step); + + list_node->getNodes().push_back(std::move(sort_node)); + } + + return list_node; +} + +QueryTreeNodePtr QueryTreeBuilder::buildInterpolateList(const ASTPtr & interpolate_expression_list) const +{ + auto list_node = std::make_shared(); + + auto & expression_list_typed = interpolate_expression_list->as(); + list_node->getNodes().reserve(expression_list_typed.children.size()); + + for (auto & expression : expression_list_typed.children) + { + const auto & interpolate_element = expression->as(); + auto expression_to_interpolate = std::make_shared(Identifier(interpolate_element.column)); + auto interpolate_expression = buildExpression(interpolate_element.expr); + auto interpolate_node = std::make_shared(std::move(expression_to_interpolate), std::move(interpolate_expression)); + + list_node->getNodes().push_back(std::move(interpolate_node)); + } + + return list_node; +} + +QueryTreeNodePtr QueryTreeBuilder::buildWindowList(const ASTPtr & window_definition_list) 
const +{ + auto list_node = std::make_shared(); + + auto & expression_list_typed = window_definition_list->as(); + list_node->getNodes().reserve(expression_list_typed.children.size()); + + for (auto & window_list_element : expression_list_typed.children) + { + const auto & window_list_element_typed = window_list_element->as(); + + auto window_node = buildWindow(window_list_element_typed.definition); + window_node->setAlias(window_list_element_typed.name); + + list_node->getNodes().push_back(std::move(window_node)); + } + + return list_node; +} + +QueryTreeNodePtr QueryTreeBuilder::buildExpressionList(const ASTPtr & expression_list) const +{ + auto list_node = std::make_shared(); + + auto & expression_list_typed = expression_list->as(); + list_node->getNodes().reserve(expression_list_typed.children.size()); + + for (auto & expression : expression_list_typed.children) + { + auto expression_node = buildExpression(expression); + list_node->getNodes().push_back(std::move(expression_node)); + } + + return list_node; +} + +QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression) const +{ + QueryTreeNodePtr result; + + if (const auto * ast_identifier = expression->as()) + { + auto identifier = Identifier(ast_identifier->name_parts); + result = std::make_shared(std::move(identifier)); + } + else if (const auto * asterisk = expression->as()) + { + auto column_transformers = buildColumnTransformers(expression, 0 /*start_child_index*/); + result = std::make_shared(std::move(column_transformers)); + } + else if (const auto * qualified_asterisk = expression->as()) + { + auto & qualified_identifier = qualified_asterisk->children.at(0)->as(); + auto column_transformers = buildColumnTransformers(expression, 1 /*start_child_index*/); + result = std::make_shared(Identifier(qualified_identifier.name_parts), std::move(column_transformers)); + } + else if (const auto * ast_literal = expression->as()) + { + result = std::make_shared(ast_literal->value); + } + else if (const auto * function = expression->as()) + { + if (function->is_lambda_function) + { + const auto & lambda_arguments_and_expression = function->arguments->as().children; + auto & lambda_arguments_tuple = lambda_arguments_and_expression.at(0)->as(); + + auto lambda_arguments_nodes = std::make_shared(); + Names lambda_arguments; + NameSet lambda_arguments_set; + + if (lambda_arguments_tuple.arguments) + { + const auto & lambda_arguments_list = lambda_arguments_tuple.arguments->as().children; + for (const auto & lambda_argument : lambda_arguments_list) + { + const auto * lambda_argument_identifier = lambda_argument->as(); + + if (!lambda_argument_identifier) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Lambda {} argument is not identifier", + function->formatForErrorMessage()); + + if (lambda_argument_identifier->name_parts.size() > 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Lambda {} argument identifier must contain single part. 
Actual {}", + function->formatForErrorMessage(), + lambda_argument_identifier->full_name); + + const auto & argument_name = lambda_argument_identifier->name_parts[0]; + auto [_, inserted] = lambda_arguments_set.insert(argument_name); + if (!inserted) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Lambda {} multiple arguments with same name {}", + function->formatForErrorMessage(), + argument_name); + + lambda_arguments.push_back(argument_name); + } + } + + const auto & lambda_expression = lambda_arguments_and_expression.at(1); + auto lambda_expression_node = buildExpression(lambda_expression); + + result = std::make_shared(std::move(lambda_arguments), std::move(lambda_expression_node)); + } + else + { + auto function_node = std::make_shared(function->name); + + if (function->parameters) + { + const auto & function_parameters_list = function->parameters->as()->children; + for (const auto & argument : function_parameters_list) + function_node->getParameters().getNodes().push_back(buildExpression(argument)); + } + + if (function->arguments) + { + const auto & function_arguments_list = function->arguments->as()->children; + for (const auto & argument : function_arguments_list) + function_node->getArguments().getNodes().push_back(buildExpression(argument)); + } + + if (function->is_window_function) + { + if (function->window_definition) + function_node->getWindowNode() = buildWindow(function->window_definition); + else + function_node->getWindowNode() = std::make_shared(Identifier(function->window_name)); + } + + result = std::move(function_node); + } + } + else if (const auto * subquery = expression->as()) + { + auto subquery_query = subquery->children[0]; + auto query_node = buildSelectWithUnionExpression(subquery_query, true /*is_subquery*/, {} /*cte_name*/); + + result = std::move(query_node); + } + else if (const auto * with_element = expression->as()) + { + auto with_element_subquery = with_element->subquery->as().children.at(0); + auto query_node = buildSelectWithUnionExpression(with_element_subquery, true /*is_subquery*/, with_element->name /*cte_name*/); + + result = std::move(query_node); + } + else if (const auto * columns_regexp_matcher = expression->as()) + { + auto column_transformers = buildColumnTransformers(expression, 0 /*start_child_index*/); + result = std::make_shared(columns_regexp_matcher->getMatcher(), std::move(column_transformers)); + } + else if (const auto * columns_list_matcher = expression->as()) + { + Identifiers column_list_identifiers; + column_list_identifiers.reserve(columns_list_matcher->column_list->children.size()); + + for (auto & column_list_child : columns_list_matcher->column_list->children) + { + auto & column_list_identifier = column_list_child->as(); + column_list_identifiers.emplace_back(Identifier{column_list_identifier.name_parts}); + } + + auto column_transformers = buildColumnTransformers(expression, 0 /*start_child_index*/); + result = std::make_shared(std::move(column_list_identifiers), std::move(column_transformers)); + } + else if (const auto * qualified_columns_regexp_matcher = expression->as()) + { + auto & qualified_identifier = qualified_columns_regexp_matcher->children.at(0)->as(); + auto column_transformers = buildColumnTransformers(expression, 1 /*start_child_index*/); + result = std::make_shared(Identifier(qualified_identifier.name_parts), qualified_columns_regexp_matcher->getMatcher(), std::move(column_transformers)); + } + else if (const auto * qualified_columns_list_matcher = expression->as()) + { + auto & qualified_identifier = 
qualified_columns_list_matcher->children.at(0)->as(); + + Identifiers column_list_identifiers; + column_list_identifiers.reserve(qualified_columns_list_matcher->column_list->children.size()); + + for (auto & column_list_child : qualified_columns_list_matcher->column_list->children) + { + auto & column_list_identifier = column_list_child->as(); + column_list_identifiers.emplace_back(Identifier{column_list_identifier.name_parts}); + } + + auto column_transformers = buildColumnTransformers(expression, 1 /*start_child_index*/); + result = std::make_shared(Identifier(qualified_identifier.name_parts), std::move(column_list_identifiers), std::move(column_transformers)); + } + else + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Invalid expression. Expected identifier, literal, matcher, function, subquery. Actual {}", + expression->formatForErrorMessage()); + } + + result->setAlias(expression->tryGetAlias()); + result->setOriginalAST(expression); + + return result; +} + +QueryTreeNodePtr QueryTreeBuilder::buildWindow(const ASTPtr & window_definition) const +{ + const auto & window_definition_typed = window_definition->as(); + WindowFrame window_frame; + + if (!window_definition_typed.frame_is_default) + { + window_frame.is_default = false; + window_frame.type = window_definition_typed.frame_type; + window_frame.begin_type = window_definition_typed.frame_begin_type; + window_frame.begin_preceding = window_definition_typed.frame_begin_preceding; + window_frame.end_type = window_definition_typed.frame_end_type; + window_frame.end_preceding = window_definition_typed.frame_end_preceding; + } + + auto window_node = std::make_shared(window_frame); + window_node->setParentWindowName(window_definition_typed.parent_window_name); + + if (window_definition_typed.partition_by) + window_node->getPartitionByNode() = buildExpressionList(window_definition_typed.partition_by); + + if (window_definition_typed.order_by) + window_node->getOrderByNode() = buildSortList(window_definition_typed.order_by); + + if (window_definition_typed.frame_begin_offset) + window_node->getFrameBeginOffsetNode() = buildExpression(window_definition_typed.frame_begin_offset); + + if (window_definition_typed.frame_end_offset) + window_node->getFrameEndOffsetNode() = buildExpression(window_definition_typed.frame_end_offset); + + window_node->setOriginalAST(window_definition); + + return window_node; +} + +QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select_query) const +{ + if (!tables_in_select_query) + { + /** If no table is specified in SELECT query we substitute system.one table. 
+ * SELECT * FROM system.one; + */ + Identifier storage_identifier("system.one"); + return std::make_shared(storage_identifier); + } + + auto & tables = tables_in_select_query->as(); + + QueryTreeNodes table_expressions; + + for (const auto & table_element_untyped : tables.children) + { + const auto & table_element = table_element_untyped->as(); + + if (table_element.table_expression) + { + auto & table_expression = table_element.table_expression->as(); + std::optional table_expression_modifiers; + + if (table_expression.final || table_expression.sample_size) + { + bool has_final = table_expression.final; + std::optional sample_size_ratio; + std::optional sample_offset_ratio; + + if (table_expression.sample_size) + { + auto & ast_sample_size_ratio = table_expression.sample_size->as(); + sample_size_ratio = ast_sample_size_ratio.ratio; + + if (table_expression.sample_offset) + { + auto & ast_sample_offset_ratio = table_expression.sample_offset->as(); + sample_offset_ratio = ast_sample_offset_ratio.ratio; + } + } + + table_expression_modifiers = TableExpressionModifiers(has_final, sample_size_ratio, sample_offset_ratio); + } + + if (table_expression.database_and_table_name) + { + auto & table_identifier_typed = table_expression.database_and_table_name->as(); + auto storage_identifier = Identifier(table_identifier_typed.name_parts); + QueryTreeNodePtr table_identifier_node; + + if (table_expression_modifiers) + table_identifier_node = std::make_shared(storage_identifier, *table_expression_modifiers); + else + table_identifier_node = std::make_shared(storage_identifier); + + table_identifier_node->setAlias(table_identifier_typed.tryGetAlias()); + table_identifier_node->setOriginalAST(table_element.table_expression); + + table_expressions.push_back(std::move(table_identifier_node)); + } + else if (table_expression.subquery) + { + auto & subquery_expression = table_expression.subquery->as(); + const auto & select_with_union_query = subquery_expression.children[0]; + + auto node = buildSelectWithUnionExpression(select_with_union_query, true /*is_subquery*/, {} /*cte_name*/); + node->setAlias(subquery_expression.tryGetAlias()); + node->setOriginalAST(select_with_union_query); + + if (table_expression_modifiers) + { + if (auto * query_node = node->as()) + query_node->setTableExpressionModifiers(*table_expression_modifiers); + else if (auto * union_node = node->as()) + union_node->setTableExpressionModifiers(*table_expression_modifiers); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected table expression subquery node. Expected union or query. 
Actual {}", + node->formatASTForErrorMessage()); + } + + table_expressions.push_back(std::move(node)); + } + else if (table_expression.table_function) + { + auto & table_function_expression = table_expression.table_function->as(); + + auto node = std::make_shared(table_function_expression.name); + + if (table_function_expression.arguments) + { + const auto & function_arguments_list = table_function_expression.arguments->as().children; + for (const auto & argument : function_arguments_list) + { + if (argument->as() || argument->as() || argument->as()) + node->getArguments().getNodes().push_back(buildSelectOrUnionExpression(argument, false /*is_subquery*/, {} /*cte_name*/)); + else + node->getArguments().getNodes().push_back(buildExpression(argument)); + } + } + + if (table_expression_modifiers) + node->setTableExpressionModifiers(*table_expression_modifiers); + node->setAlias(table_function_expression.tryGetAlias()); + node->setOriginalAST(table_expression.table_function); + + table_expressions.push_back(std::move(node)); + } + else + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Unsupported table expression node {}", table_element.table_expression->formatForErrorMessage()); + } + } + + if (table_element.table_join) + { + const auto & table_join = table_element.table_join->as(); + + auto right_table_expression = std::move(table_expressions.back()); + table_expressions.pop_back(); + + auto left_table_expression = std::move(table_expressions.back()); + table_expressions.pop_back(); + + QueryTreeNodePtr join_expression; + + if (table_join.using_expression_list) + join_expression = buildExpressionList(table_join.using_expression_list); + else if (table_join.on_expression) + join_expression = buildExpression(table_join.on_expression); + + const auto & settings = context->getSettingsRef(); + auto join_default_strictness = settings.join_default_strictness; + auto any_join_distinct_right_table_keys = settings.any_join_distinct_right_table_keys; + + JoinStrictness result_join_strictness = table_join.strictness; + JoinKind result_join_kind = table_join.kind; + + if (result_join_strictness == JoinStrictness::Unspecified && (result_join_kind != JoinKind::Cross && result_join_kind != JoinKind::Comma)) + { + if (join_default_strictness == JoinStrictness::Any) + result_join_strictness = JoinStrictness::Any; + else if (join_default_strictness == JoinStrictness::All) + result_join_strictness = JoinStrictness::All; + else + throw Exception(ErrorCodes::EXPECTED_ALL_OR_ANY, + "Expected ANY or ALL in JOIN section, because setting (join_default_strictness) is empty"); + } + + if (any_join_distinct_right_table_keys) + { + if (result_join_strictness == JoinStrictness::Any && result_join_kind == JoinKind::Inner) + { + result_join_strictness = JoinStrictness::Semi; + result_join_kind = JoinKind::Left; + } + + if (result_join_strictness == JoinStrictness::Any) + result_join_strictness = JoinStrictness::RightAny; + } + else if (result_join_strictness == JoinStrictness::Any && result_join_kind == JoinKind::Full) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ANY FULL JOINs are not implemented"); + } + + auto join_node = std::make_shared(std::move(left_table_expression), + std::move(right_table_expression), + std::move(join_expression), + table_join.locality, + result_join_strictness, + result_join_kind); + + /** Original AST is not set because it will contain only join part and does + * not include left table expression. 
+ */ + table_expressions.emplace_back(std::move(join_node)); + } + + if (table_element.array_join) + { + auto & array_join_expression = table_element.array_join->as(); + bool is_left_array_join = array_join_expression.kind == ASTArrayJoin::Kind::Left; + + auto last_table_expression = std::move(table_expressions.back()); + table_expressions.pop_back(); + + auto array_join_expressions_list = buildExpressionList(array_join_expression.expression_list); + auto array_join_node = std::make_shared(std::move(last_table_expression), std::move(array_join_expressions_list), is_left_array_join); + + /** Original AST is not set because it will contain only array join part and does + * not include left table expression. + */ + table_expressions.push_back(std::move(array_join_node)); + } + } + + if (table_expressions.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Query FROM section cannot be empty"); + + if (table_expressions.size() > 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Query FROM section cannot have more than 1 root table expression"); + + return table_expressions.back(); +} + + +ColumnTransformersNodes QueryTreeBuilder::buildColumnTransformers(const ASTPtr & matcher_expression, size_t start_child_index) const +{ + ColumnTransformersNodes column_transformers; + size_t children_size = matcher_expression->children.size(); + + for (; start_child_index < children_size; ++start_child_index) + { + const auto & child = matcher_expression->children[start_child_index]; + + if (auto * apply_transformer = child->as()) + { + if (apply_transformer->lambda) + { + auto lambda_query_tree_node = buildExpression(apply_transformer->lambda); + column_transformers.emplace_back(std::make_shared(std::move(lambda_query_tree_node))); + } + else + { + auto function_node = std::make_shared(apply_transformer->func_name); + if (apply_transformer->parameters) + function_node->getParametersNode() = buildExpressionList(apply_transformer->parameters); + + column_transformers.emplace_back(std::make_shared(std::move(function_node))); + } + } + else if (auto * except_transformer = child->as()) + { + auto matcher = except_transformer->getMatcher(); + if (matcher) + { + column_transformers.emplace_back(std::make_shared(std::move(matcher))); + } + else + { + Names except_column_names; + except_column_names.reserve(except_transformer->children.size()); + + for (auto & except_transformer_child : except_transformer->children) + except_column_names.push_back(except_transformer_child->as().full_name); + + column_transformers.emplace_back(std::make_shared(std::move(except_column_names), except_transformer->is_strict)); + } + } + else if (auto * replace_transformer = child->as()) + { + std::vector replacements; + replacements.reserve(replace_transformer->children.size()); + + for (const auto & replace_transformer_child : replace_transformer->children) + { + auto & replacement = replace_transformer_child->as(); + replacements.emplace_back(ReplaceColumnTransformerNode::Replacement{replacement.name, buildExpression(replacement.expr)}); + } + + column_transformers.emplace_back(std::make_shared(replacements, replace_transformer->is_strict)); + } + else + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Unsupported column matcher {}", child->formatForErrorMessage()); + } + } + + return column_transformers; +} + +} + +QueryTreeNodePtr buildQueryTree(ASTPtr query, ContextPtr context) +{ + QueryTreeBuilder builder(std::move(query), context); + return builder.getQueryTreeNode(); +} + +} diff --git a/src/Analyzer/QueryTreeBuilder.h 
b/src/Analyzer/QueryTreeBuilder.h new file mode 100644 index 00000000000..de0f6270230 --- /dev/null +++ b/src/Analyzer/QueryTreeBuilder.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +/** Build query tree from AST. + * AST that represent query ASTSelectWithUnionQuery, ASTSelectIntersectExceptQuery, ASTSelectQuery. + * AST that represent a list of expressions ASTExpressionList. + * AST that represent expression ASTIdentifier, ASTAsterisk, ASTLiteral, ASTFunction. + */ +QueryTreeNodePtr buildQueryTree(ASTPtr query, ContextPtr context); + +} diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp new file mode 100644 index 00000000000..853b4a23f38 --- /dev/null +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -0,0 +1,151 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +/** ClickHouse query tree pass manager. + * + * TODO: Support _shard_num into shardNum() rewriting. + * TODO: Support logical expressions optimizer. + * TODO: Support fuse sum count optimize_fuse_sum_count_avg, optimize_syntax_fuse_functions. + * TODO: Support setting convert_query_to_cnf. + * TODO: Support setting optimize_using_constraints. + * TODO: Support setting optimize_substitute_columns. + * TODO: Support GROUP BY injective function elimination. + * TODO: Support GROUP BY functions of other keys elimination. + * TODO: Support setting optimize_move_functions_out_of_any. + * TODO: Support setting optimize_aggregators_of_group_by_keys. + * TODO: Support setting optimize_duplicate_order_by_and_distinct. + * TODO: Support setting optimize_redundant_functions_in_order_by. + * TODO: Support setting optimize_monotonous_functions_in_order_by. + * TODO: Support setting optimize_if_transform_strings_to_enum. + * TODO: Support settings.optimize_syntax_fuse_functions. + * TODO: Support settings.optimize_or_like_chain. + * TODO: Add optimizations based on function semantics. Example: SELECT * FROM test_table WHERE id != id. (id is not nullable column). + */ + +QueryTreePassManager::QueryTreePassManager(ContextPtr context_) : WithContext(context_) {} + +void QueryTreePassManager::addPass(QueryTreePassPtr pass) +{ + passes.push_back(std::move(pass)); +} + +void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node) +{ + auto current_context = getContext(); + size_t passes_size = passes.size(); + + for (size_t i = 0; i < passes_size; ++i) + passes[i]->run(query_tree_node, current_context); +} + +void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node, size_t up_to_pass_index) +{ + size_t passes_size = passes.size(); + if (up_to_pass_index > passes_size) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Requested to run passes up to {} pass. 
There are only {} passes", + up_to_pass_index, + passes_size); + + auto current_context = getContext(); + for (size_t i = 0; i < up_to_pass_index; ++i) + passes[i]->run(query_tree_node, current_context); +} + +void QueryTreePassManager::dump(WriteBuffer & buffer) +{ + size_t passes_size = passes.size(); + + for (size_t i = 0; i < passes_size; ++i) + { + auto & pass = passes[i]; + buffer << "Pass " << (i + 1) << ' ' << pass->getName() << " - " << pass->getDescription(); + if (i + 1 != passes_size) + buffer << '\n'; + } +} + +void QueryTreePassManager::dump(WriteBuffer & buffer, size_t up_to_pass_index) +{ + size_t passes_size = passes.size(); + if (up_to_pass_index > passes_size) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Requested to dump passes up to {} pass. There are only {} passes", + up_to_pass_index, + passes_size); + + for (size_t i = 0; i < up_to_pass_index; ++i) + { + auto & pass = passes[i]; + buffer << "Pass " << (i + 1) << " " << pass->getName() << " - " << pass->getDescription(); + if (i + 1 != up_to_pass_index) + buffer << '\n'; + } +} + +void addQueryTreePasses(QueryTreePassManager & manager) +{ + auto context = manager.getContext(); + const auto & settings = context->getSettingsRef(); + + manager.addPass(std::make_shared()); + + if (settings.optimize_functions_to_subcolumns) + manager.addPass(std::make_shared()); + + if (settings.count_distinct_optimization) + manager.addPass(std::make_shared()); + + if (settings.optimize_rewrite_sum_if_to_count_if) + manager.addPass(std::make_shared()); + + if (settings.optimize_normalize_count_variants) + manager.addPass(std::make_shared()); + + manager.addPass(std::make_shared()); + + if (settings.optimize_arithmetic_operations_in_aggregate_functions) + manager.addPass(std::make_shared()); + + if (settings.optimize_injective_functions_inside_uniq) + manager.addPass(std::make_shared()); + + if (settings.optimize_multiif_to_if) + manager.addPass(std::make_shared()); + + manager.addPass(std::make_shared()); + + if (settings.optimize_if_chain_to_multiif) + manager.addPass(std::make_shared()); + + manager.addPass(std::make_shared()); + manager.addPass(std::make_shared()); +} + +} diff --git a/src/Analyzer/QueryTreePassManager.h b/src/Analyzer/QueryTreePassManager.h new file mode 100644 index 00000000000..3c67fc36178 --- /dev/null +++ b/src/Analyzer/QueryTreePassManager.h @@ -0,0 +1,49 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +/** Query tree pass manager provide functionality to register and run passes + * on query tree. + */ +class QueryTreePassManager : public WithContext +{ +public: + explicit QueryTreePassManager(ContextPtr context_); + + /// Get registered passes + const std::vector & getPasses() const + { + return passes; + } + + /// Add query tree pass + void addPass(QueryTreePassPtr pass); + + /// Run query tree passes on query tree + void run(QueryTreeNodePtr query_tree_node); + + /** Run query tree passes on query tree up to up_to_pass_index. + * Throws exception if up_to_pass_index is greater than passes size. + */ + void run(QueryTreeNodePtr query_tree_node, size_t up_to_pass_index); + + /// Dump query tree passes + void dump(WriteBuffer & buffer); + + /** Dump query tree passes to up_to_pass_index. + * Throws exception if up_to_pass_index is greater than passes size. 
+ */ + void dump(WriteBuffer & buffer, size_t up_to_pass_index); + +private: + std::vector passes; +}; + +void addQueryTreePasses(QueryTreePassManager & manager); + +} diff --git a/src/Analyzer/SetUtils.cpp b/src/Analyzer/SetUtils.cpp new file mode 100644 index 00000000000..a72879d2145 --- /dev/null +++ b/src/Analyzer/SetUtils.cpp @@ -0,0 +1,182 @@ +#include + +#include + +#include +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_ELEMENT_OF_SET; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +size_t getCompoundTypeDepth(const IDataType & type) +{ + size_t result = 0; + + const IDataType * current_type = &type; + + while (true) + { + WhichDataType which_type(*current_type); + + if (which_type.isArray()) + { + current_type = assert_cast(*current_type).getNestedType().get(); + ++result; + } + else if (which_type.isTuple()) + { + const auto & tuple_elements = assert_cast(*current_type).getElements(); + if (!tuple_elements.empty()) + current_type = tuple_elements.at(0).get(); + + ++result; + } + else + { + break; + } + } + + return result; +} + +template +Block createBlockFromCollection(const Collection & collection, const DataTypes & block_types, bool transform_null_in) +{ + size_t columns_size = block_types.size(); + MutableColumns columns(columns_size); + for (size_t i = 0; i < columns_size; ++i) + { + columns[i] = block_types[i]->createColumn(); + columns[i]->reserve(collection.size()); + } + + Row tuple_values; + + for (const auto & value : collection) + { + if (columns_size == 1) + { + auto field = convertFieldToType(value, *block_types[0]); + bool need_insert_null = transform_null_in && block_types[0]->isNullable(); + if (!field.isNull() || need_insert_null) + columns[0]->insert(std::move(field)); + + continue; + } + + if (value.getType() != Field::Types::Tuple) + throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET, + "Invalid type in set. 
Expected tuple, got {}", + value.getTypeName()); + + const auto & tuple = value.template get(); + size_t tuple_size = tuple.size(); + + if (tuple_size != columns_size) + throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET, + "Incorrect size of tuple in set: {} instead of {}", + tuple_size, + columns_size); + + if (tuple_values.empty()) + tuple_values.resize(tuple_size); + + size_t i = 0; + for (; i < tuple_size; ++i) + { + tuple_values[i] = convertFieldToType(tuple[i], *block_types[i]); + bool need_insert_null = transform_null_in && block_types[i]->isNullable(); + if (tuple_values[i].isNull() && !need_insert_null) + break; + } + + if (i == tuple_size) + for (i = 0; i < tuple_size; ++i) + columns[i]->insert(tuple_values[i]); + } + + Block res; + for (size_t i = 0; i < columns_size; ++i) + res.insert(ColumnWithTypeAndName{std::move(columns[i]), block_types[i], "argument_" + toString(i)}); + + return res; +} + +} + +SetPtr makeSetForConstantValue(const DataTypePtr & expression_type, const Field & value, const DataTypePtr & value_type, const Settings & settings) +{ + DataTypes set_element_types = {expression_type}; + const auto * lhs_tuple_type = typeid_cast(expression_type.get()); + + if (lhs_tuple_type && lhs_tuple_type->getElements().size() != 1) + set_element_types = lhs_tuple_type->getElements(); + + for (auto & set_element_type : set_element_types) + { + if (const auto * set_element_low_cardinality_type = typeid_cast(set_element_type.get())) + set_element_type = set_element_low_cardinality_type->getDictionaryType(); + } + + size_t lhs_type_depth = getCompoundTypeDepth(*expression_type); + size_t rhs_type_depth = getCompoundTypeDepth(*value_type); + + SizeLimits size_limits_for_set = {settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode}; + bool tranform_null_in = settings.transform_null_in; + + Block result_block; + + if (lhs_type_depth == rhs_type_depth) + { + /// 1 in 1; (1, 2) in (1, 2); identity(tuple(tuple(tuple(1)))) in tuple(tuple(tuple(1))); etc. + + Array array{value}; + result_block = createBlockFromCollection(array, set_element_types, tranform_null_in); + } + else if (lhs_type_depth + 1 == rhs_type_depth) + { + /// 1 in (1, 2); (1, 2) in ((1, 2), (3, 4)) + + WhichDataType rhs_which_type(value_type); + + if (rhs_which_type.isArray()) + result_block = createBlockFromCollection(value.get(), set_element_types, tranform_null_in); + else if (rhs_which_type.isTuple()) + result_block = createBlockFromCollection(value.get(), set_element_types, tranform_null_in); + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Unsupported type at the right-side of IN. Expected Array or Tuple. Actual {}", + value_type->getName()); + } + else + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Unsupported types for IN. First argument type {}. Second argument type {}", + expression_type->getName(), + value_type->getName()); + } + + auto set = std::make_shared(size_limits_for_set, false /*fill_set_elements*/, tranform_null_in); + + set->setHeader(result_block.cloneEmpty().getColumnsWithTypeAndName()); + set->insertFromBlock(result_block.getColumnsWithTypeAndName()); + set->finishInsert(); + + return set; +} + +} diff --git a/src/Analyzer/SetUtils.h b/src/Analyzer/SetUtils.h new file mode 100644 index 00000000000..7afc8e5259c --- /dev/null +++ b/src/Analyzer/SetUtils.h @@ -0,0 +1,30 @@ +#pragma once + +#include + +#include + +#include + +namespace DB +{ + +class Set; +using SetPtr = std::shared_ptr; + +/** Make set for constant part of IN subquery. 
+ * Throws exception if parameters are not valid for IN function. + * + * Example: SELECT id FROM test_table WHERE id IN (1, 2, 3, 4); + * Example: SELECT id FROM test_table WHERE id IN ((1, 2), (3, 4)); + * + * @param expression_type - type of first argument of function IN. + * @param value - constant value of second argument of function IN. + * @param value_type - type of second argument of function IN. + * @param settings - query settings. + * + * @return SetPtr for constant value. + */ +SetPtr makeSetForConstantValue(const DataTypePtr & expression_type, const Field & value, const DataTypePtr & value_type, const Settings & settings); + +} diff --git a/src/Analyzer/SortNode.cpp b/src/Analyzer/SortNode.cpp new file mode 100644 index 00000000000..f6faccdb7c5 --- /dev/null +++ b/src/Analyzer/SortNode.cpp @@ -0,0 +1,168 @@ +#include + +#include + +#include +#include + +#include +#include +#include + +namespace DB +{ + +const char * toString(SortDirection sort_direction) +{ + switch (sort_direction) + { + case SortDirection::ASCENDING: return "ASCENDING"; + case SortDirection::DESCENDING: return "DESCENDING"; + } +} + +SortNode::SortNode(QueryTreeNodePtr expression_, + SortDirection sort_direction_, + std::optional nulls_sort_direction_, + std::shared_ptr collator_, + bool with_fill_) + : IQueryTreeNode(children_size) + , sort_direction(sort_direction_) + , nulls_sort_direction(nulls_sort_direction_) + , collator(std::move(collator_)) + , with_fill(with_fill_) +{ + children[sort_expression_child_index] = std::move(expression_); +} + +String SortNode::getName() const +{ + String result = getExpression()->getName(); + + if (sort_direction == SortDirection::ASCENDING) + result += " ASC"; + else + result += " DESC"; + + if (nulls_sort_direction) + { + if (*nulls_sort_direction == SortDirection::ASCENDING) + result += " NULLS FIRST"; + else + result += " NULLS LAST"; + } + + if (with_fill) + result += " WITH FILL"; + + if (hasFillFrom()) + result += " FROM " + getFillFrom()->getName(); + + if (hasFillStep()) + result += " STEP " + getFillStep()->getName(); + + if (hasFillTo()) + result += " TO " + getFillTo()->getName(); + + return result; +} + +void SortNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "SORT id: " << format_state.getNodeId(this); + + buffer << ", sort_direction: " << toString(sort_direction); + if (nulls_sort_direction) + buffer << ", nulls_sort_direction: " << toString(*nulls_sort_direction); + + if (collator) + buffer << ", collator: " << collator->getLocale(); + + buffer << ", with_fill: " << with_fill; + + buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION\n"; + getExpression()->dumpTreeImpl(buffer, format_state, indent + 4); + + if (hasFillFrom()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "FILL FROM\n"; + getFillFrom()->dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (hasFillTo()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "FILL TO\n"; + getFillTo()->dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (hasFillStep()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "FILL STEP\n"; + getFillStep()->dumpTreeImpl(buffer, format_state, indent + 4); + } +} + +bool SortNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + if (sort_direction != rhs_typed.sort_direction || + nulls_sort_direction != rhs_typed.nulls_sort_direction || + with_fill != rhs_typed.with_fill) + return false; + + if 
(!collator && !rhs_typed.collator) + return true; + else if (collator && !rhs_typed.collator) + return false; + else if (!collator && rhs_typed.collator) + return false; + + return collator->getLocale() == rhs_typed.collator->getLocale(); +} + +void SortNode::updateTreeHashImpl(HashState & hash_state) const +{ + hash_state.update(sort_direction); + hash_state.update(nulls_sort_direction); + hash_state.update(with_fill); + + if (collator) + { + const auto & locale = collator->getLocale(); + + hash_state.update(locale.size()); + hash_state.update(locale); + } +} + +QueryTreeNodePtr SortNode::cloneImpl() const +{ + return std::make_shared(nullptr /*expression*/, sort_direction, nulls_sort_direction, collator, with_fill); +} + +ASTPtr SortNode::toASTImpl() const +{ + auto result = std::make_shared(); + result->direction = sort_direction == SortDirection::ASCENDING ? 1 : -1; + result->nulls_direction = result->direction; + if (nulls_sort_direction) + result->nulls_direction = *nulls_sort_direction == SortDirection::ASCENDING ? 1 : -1; + + result->nulls_direction_was_explicitly_specified = nulls_sort_direction.has_value(); + + result->with_fill = with_fill; + result->fill_from = hasFillFrom() ? getFillFrom()->toAST() : nullptr; + result->fill_to = hasFillTo() ? getFillTo()->toAST() : nullptr; + result->fill_step = hasFillStep() ? getFillStep()->toAST() : nullptr; + result->children.push_back(getExpression()->toAST()); + + if (collator) + { + result->children.push_back(std::make_shared(Field(collator->getLocale()))); + result->collation = result->children.back(); + } + + return result; +} + +} diff --git a/src/Analyzer/SortNode.h b/src/Analyzer/SortNode.h new file mode 100644 index 00000000000..b0fe835cf45 --- /dev/null +++ b/src/Analyzer/SortNode.h @@ -0,0 +1,158 @@ +#pragma once + +#include + +#include +#include + +namespace DB +{ + +/** Sort node represents sort description for expression that is part of ORDER BY in query tree. + * Example: SELECT * FROM test_table ORDER BY sort_column_1, sort_column_2; + * Sort node optionally contain collation, fill from, fill to, and fill step. 
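 * Example with modifiers (illustrative; the column and the fill bounds are hypothetical):
 *     SELECT * FROM test_table ORDER BY sort_column_1 ASC NULLS FIRST WITH FILL FROM 1 TO 10 STEP 1;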
+ */ +class SortNode; +using SortNodePtr = std::shared_ptr; + +enum class SortDirection +{ + ASCENDING = 0, + DESCENDING = 1 +}; + +const char * toString(SortDirection sort_direction); + +class SortNode final : public IQueryTreeNode +{ +public: + /// Initialize sort node with sort expression + explicit SortNode(QueryTreeNodePtr expression_, + SortDirection sort_direction_ = SortDirection::ASCENDING, + std::optional nulls_sort_direction_ = {}, + std::shared_ptr collator_ = nullptr, + bool with_fill = false); + + /// Get sort expression + const QueryTreeNodePtr & getExpression() const + { + return children[sort_expression_child_index]; + } + + /// Get sort expression + QueryTreeNodePtr & getExpression() + { + return children[sort_expression_child_index]; + } + + /// Returns true if sort node has with fill, false otherwise + bool withFill() const + { + return with_fill; + } + + /// Returns true if sort node has fill from, false otherwise + bool hasFillFrom() const + { + return children[fill_from_child_index] != nullptr; + } + + /// Get fill from + const QueryTreeNodePtr & getFillFrom() const + { + return children[fill_from_child_index]; + } + + /// Get fill from + QueryTreeNodePtr & getFillFrom() + { + return children[fill_from_child_index]; + } + + /// Returns true if sort node has fill to, false otherwise + bool hasFillTo() const + { + return children[fill_to_child_index] != nullptr; + } + + /// Get fill to + const QueryTreeNodePtr & getFillTo() const + { + return children[fill_to_child_index]; + } + + /// Get fill to + QueryTreeNodePtr & getFillTo() + { + return children[fill_to_child_index]; + } + + /// Returns true if sort node has fill step, false otherwise + bool hasFillStep() const + { + return children[fill_step_child_index] != nullptr; + } + + /// Get fill step + const QueryTreeNodePtr & getFillStep() const + { + return children[fill_step_child_index]; + } + + /// Get fill step + QueryTreeNodePtr & getFillStep() + { + return children[fill_step_child_index]; + } + + /// Get collator + const std::shared_ptr & getCollator() const + { + return collator; + } + + /// Get sort direction + SortDirection getSortDirection() const + { + return sort_direction; + } + + /// Get nulls sort direction + std::optional getNullsSortDirection() const + { + return nulls_sort_direction; + } + + QueryTreeNodeType getNodeType() const override + { + return QueryTreeNodeType::SORT; + } + + String getName() const override; + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + + +protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(HashState & hash_state) const override; + + QueryTreeNodePtr cloneImpl() const override; + + ASTPtr toASTImpl() const override; + +private: + static constexpr size_t sort_expression_child_index = 0; + static constexpr size_t fill_from_child_index = 1; + static constexpr size_t fill_to_child_index = 2; + static constexpr size_t fill_step_child_index = 3; + static constexpr size_t children_size = fill_step_child_index + 1; + + SortDirection sort_direction = SortDirection::ASCENDING; + std::optional nulls_sort_direction; + std::shared_ptr collator; + bool with_fill = false; +}; + +} diff --git a/src/Analyzer/TableExpressionModifiers.cpp b/src/Analyzer/TableExpressionModifiers.cpp new file mode 100644 index 00000000000..79b5a8dba41 --- /dev/null +++ b/src/Analyzer/TableExpressionModifiers.cpp @@ -0,0 +1,42 @@ +#include + +#include + +#include +#include +#include + +namespace DB +{ + +void 
TableExpressionModifiers::dump(WriteBuffer & buffer) const +{ + buffer << "final: " << has_final; + + if (sample_size_ratio) + buffer << ", sample_size: " << ASTSampleRatio::toString(*sample_size_ratio); + + if (sample_offset_ratio) + buffer << ", sample_offset: " << ASTSampleRatio::toString(*sample_offset_ratio); +} + +void TableExpressionModifiers::updateTreeHash(SipHash & hash_state) const +{ + hash_state.update(has_final); + hash_state.update(sample_size_ratio.has_value()); + hash_state.update(sample_offset_ratio.has_value()); + + if (sample_size_ratio.has_value()) + { + hash_state.update(sample_size_ratio->numerator); + hash_state.update(sample_size_ratio->denominator); + } + + if (sample_offset_ratio.has_value()) + { + hash_state.update(sample_offset_ratio->numerator); + hash_state.update(sample_offset_ratio->denominator); + } +} + +} diff --git a/src/Analyzer/TableExpressionModifiers.h b/src/Analyzer/TableExpressionModifiers.h new file mode 100644 index 00000000000..cc5ac3948bf --- /dev/null +++ b/src/Analyzer/TableExpressionModifiers.h @@ -0,0 +1,77 @@ +#pragma once + +#include + +namespace DB +{ + +/** Modifiers that can be used for table, table function and subquery in JOIN TREE. + * + * Example: SELECT * FROM test_table SAMPLE 0.1 OFFSET 0.1 FINAL + */ +class TableExpressionModifiers +{ +public: + using Rational = ASTSampleRatio::Rational; + + TableExpressionModifiers(bool has_final_, + std::optional sample_size_ratio_, + std::optional sample_offset_ratio_) + : has_final(has_final_) + , sample_size_ratio(sample_size_ratio_) + , sample_offset_ratio(sample_offset_ratio_) + {} + + /// Returns true if final is specified, false otherwise + bool hasFinal() const + { + return has_final; + } + + /// Returns true if sample size ratio is specified, false otherwise + bool hasSampleSizeRatio() const + { + return sample_size_ratio.has_value(); + } + + /// Get sample size ratio + std::optional getSampleSizeRatio() const + { + return sample_size_ratio; + } + + /// Returns true if sample offset ratio is specified, false otherwise + bool hasSampleOffsetRatio() const + { + return sample_offset_ratio.has_value(); + } + + /// Get sample offset ratio + std::optional getSampleOffsetRatio() const + { + return sample_offset_ratio; + } + + /// Dump into buffer + void dump(WriteBuffer & buffer) const; + + /// Update tree hash + void updateTreeHash(SipHash & hash_state) const; + +private: + bool has_final = false; + std::optional sample_size_ratio; + std::optional sample_offset_ratio; +}; + +inline bool operator==(const TableExpressionModifiers & lhs, const TableExpressionModifiers & rhs) +{ + return lhs.hasFinal() == rhs.hasFinal() && lhs.getSampleSizeRatio() == rhs.getSampleSizeRatio() && lhs.getSampleOffsetRatio() == rhs.getSampleOffsetRatio(); +} + +inline bool operator!=(const TableExpressionModifiers & lhs, const TableExpressionModifiers & rhs) +{ + return !(lhs == rhs); +} + +} diff --git a/src/Analyzer/TableFunctionNode.cpp b/src/Analyzer/TableFunctionNode.cpp new file mode 100644 index 00000000000..2056e10ff6f --- /dev/null +++ b/src/Analyzer/TableFunctionNode.cpp @@ -0,0 +1,148 @@ +#include + +#include +#include +#include + +#include + +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +TableFunctionNode::TableFunctionNode(String table_function_name_) + : IQueryTreeNode(children_size) + , table_function_name(table_function_name_) + , storage_id("system", "one") +{ + children[arguments_child_index] = std::make_shared(); +} + +void 
TableFunctionNode::resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context) +{ + table_function = std::move(table_function_value); + storage = std::move(storage_value); + storage_id = storage->getStorageID(); + storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context); +} + +const StorageID & TableFunctionNode::getStorageID() const +{ + if (!storage) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function node {} is not resolved", table_function_name); + + return storage_id; +} + +const StorageSnapshotPtr & TableFunctionNode::getStorageSnapshot() const +{ + if (!storage) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function node {} is not resolved", table_function_name); + + return storage_snapshot; +} + +String TableFunctionNode::getName() const +{ + String name = table_function_name; + + const auto & arguments = getArguments(); + name += '('; + name += arguments.getName(); + name += ')'; + + return name; +} + +void TableFunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "TABLE_FUNCTION id: " << format_state.getNodeId(this); + + if (hasAlias()) + buffer << ", alias: " << getAlias(); + + buffer << ", table_function_name: " << table_function_name; + + if (table_expression_modifiers) + { + buffer << ", "; + table_expression_modifiers->dump(buffer); + } + + const auto & arguments = getArguments(); + if (!arguments.getNodes().empty()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "ARGUMENTS\n"; + arguments.dumpTreeImpl(buffer, format_state, indent + 4); + } +} + +bool TableFunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + if (table_function_name != rhs_typed.table_function_name) + return false; + + if (storage && rhs_typed.storage) + return storage_id == rhs_typed.storage_id; + + if (table_expression_modifiers && rhs_typed.table_expression_modifiers && table_expression_modifiers != rhs_typed.table_expression_modifiers) + return false; + else if (table_expression_modifiers && !rhs_typed.table_expression_modifiers) + return false; + else if (!table_expression_modifiers && rhs_typed.table_expression_modifiers) + return false; + + return true; +} + +void TableFunctionNode::updateTreeHashImpl(HashState & state) const +{ + state.update(table_function_name.size()); + state.update(table_function_name); + + if (storage) + { + auto full_name = storage_id.getFullNameNotQuoted(); + state.update(full_name.size()); + state.update(full_name); + } + + if (table_expression_modifiers) + table_expression_modifiers->updateTreeHash(state); +} + +QueryTreeNodePtr TableFunctionNode::cloneImpl() const +{ + auto result = std::make_shared(table_function_name); + + result->storage = storage; + result->storage_id = storage_id; + result->storage_snapshot = storage_snapshot; + result->table_expression_modifiers = table_expression_modifiers; + + return result; +} + +ASTPtr TableFunctionNode::toASTImpl() const +{ + auto table_function_ast = std::make_shared(); + + table_function_ast->name = table_function_name; + + const auto & arguments = getArguments(); + table_function_ast->children.push_back(arguments.toAST()); + table_function_ast->arguments = table_function_ast->children.back(); + + return table_function_ast; +} + +} diff --git a/src/Analyzer/TableFunctionNode.h b/src/Analyzer/TableFunctionNode.h new file mode 100644 index 00000000000..a9f08ed4d1a --- /dev/null +++ 
b/src/Analyzer/TableFunctionNode.h @@ -0,0 +1,156 @@ +#pragma once + +#include +#include +#include + +#include +#include + +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/** Table function node represents table function in query tree. + * Example: SELECT a FROM table_function(arguments...). + * + * In query tree table function arguments are represented by ListNode. + * + * Table function resolution must be done during query analysis pass. + */ +class ITableFunction; +using TableFunctionPtr = std::shared_ptr; + +class TableFunctionNode; +using TableFunctionNodePtr = std::shared_ptr; + +class TableFunctionNode : public IQueryTreeNode +{ +public: + /// Construct table function node with table function name + explicit TableFunctionNode(String table_function_name); + + /// Get table function name + const String & getTableFunctionName() const + { + return table_function_name; + } + + /// Get arguments + const ListNode & getArguments() const + { + return children[arguments_child_index]->as(); + } + + /// Get arguments + ListNode & getArguments() + { + return children[arguments_child_index]->as(); + } + + /// Get arguments node + const QueryTreeNodePtr & getArgumentsNode() const + { + return children[arguments_child_index]; + } + + /// Get arguments node + QueryTreeNodePtr & getArgumentsNode() + { + return children[arguments_child_index]; + } + + /// Returns true, if table function is resolved, false otherwise + bool isResolved() const + { + return storage != nullptr && table_function != nullptr; + } + + /// Get table function, returns nullptr if table function node is not resolved + const TableFunctionPtr & getTableFunction() const + { + return table_function; + } + + /// Get storage, returns nullptr if table function node is not resolved + const StoragePtr & getStorage() const + { + return storage; + } + + /// Get storage, throws exception if table function node is not resolved + const StoragePtr & getStorageOrThrow() const + { + if (!storage) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function node is not resolved"); + + return storage; + } + + /// Resolve table function with table function, storage and context + void resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context); + + /// Get storage id, throws exception if function node is not resolved + const StorageID & getStorageID() const; + + /// Get storage snapshot, throws exception if function node is not resolved + const StorageSnapshotPtr & getStorageSnapshot() const; + + /// Return true if table function node has table expression modifiers, false otherwise + bool hasTableExpressionModifiers() const + { + return table_expression_modifiers.has_value(); + } + + /// Get table expression modifiers + const std::optional & getTableExpressionModifiers() const + { + return table_expression_modifiers; + } + + /// Set table expression modifiers + void setTableExpressionModifiers(TableExpressionModifiers table_expression_modifiers_value) + { + table_expression_modifiers = std::move(table_expression_modifiers_value); + } + + QueryTreeNodeType getNodeType() const override + { + return QueryTreeNodeType::TABLE_FUNCTION; + } + + String getName() const override; + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + +protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(HashState & state) const override; + + QueryTreeNodePtr cloneImpl() const 
override; + + ASTPtr toASTImpl() const override; + +private: + String table_function_name; + TableFunctionPtr table_function; + StoragePtr storage; + StorageID storage_id; + StorageSnapshotPtr storage_snapshot; + std::optional table_expression_modifiers; + + static constexpr size_t arguments_child_index = 0; + static constexpr size_t children_size = arguments_child_index + 1; +}; + +} + diff --git a/src/Analyzer/TableNode.cpp b/src/Analyzer/TableNode.cpp new file mode 100644 index 00000000000..3075bce238a --- /dev/null +++ b/src/Analyzer/TableNode.cpp @@ -0,0 +1,87 @@ +#include + +#include +#include +#include + +#include + +#include + +#include + +namespace DB +{ + +TableNode::TableNode(StoragePtr storage_, StorageID storage_id_, TableLockHolder storage_lock_, StorageSnapshotPtr storage_snapshot_) + : IQueryTreeNode(children_size) + , storage(std::move(storage_)) + , storage_id(std::move(storage_id_)) + , storage_lock(std::move(storage_lock_)) + , storage_snapshot(std::move(storage_snapshot_)) +{} + +TableNode::TableNode(StoragePtr storage_, TableLockHolder storage_lock_, StorageSnapshotPtr storage_snapshot_) + : TableNode(storage_, storage_->getStorageID(), std::move(storage_lock_), std::move(storage_snapshot_)) +{ +} + +void TableNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "TABLE id: " << format_state.getNodeId(this); + + if (hasAlias()) + buffer << ", alias: " << getAlias(); + + buffer << ", table_name: " << storage_id.getFullNameNotQuoted(); + + if (table_expression_modifiers) + { + buffer << ", "; + table_expression_modifiers->dump(buffer); + } +} + +bool TableNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + + if (table_expression_modifiers && rhs_typed.table_expression_modifiers && table_expression_modifiers != rhs_typed.table_expression_modifiers) + return false; + else if (table_expression_modifiers && !rhs_typed.table_expression_modifiers) + return false; + else if (!table_expression_modifiers && rhs_typed.table_expression_modifiers) + return false; + + return storage_id == rhs_typed.storage_id; +} + +void TableNode::updateTreeHashImpl(HashState & state) const +{ + auto full_name = storage_id.getFullNameNotQuoted(); + state.update(full_name.size()); + state.update(full_name); + + if (table_expression_modifiers) + table_expression_modifiers->updateTreeHash(state); +} + +String TableNode::getName() const +{ + return storage->getStorageID().getFullNameNotQuoted(); +} + +QueryTreeNodePtr TableNode::cloneImpl() const +{ + auto result_table_node = std::make_shared(storage, storage_id, storage_lock, storage_snapshot); + result_table_node->table_expression_modifiers = table_expression_modifiers; + + return result_table_node; +} + +ASTPtr TableNode::toASTImpl() const +{ + return std::make_shared(storage_id.getDatabaseName(), storage_id.getTableName()); +} + +} diff --git a/src/Analyzer/TableNode.h b/src/Analyzer/TableNode.h new file mode 100644 index 00000000000..f8e897378d6 --- /dev/null +++ b/src/Analyzer/TableNode.h @@ -0,0 +1,103 @@ +#pragma once + +#include +#include +#include + +#include +#include + +#include +#include + +namespace DB +{ + +/** Table node represents table in query tree. + * Example: SELECT a FROM test_table. + * test_table - is identifier, that during query analysis pass must be resolved into table node. 
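 * After resolution the table node holds the storage, its StorageID, a table lock and a storage snapshot (see the constructors below).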
+ */ +class TableNode; +using TableNodePtr = std::shared_ptr; + +class TableNode : public IQueryTreeNode +{ +public: + /// Construct table node with storage, storage id, storage lock, storage snapshot + explicit TableNode(StoragePtr storage_, StorageID storage_id_, TableLockHolder storage_lock_, StorageSnapshotPtr storage_snapshot_); + + /// Construct table node with storage, storage lock, storage snapshot + explicit TableNode(StoragePtr storage_, TableLockHolder storage_lock_, StorageSnapshotPtr storage_snapshot_); + + /// Get storage + const StoragePtr & getStorage() const + { + return storage; + } + + /// Get storage id + const StorageID & getStorageID() const + { + return storage_id; + } + + /// Get storage snapshot + const StorageSnapshotPtr & getStorageSnapshot() const + { + return storage_snapshot; + } + + /// Get storage lock + const TableLockHolder & getStorageLock() const + { + return storage_lock; + } + + /// Return true if table node has table expression modifiers, false otherwise + bool hasTableExpressionModifiers() const + { + return table_expression_modifiers.has_value(); + } + + /// Get table expression modifiers + const std::optional & getTableExpressionModifiers() const + { + return table_expression_modifiers; + } + + /// Set table expression modifiers + void setTableExpressionModifiers(TableExpressionModifiers table_expression_modifiers_value) + { + table_expression_modifiers = std::move(table_expression_modifiers_value); + } + + QueryTreeNodeType getNodeType() const override + { + return QueryTreeNodeType::TABLE; + } + + String getName() const override; + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + +protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(HashState & state) const override; + + QueryTreeNodePtr cloneImpl() const override; + + ASTPtr toASTImpl() const override; + +private: + StoragePtr storage; + StorageID storage_id; + TableLockHolder storage_lock; + StorageSnapshotPtr storage_snapshot; + std::optional table_expression_modifiers; + + static constexpr size_t children_size = 0; +}; + +} + diff --git a/src/Analyzer/UnionNode.cpp b/src/Analyzer/UnionNode.cpp new file mode 100644 index 00000000000..fe913373981 --- /dev/null +++ b/src/Analyzer/UnionNode.cpp @@ -0,0 +1,254 @@ +#include + +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TYPE_MISMATCH; +} + +UnionNode::UnionNode() + : IQueryTreeNode(children_size) +{ + children[queries_child_index] = std::make_shared(); +} + +NamesAndTypes UnionNode::computeProjectionColumns() const +{ + std::vector projections; + + NamesAndTypes query_node_projection; + + const auto & query_nodes = getQueries().getNodes(); + projections.reserve(query_nodes.size()); + + for (const auto & query_node : query_nodes) + { + if (auto * query_node_typed = query_node->as()) + query_node_projection = query_node_typed->getProjectionColumns(); + else if (auto * union_node_typed = query_node->as()) + query_node_projection = union_node_typed->computeProjectionColumns(); + + projections.push_back(query_node_projection); + + if (query_node_projection.size() != projections.front().size()) + throw Exception(ErrorCodes::TYPE_MISMATCH, "UNION different number of columns in queries"); + } + + NamesAndTypes result_columns; + + size_t projections_size = 
projections.size(); + DataTypes projection_column_types; + projection_column_types.resize(projections_size); + + size_t columns_size = query_node_projection.size(); + for (size_t column_index = 0; column_index < columns_size; ++column_index) + { + for (size_t projection_index = 0; projection_index < projections_size; ++projection_index) + projection_column_types[projection_index] = projections[projection_index][column_index].type; + + auto result_type = getLeastSupertype(projection_column_types); + result_columns.emplace_back(projections.front()[column_index].name, std::move(result_type)); + } + + return result_columns; +} + +String UnionNode::getName() const +{ + WriteBufferFromOwnString buffer; + + auto query_nodes = getQueries().getNodes(); + size_t query_nodes_size = query_nodes.size(); + + for (size_t i = 0; i < query_nodes_size; ++i) + { + const auto & query_node = query_nodes[i]; + buffer << query_node->getName(); + + if (i == 0) + continue; + + auto query_union_mode = union_modes.at(i - 1); + + if (query_union_mode == SelectUnionMode::UNION_DEFAULT) + buffer << "UNION"; + else if (query_union_mode == SelectUnionMode::UNION_ALL) + buffer << "UNION ALL"; + else if (query_union_mode == SelectUnionMode::UNION_DISTINCT) + buffer << "UNION DISTINCT"; + else if (query_union_mode == SelectUnionMode::EXCEPT_DEFAULT) + buffer << "EXCEPT"; + else if (query_union_mode == SelectUnionMode::EXCEPT_ALL) + buffer << "EXCEPT ALL"; + else if (query_union_mode == SelectUnionMode::EXCEPT_DISTINCT) + buffer << "EXCEPT DISTINCT"; + else if (query_union_mode == SelectUnionMode::INTERSECT_DEFAULT) + buffer << "INTERSECT"; + else if (query_union_mode == SelectUnionMode::INTERSECT_ALL) + buffer << "INTERSECT ALL"; + else if (query_union_mode == SelectUnionMode::INTERSECT_DISTINCT) + buffer << "INTERSECT DISTINCT"; + } + + return buffer.str(); +} + +void UnionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "UNION id: " << format_state.getNodeId(this); + + if (hasAlias()) + buffer << ", alias: " << getAlias(); + + if (is_subquery) + buffer << ", is_subquery: " << is_subquery; + + if (is_cte) + buffer << ", is_cte: " << is_cte; + + if (!cte_name.empty()) + buffer << ", cte_name: " << cte_name; + + if (constant_value) + { + buffer << ", constant_value: " << constant_value->getValue().dump(); + buffer << ", constant_value_type: " << constant_value->getType()->getName(); + } + + if (table_expression_modifiers) + { + buffer << ", "; + table_expression_modifiers->dump(buffer); + } + + buffer << ", union_mode: " << toString(union_mode); + + size_t union_modes_size = union_modes.size(); + buffer << '\n' << std::string(indent + 2, ' ') << "UNION MODES " << union_modes_size << '\n'; + + for (size_t i = 0; i < union_modes_size; ++i) + { + buffer << std::string(indent + 4, ' '); + + auto query_union_mode = union_modes[i]; + buffer << toString(query_union_mode); + + if (i + 1 != union_modes_size) + buffer << '\n'; + } + + buffer << '\n' << std::string(indent + 2, ' ') << "QUERIES\n"; + getQueriesNode()->dumpTreeImpl(buffer, format_state, indent + 4); +} + +bool UnionNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + if (constant_value && rhs_typed.constant_value && *constant_value != *rhs_typed.constant_value) + return false; + else if (constant_value && !rhs_typed.constant_value) + return false; + else if (!constant_value && rhs_typed.constant_value) + return false; + + if 
(table_expression_modifiers && rhs_typed.table_expression_modifiers && table_expression_modifiers != rhs_typed.table_expression_modifiers) + return false; + else if (table_expression_modifiers && !rhs_typed.table_expression_modifiers) + return false; + else if (!table_expression_modifiers && rhs_typed.table_expression_modifiers) + return false; + + return is_subquery == rhs_typed.is_subquery && is_cte == rhs_typed.is_cte && cte_name == rhs_typed.cte_name && + union_mode == rhs_typed.union_mode && union_modes == rhs_typed.union_modes; +} + +void UnionNode::updateTreeHashImpl(HashState & state) const +{ + state.update(is_subquery); + state.update(is_cte); + + state.update(cte_name.size()); + state.update(cte_name); + + state.update(static_cast(union_mode)); + + state.update(union_modes.size()); + for (const auto & query_union_mode : union_modes) + state.update(static_cast(query_union_mode)); + + if (constant_value) + { + auto constant_dump = applyVisitor(FieldVisitorToString(), constant_value->getValue()); + state.update(constant_dump.size()); + state.update(constant_dump); + + auto constant_value_type_name = constant_value->getType()->getName(); + state.update(constant_value_type_name.size()); + state.update(constant_value_type_name); + } + + if (table_expression_modifiers) + table_expression_modifiers->updateTreeHash(state); +} + +QueryTreeNodePtr UnionNode::cloneImpl() const +{ + auto result_union_node = std::make_shared(); + + result_union_node->is_subquery = is_subquery; + result_union_node->is_cte = is_cte; + result_union_node->cte_name = cte_name; + result_union_node->union_mode = union_mode; + result_union_node->union_modes = union_modes; + result_union_node->union_modes_set = union_modes_set; + result_union_node->constant_value = constant_value; + result_union_node->table_expression_modifiers = table_expression_modifiers; + + return result_union_node; +} + +ASTPtr UnionNode::toASTImpl() const +{ + auto select_with_union_query = std::make_shared(); + select_with_union_query->union_mode = union_mode; + + if (union_mode != SelectUnionMode::UNION_DEFAULT && + union_mode != SelectUnionMode::EXCEPT_DEFAULT && + union_mode != SelectUnionMode::INTERSECT_DEFAULT) + select_with_union_query->is_normalized = true; + + select_with_union_query->list_of_modes = union_modes; + select_with_union_query->set_of_modes = union_modes_set; + select_with_union_query->children.push_back(getQueriesNode()->toAST()); + select_with_union_query->list_of_selects = select_with_union_query->children.back(); + + return select_with_union_query; +} + +} diff --git a/src/Analyzer/UnionNode.h b/src/Analyzer/UnionNode.h new file mode 100644 index 00000000000..9c502c8fe5b --- /dev/null +++ b/src/Analyzer/UnionNode.h @@ -0,0 +1,203 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; +} + +/** Union node represents union of queries in query tree. + * + * Example: (SELECT id FROM test_table) UNION ALL (SELECT id FROM test_table_2); + * Example: (SELECT id FROM test_table) UNION DISTINCT (SELECT id FROM test_table_2); + * Example: (SELECT id FROM test_table) EXCEPT ALL (SELECT id FROM test_table_2); + * Example: (SELECT id FROM test_table) EXCEPT DISTINCT (SELECT id FROM test_table_2); + * Example: (SELECT id FROM test_table) INTERSECT ALL (SELECT id FROM test_table_2); + * Example: (SELECT id FROM test_table) INTERSECT DISTINCT (SELECT id FROM test_table_2); + * + * Union node can be used as CTE. 
+ * Example: WITH cte_subquery AS ((SELECT id FROM test_table) UNION ALL (SELECT id FROM test_table_2)) SELECT * FROM cte_subquery; + * + * Union node can be used as scalar subquery. + * Example: SELECT (SELECT 1 UNION DISTINCT SELECT 1); + * + * During query analysis pass union node queries must be resolved. + */ +class UnionNode; +using UnionNodePtr = std::shared_ptr; + +class UnionNode final : public IQueryTreeNode +{ +public: + explicit UnionNode(); + + /// Returns true if union node is subquery, false otherwise + bool isSubquery() const + { + return is_subquery; + } + + /// Set union node is subquery value + void setIsSubquery(bool is_subquery_value) + { + is_subquery = is_subquery_value; + } + + /// Returns true if union node is CTE, false otherwise + bool isCTE() const + { + return is_cte; + } + + /// Set union node is CTE + void setIsCTE(bool is_cte_value) + { + is_cte = is_cte_value; + } + + /// Get union node CTE name + const std::string & getCTEName() const + { + return cte_name; + } + + /// Set union node CTE name + void setCTEName(std::string cte_name_value) + { + cte_name = std::move(cte_name_value); + } + + /// Get union mode + SelectUnionMode getUnionMode() const + { + return union_mode; + } + + /// Set union mode value + void setUnionMode(SelectUnionMode union_mode_value) + { + union_mode = union_mode_value; + } + + /// Get union modes + const SelectUnionModes & getUnionModes() const + { + return union_modes; + } + + /// Set union modes value + void setUnionModes(const SelectUnionModes & union_modes_value) + { + union_modes = union_modes_value; + union_modes_set = SelectUnionModesSet(union_modes.begin(), union_modes.end()); + } + + /// Get union node queries + const ListNode & getQueries() const + { + return children[queries_child_index]->as(); + } + + /// Get union node queries + ListNode & getQueries() + { + return children[queries_child_index]->as(); + } + + /// Get union node queries node + const QueryTreeNodePtr & getQueriesNode() const + { + return children[queries_child_index]; + } + + /// Get union node queries node + QueryTreeNodePtr & getQueriesNode() + { + return children[queries_child_index]; + } + + /// Return true if union node has table expression modifiers, false otherwise + bool hasTableExpressionModifiers() const + { + return table_expression_modifiers.has_value(); + } + + /// Get table expression modifiers + const std::optional & getTableExpressionModifiers() const + { + return table_expression_modifiers; + } + + /// Set table expression modifiers + void setTableExpressionModifiers(TableExpressionModifiers table_expression_modifiers_value) + { + table_expression_modifiers = std::move(table_expression_modifiers_value); + } + + /// Compute union node projection columns + NamesAndTypes computeProjectionColumns() const; + + QueryTreeNodeType getNodeType() const override + { + return QueryTreeNodeType::UNION; + } + + String getName() const override; + + DataTypePtr getResultType() const override + { + if (constant_value) + return constant_value->getType(); + + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Method getResultType is not supported for non scalar union node"); + } + + /// Perform constant folding for scalar union node + void performConstantFolding(ConstantValuePtr constant_folded_value) + { + constant_value = std::move(constant_folded_value); + } + + ConstantValuePtr getConstantValueOrNull() const override + { + return constant_value; + } + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + 
+protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(HashState &) const override; + + QueryTreeNodePtr cloneImpl() const override; + + ASTPtr toASTImpl() const override; + +private: + bool is_subquery = false; + bool is_cte = false; + std::string cte_name; + SelectUnionMode union_mode; + SelectUnionModes union_modes; + SelectUnionModesSet union_modes_set; + ConstantValuePtr constant_value; + std::optional table_expression_modifiers; + + static constexpr size_t queries_child_index = 0; + static constexpr size_t children_size = queries_child_index + 1; +}; + +} diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp new file mode 100644 index 00000000000..5f0d682865f --- /dev/null +++ b/src/Analyzer/Utils.cpp @@ -0,0 +1,334 @@ +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +bool isNodePartOfTree(const IQueryTreeNode * node, const IQueryTreeNode * root) +{ + std::vector nodes_to_process; + nodes_to_process.push_back(root); + + while (!nodes_to_process.empty()) + { + const auto * subtree_node = nodes_to_process.back(); + nodes_to_process.pop_back(); + + if (subtree_node == node) + return true; + + for (const auto & child : subtree_node->getChildren()) + { + if (child) + nodes_to_process.push_back(child.get()); + } + } + + return false; +} + +bool isNameOfInFunction(const std::string & function_name) +{ + bool is_special_function_in = function_name == "in" || + function_name == "globalIn" || + function_name == "notIn" || + function_name == "globalNotIn" || + function_name == "nullIn" || + function_name == "globalNullIn" || + function_name == "notNullIn" || + function_name == "globalNotNullIn" || + function_name == "inIgnoreSet" || + function_name == "globalInIgnoreSet" || + function_name == "notInIgnoreSet" || + function_name == "globalNotInIgnoreSet" || + function_name == "nullInIgnoreSet" || + function_name == "globalNullInIgnoreSet" || + function_name == "notNullInIgnoreSet" || + function_name == "globalNotNullInIgnoreSet"; + + return is_special_function_in; +} + +static ASTPtr convertIntoTableExpressionAST(const QueryTreeNodePtr & table_expression_node) +{ + ASTPtr table_expression_node_ast; + auto node_type = table_expression_node->getNodeType(); + + if (node_type == QueryTreeNodeType::IDENTIFIER) + { + const auto & identifier_node = table_expression_node->as(); + const auto & identifier = identifier_node.getIdentifier(); + + if (identifier.getPartsSize() == 1) + table_expression_node_ast = std::make_shared(identifier[0]); + else if (identifier.getPartsSize() == 2) + table_expression_node_ast = std::make_shared(identifier[0], identifier[1]); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Identifier for table expression must contain 1 or 2 parts. 
Actual '{}'", + identifier.getFullName()); + } + else + { + table_expression_node_ast = table_expression_node->toAST(); + } + + auto result_table_expression = std::make_shared(); + result_table_expression->children.push_back(table_expression_node_ast); + + std::optional table_expression_modifiers; + + if (node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION) + { + if (auto * query_node = table_expression_node->as()) + table_expression_modifiers = query_node->getTableExpressionModifiers(); + else if (auto * union_node = table_expression_node->as()) + table_expression_modifiers = union_node->getTableExpressionModifiers(); + + result_table_expression->subquery = result_table_expression->children.back(); + } + else if (node_type == QueryTreeNodeType::TABLE || node_type == QueryTreeNodeType::IDENTIFIER) + { + if (auto * table_node = table_expression_node->as()) + table_expression_modifiers = table_node->getTableExpressionModifiers(); + else if (auto * identifier_node = table_expression_node->as()) + table_expression_modifiers = identifier_node->getTableExpressionModifiers(); + + result_table_expression->database_and_table_name = result_table_expression->children.back(); + } + else if (node_type == QueryTreeNodeType::TABLE_FUNCTION) + { + if (auto * table_function_node = table_expression_node->as()) + table_expression_modifiers = table_function_node->getTableExpressionModifiers(); + + result_table_expression->table_function = result_table_expression->children.back(); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected identifier, table, query, union or table function. Actual {}", + table_expression_node->formatASTForErrorMessage()); + } + + if (table_expression_modifiers) + { + result_table_expression->final = table_expression_modifiers->hasFinal(); + + const auto & sample_size_ratio = table_expression_modifiers->getSampleSizeRatio(); + if (sample_size_ratio.has_value()) + result_table_expression->sample_size = std::make_shared(*sample_size_ratio); + + const auto & sample_offset_ratio = table_expression_modifiers->getSampleOffsetRatio(); + if (sample_offset_ratio.has_value()) + result_table_expression->sample_offset = std::make_shared(*sample_offset_ratio); + } + + return result_table_expression; +} + +void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_query_ast, const QueryTreeNodePtr & table_expression) +{ + auto table_expression_node_type = table_expression->getNodeType(); + + switch (table_expression_node_type) + { + case QueryTreeNodeType::IDENTIFIER: + [[fallthrough]]; + case QueryTreeNodeType::TABLE: + [[fallthrough]]; + case QueryTreeNodeType::QUERY: + [[fallthrough]]; + case QueryTreeNodeType::UNION: + [[fallthrough]]; + case QueryTreeNodeType::TABLE_FUNCTION: + { + auto table_expression_ast = convertIntoTableExpressionAST(table_expression); + + auto tables_in_select_query_element_ast = std::make_shared(); + tables_in_select_query_element_ast->children.push_back(std::move(table_expression_ast)); + tables_in_select_query_element_ast->table_expression = tables_in_select_query_element_ast->children.back(); + + tables_in_select_query_ast->children.push_back(std::move(tables_in_select_query_element_ast)); + break; + } + case QueryTreeNodeType::ARRAY_JOIN: + [[fallthrough]]; + case QueryTreeNodeType::JOIN: + { + auto table_expression_tables_in_select_query_ast = table_expression->toAST(); + tables_in_select_query_ast->children.reserve(table_expression_tables_in_select_query_ast->children.size()); + for (auto && 
table_element_ast : table_expression_tables_in_select_query_ast->children) + tables_in_select_query_ast->children.push_back(std::move(table_element_ast)); + break; + } + default: + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected node type for table expression. Expected identifier, table, table function, query, union, join or array join. Actual {}", + table_expression->getNodeTypeName()); + } + } +} + +QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node) +{ + QueryTreeNodes result; + + std::deque nodes_to_process; + nodes_to_process.push_back(join_tree_node); + + while (!nodes_to_process.empty()) + { + auto node_to_process = std::move(nodes_to_process.front()); + nodes_to_process.pop_front(); + + auto node_type = node_to_process->getNodeType(); + + switch (node_type) + { + case QueryTreeNodeType::TABLE: + [[fallthrough]]; + case QueryTreeNodeType::QUERY: + [[fallthrough]]; + case QueryTreeNodeType::UNION: + [[fallthrough]]; + case QueryTreeNodeType::TABLE_FUNCTION: + { + result.push_back(std::move(node_to_process)); + break; + } + case QueryTreeNodeType::ARRAY_JOIN: + { + auto & array_join_node = node_to_process->as(); + nodes_to_process.push_front(array_join_node.getTableExpression()); + break; + } + case QueryTreeNodeType::JOIN: + { + auto & join_node = node_to_process->as(); + nodes_to_process.push_front(join_node.getRightTableExpression()); + nodes_to_process.push_front(join_node.getLeftTableExpression()); + break; + } + default: + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected node type for table expression. Expected table, table function, query, union, join or array join. Actual {}", + node_to_process->getNodeTypeName()); + } + } + } + + return result; +} + +namespace +{ + +void buildTableExpressionsStackImpl(const QueryTreeNodePtr & join_tree_node, QueryTreeNodes & result) +{ + auto node_type = join_tree_node->getNodeType(); + + switch (node_type) + { + case QueryTreeNodeType::TABLE: + [[fallthrough]]; + case QueryTreeNodeType::QUERY: + [[fallthrough]]; + case QueryTreeNodeType::UNION: + [[fallthrough]]; + case QueryTreeNodeType::TABLE_FUNCTION: + { + result.push_back(join_tree_node); + break; + } + case QueryTreeNodeType::ARRAY_JOIN: + { + auto & array_join_node = join_tree_node->as(); + buildTableExpressionsStackImpl(array_join_node.getTableExpression(), result); + result.push_back(join_tree_node); + break; + } + case QueryTreeNodeType::JOIN: + { + auto & join_node = join_tree_node->as(); + buildTableExpressionsStackImpl(join_node.getLeftTableExpression(), result); + buildTableExpressionsStackImpl(join_node.getRightTableExpression(), result); + result.push_back(join_tree_node); + break; + } + default: + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected node type for table expression. Expected table, table function, query, union, join or array join. 
Actual {}", + join_tree_node->getNodeTypeName()); + } + } +} + +} + +QueryTreeNodes buildTableExpressionsStack(const QueryTreeNodePtr & join_tree_node) +{ + QueryTreeNodes result; + buildTableExpressionsStackImpl(join_tree_node, result); + + return result; +} + +QueryTreeNodePtr getColumnSourceForJoinNodeWithUsing(const QueryTreeNodePtr & join_node) +{ + QueryTreeNodePtr column_source_node = join_node; + + while (true) + { + auto column_source_node_type = column_source_node->getNodeType(); + if (column_source_node_type == QueryTreeNodeType::TABLE || + column_source_node_type == QueryTreeNodeType::TABLE_FUNCTION || + column_source_node_type == QueryTreeNodeType::QUERY || + column_source_node_type == QueryTreeNodeType::UNION) + { + break; + } + else if (column_source_node_type == QueryTreeNodeType::ARRAY_JOIN) + { + auto & array_join_node = column_source_node->as(); + column_source_node = array_join_node.getTableExpression(); + continue; + } + else if (column_source_node_type == QueryTreeNodeType::JOIN) + { + auto & join_node_typed = column_source_node->as(); + column_source_node = isRight(join_node_typed.getKind()) ? join_node_typed.getRightTableExpression() : join_node_typed.getLeftTableExpression(); + continue; + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected node type for table expression. Expected table, table function, query, union, join or array join. Actual {}", + column_source_node->getNodeTypeName()); + } + } + + return column_source_node; +} + +} diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h new file mode 100644 index 00000000000..325a7d2fcc8 --- /dev/null +++ b/src/Analyzer/Utils.h @@ -0,0 +1,39 @@ +#pragma once + +#include + +namespace DB +{ + +/// Returns true if node part of root tree, false otherwise +bool isNodePartOfTree(const IQueryTreeNode * node, const IQueryTreeNode * root); + +/// Returns true if function name is name of IN function or its variations, false otherwise +bool isNameOfInFunction(const std::string & function_name); + +/** Add table expression in tables in select query children. + * If table expression node is not of identifier node, table node, query node, table function node, join node or array join node type throws logical error exception. + */ +void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_query_ast, const QueryTreeNodePtr & table_expression); + +/// Extract table, table function, query, union from join tree +QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node); + +/** Build table expressions stack that consists from table, table function, query, union, join, array join from join tree. + * + * Example: SELECT * FROM t1 INNER JOIN t2 INNER JOIN t3. + * Result table expressions stack: + * 1. t1 INNER JOIN t2 INNER JOIN t3 + * 2. t3 + * 3. t1 INNER JOIN t2 + * 4. t2 + * 5. t1 + */ +QueryTreeNodes buildTableExpressionsStack(const QueryTreeNodePtr & join_tree_node); + +/** Get column source for JOIN node with USING. 
+ * Example: SELECT id FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 USING (id); + */ +QueryTreeNodePtr getColumnSourceForJoinNodeWithUsing(const QueryTreeNodePtr & join_node); + +} diff --git a/src/Analyzer/WindowFunctionsUtils.cpp b/src/Analyzer/WindowFunctionsUtils.cpp new file mode 100644 index 00000000000..fb411f2418c --- /dev/null +++ b/src/Analyzer/WindowFunctionsUtils.cpp @@ -0,0 +1,78 @@ +#include + +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_AGGREGATION; +} + +namespace +{ + +class CollectWindowFunctionNodeVisitor : public ConstInDepthQueryTreeVisitor +{ +public: + explicit CollectWindowFunctionNodeVisitor(QueryTreeNodes * window_function_nodes_) + : window_function_nodes(window_function_nodes_) + {} + + explicit CollectWindowFunctionNodeVisitor(String assert_no_window_functions_place_message_) + : assert_no_window_functions_place_message(std::move(assert_no_window_functions_place_message_)) + {} + + void visitImpl(const QueryTreeNodePtr & node) + { + auto * function_node = node->as(); + if (!function_node || !function_node->isWindowFunction()) + return; + + if (!assert_no_window_functions_place_message.empty()) + throw Exception(ErrorCodes::ILLEGAL_AGGREGATION, + "Window function {} is found {} in query", + function_node->formatASTForErrorMessage(), + assert_no_window_functions_place_message); + + if (window_function_nodes) + window_function_nodes->push_back(node); + } + + static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node) + { + return !(child_node->getNodeType() == QueryTreeNodeType::QUERY || child_node->getNodeType() == QueryTreeNodeType::UNION); + } + +private: + QueryTreeNodes * window_function_nodes = nullptr; + String assert_no_window_functions_place_message; +}; + +} + +QueryTreeNodes collectWindowFunctionNodes(const QueryTreeNodePtr & node) +{ + QueryTreeNodes window_function_nodes; + CollectWindowFunctionNodeVisitor visitor(&window_function_nodes); + visitor.visit(node); + + return window_function_nodes; +} + +void collectWindowFunctionNodes(const QueryTreeNodePtr & node, QueryTreeNodes & result) +{ + CollectWindowFunctionNodeVisitor visitor(&result); + visitor.visit(node); +} + +void assertNoWindowFunctionNodes(const QueryTreeNodePtr & node, const String & assert_no_window_functions_place_message) +{ + CollectWindowFunctionNodeVisitor visitor(assert_no_window_functions_place_message); + visitor.visit(node); +} + +} diff --git a/src/Analyzer/WindowFunctionsUtils.h b/src/Analyzer/WindowFunctionsUtils.h new file mode 100644 index 00000000000..b6ff5f22f93 --- /dev/null +++ b/src/Analyzer/WindowFunctionsUtils.h @@ -0,0 +1,23 @@ +#pragma once + +#include + +namespace DB +{ + +/** Collect window function nodes in node children. + * Do not visit subqueries. + */ +QueryTreeNodes collectWindowFunctionNodes(const QueryTreeNodePtr & node); + +/** Collect window function nodes in node children and add them into result. + * Do not visit subqueries. + */ +void collectWindowFunctionNodes(const QueryTreeNodePtr & node, QueryTreeNodes & result); + +/** Assert that there are no window function nodes in node children. + * Do not visit subqueries. 
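 * Illustrative usage (the argument names are hypothetical):
 *     assertNoWindowFunctionNodes(query_node->getWhere(), "in WHERE");
 * Throws ILLEGAL_AGGREGATION if a window function node is found.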
+ */ +void assertNoWindowFunctionNodes(const QueryTreeNodePtr & node, const String & assert_no_window_functions_place_message); + +} diff --git a/src/Analyzer/WindowNode.cpp b/src/Analyzer/WindowNode.cpp new file mode 100644 index 00000000000..ccf49bc37d8 --- /dev/null +++ b/src/Analyzer/WindowNode.cpp @@ -0,0 +1,213 @@ +#include + +#include + +#include +#include + +#include + +namespace DB +{ + +WindowNode::WindowNode(WindowFrame window_frame_) + : IQueryTreeNode(children_size) + , window_frame(std::move(window_frame_)) +{ + children[partition_by_child_index] = std::make_shared(); + children[order_by_child_index] = std::make_shared(); +} + +String WindowNode::getName() const +{ + String result; + + if (hasPartitionBy()) + { + result += "PARTITION BY"; + result += getPartitionBy().getName(); + } + + if (hasOrderBy()) + { + result += "ORDER BY"; + result += getOrderBy().getName(); + } + + if (!window_frame.is_default) + { + if (hasPartitionBy() || hasOrderBy()) + result += ' '; + + if (window_frame.type == WindowFrame::FrameType::ROWS) + result += "ROWS"; + else if (window_frame.type == WindowFrame::FrameType::GROUPS) + result += "GROUPS"; + else if (window_frame.type == WindowFrame::FrameType::RANGE) + result += "RANGE"; + + result += " BETWEEN "; + if (window_frame.begin_type == WindowFrame::BoundaryType::Current) + { + result += "CURRENT ROW"; + } + else if (window_frame.begin_type == WindowFrame::BoundaryType::Unbounded) + { + result += "UNBOUNDED"; + result += " "; + result += (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING"); + } + else + { + result += getFrameBeginOffsetNode()->getName(); + result += " "; + result += (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING"); + } + + result += " AND "; + + if (window_frame.end_type == WindowFrame::BoundaryType::Current) + { + result += "CURRENT ROW"; + } + else if (window_frame.end_type == WindowFrame::BoundaryType::Unbounded) + { + result += "UNBOUNDED"; + result += " "; + result += (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING"); + } + else + { + result += getFrameEndOffsetNode()->getName(); + result += " "; + result += (window_frame.end_preceding ? 
"PRECEDING" : "FOLLOWING"); + } + } + + return result; +} + +void WindowNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "WINDOW id: " << format_state.getNodeId(this); + + if (hasAlias()) + buffer << ", alias: " << getAlias(); + + if (!parent_window_name.empty()) + buffer << ", parent_window_name: " << parent_window_name; + + buffer << ", frame_type: " << window_frame.type; + + auto window_frame_bound_type_to_string = [](WindowFrame::BoundaryType boundary_type, bool boundary_preceding) + { + std::string value; + + if (boundary_type == WindowFrame::BoundaryType::Unbounded) + value = "unbounded"; + else if (boundary_type == WindowFrame::BoundaryType::Current) + value = "current"; + else if (boundary_type == WindowFrame::BoundaryType::Offset) + value = "offset"; + + if (boundary_type != WindowFrame::BoundaryType::Current) + { + if (boundary_preceding) + value += " preceding"; + else + value += " following"; + } + + return value; + }; + + buffer << ", frame_begin_type: " << window_frame_bound_type_to_string(window_frame.begin_type, window_frame.begin_preceding); + buffer << ", frame_end_type: " << window_frame_bound_type_to_string(window_frame.end_type, window_frame.end_preceding); + + if (hasPartitionBy()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "PARTITION BY\n"; + getPartitionBy().dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (hasOrderBy()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "ORDER BY\n"; + getOrderBy().dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (hasFrameBeginOffset()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "FRAME BEGIN OFFSET\n"; + getFrameBeginOffsetNode()->dumpTreeImpl(buffer, format_state, indent + 4); + } + + if (hasFrameEndOffset()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "FRAME END OFFSET\n"; + getFrameEndOffsetNode()->dumpTreeImpl(buffer, format_state, indent + 4); + } +} + +bool WindowNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + + return window_frame == rhs_typed.window_frame && parent_window_name == rhs_typed.parent_window_name; +} + +void WindowNode::updateTreeHashImpl(HashState & hash_state) const +{ + hash_state.update(window_frame.is_default); + hash_state.update(window_frame.type); + hash_state.update(window_frame.begin_type); + hash_state.update(window_frame.begin_preceding); + hash_state.update(window_frame.end_type); + hash_state.update(window_frame.end_preceding); + + hash_state.update(parent_window_name); +} + +QueryTreeNodePtr WindowNode::cloneImpl() const +{ + auto window_node = std::make_shared(window_frame); + window_node->parent_window_name = parent_window_name; + + return window_node; +} + +ASTPtr WindowNode::toASTImpl() const +{ + auto window_definition = std::make_shared(); + + window_definition->parent_window_name = parent_window_name; + + window_definition->children.push_back(getPartitionByNode()->toAST()); + window_definition->partition_by = window_definition->children.back(); + + window_definition->children.push_back(getOrderByNode()->toAST()); + window_definition->order_by = window_definition->children.back(); + + window_definition->frame_is_default = window_frame.is_default; + window_definition->frame_type = window_frame.type; + window_definition->frame_begin_type = window_frame.begin_type; + window_definition->frame_begin_preceding = window_frame.begin_preceding; + + if (hasFrameBeginOffset()) + { + 
window_definition->children.push_back(getFrameBeginOffsetNode()->toAST()); + window_definition->frame_begin_offset = window_definition->children.back(); + } + + window_definition->frame_end_type = window_frame.end_type; + window_definition->frame_end_preceding = window_frame.end_preceding; + if (hasFrameEndOffset()) + { + window_definition->children.push_back(getFrameEndOffsetNode()->toAST()); + window_definition->frame_end_offset = window_definition->children.back(); + } + + return window_definition; +} + +} diff --git a/src/Analyzer/WindowNode.h b/src/Analyzer/WindowNode.h new file mode 100644 index 00000000000..2f99524eebd --- /dev/null +++ b/src/Analyzer/WindowNode.h @@ -0,0 +1,193 @@ +#pragma once + +#include +#include + +#include + +namespace DB +{ + +/** Window node represents window function window description. + * + * Example: SELECT * FROM test_table WINDOW window AS (PARTITION BY id); + * window AS (PARTITION BY id) - window node. + * + * Example: SELECT count() OVER (PARTITION BY id) FROM test_table; + * PARTITION BY id - window node. + * + * Window node can also refer to its parent window node. + * Example: SELECT count() OVER (parent_window ORDER BY id) FROM test_table WINDOW parent_window AS (PARTITION BY id); + * parent_window ORDER BY id - window node. + * + * Window node initially initialized with window frame. + * + * If window frame has OFFSET begin type, additionally frame begin offset node must be initialized. + * If window frame has OFFSET end type, additionally frame end offset node must be initialized. + * During query analysis pass they must be resolved, validated and window node window frame offset constants must be updated. + */ +class WindowNode; +using WindowNodePtr = std::shared_ptr; + +class WindowNode final : public IQueryTreeNode +{ +public: + /// Initialize window node with window frame + explicit WindowNode(WindowFrame window_frame_); + + /// Get window node window frame + const WindowFrame & getWindowFrame() const + { + return window_frame; + } + + /// Get window node window frame + WindowFrame & getWindowFrame() + { + return window_frame; + } + + /// Returns true if window node has parent window name, false otherwise + bool hasParentWindowName() const + { + return parent_window_name.empty(); + } + + /// Get parent window name + const String & getParentWindowName() const + { + return parent_window_name; + } + + /// Set parent window name + void setParentWindowName(String parent_window_name_value) + { + parent_window_name = std::move(parent_window_name_value); + } + + /// Returns true if window node has order by, false otherwise + bool hasOrderBy() const + { + return !getOrderBy().getNodes().empty(); + } + + /// Get order by + const ListNode & getOrderBy() const + { + return children[order_by_child_index]->as(); + } + + /// Get order by + ListNode & getOrderBy() + { + return children[order_by_child_index]->as(); + } + + /// Get order by node + const QueryTreeNodePtr & getOrderByNode() const + { + return children[order_by_child_index]; + } + + /// Get order by node + QueryTreeNodePtr & getOrderByNode() + { + return children[order_by_child_index]; + } + + /// Returns true if window node has partition by, false otherwise + bool hasPartitionBy() const + { + return !getPartitionBy().getNodes().empty(); + } + + /// Get partition by + const ListNode & getPartitionBy() const + { + return children[partition_by_child_index]->as(); + } + + /// Get partition by + ListNode & getPartitionBy() + { + return children[partition_by_child_index]->as(); + } + + /// Get 
partition by node + const QueryTreeNodePtr & getPartitionByNode() const + { + return children[partition_by_child_index]; + } + + /// Get partition by node + QueryTreeNodePtr & getPartitionByNode() + { + return children[partition_by_child_index]; + } + + /// Returns true if window node has FRAME begin offset, false otherwise + bool hasFrameBeginOffset() const + { + return getFrameBeginOffsetNode() != nullptr; + } + + /// Get FRAME begin offset node + const QueryTreeNodePtr & getFrameBeginOffsetNode() const + { + return children[frame_begin_offset_child_index]; + } + + /// Get FRAME begin offset node + QueryTreeNodePtr & getFrameBeginOffsetNode() + { + return children[frame_begin_offset_child_index]; + } + + /// Returns true if window node has FRAME end offset, false otherwise + bool hasFrameEndOffset() const + { + return getFrameEndOffsetNode() != nullptr; + } + + /// Get FRAME end offset node + const QueryTreeNodePtr & getFrameEndOffsetNode() const + { + return children[frame_end_offset_child_index]; + } + + /// Get FRAME end offset node + QueryTreeNodePtr & getFrameEndOffsetNode() + { + return children[frame_end_offset_child_index]; + } + + QueryTreeNodeType getNodeType() const override + { + return QueryTreeNodeType::WINDOW; + } + + String getName() const override; + + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + +protected: + bool isEqualImpl(const IQueryTreeNode & rhs) const override; + + void updateTreeHashImpl(HashState & hash_state) const override; + + QueryTreeNodePtr cloneImpl() const override; + + ASTPtr toASTImpl() const override; + +private: + static constexpr size_t order_by_child_index = 0; + static constexpr size_t partition_by_child_index = 1; + static constexpr size_t frame_begin_offset_child_index = 3; + static constexpr size_t frame_end_offset_child_index = 4; + static constexpr size_t children_size = frame_end_offset_child_index + 1; + + WindowFrame window_frame; + String parent_window_name; +}; + +} diff --git a/src/Analyzer/examples/CMakeLists.txt b/src/Analyzer/examples/CMakeLists.txt new file mode 100644 index 00000000000..c6b1b0b3c5f --- /dev/null +++ b/src/Analyzer/examples/CMakeLists.txt @@ -0,0 +1,3 @@ +add_executable (query_analyzer query_analyzer.cpp) +target_include_directories (query_analyzer SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR}) +target_link_libraries (query_analyzer PRIVATE dbms) diff --git a/src/Analyzer/examples/query_analyzer.cpp b/src/Analyzer/examples/query_analyzer.cpp new file mode 100644 index 00000000000..5a20b46b346 --- /dev/null +++ b/src/Analyzer/examples/query_analyzer.cpp @@ -0,0 +1,9 @@ +#include + +int main(int argc, char ** argv) +{ + (void)(argc); + (void)(argv); + + return 0; +} diff --git a/src/Analyzer/tests/CMakeLists.txt b/src/Analyzer/tests/CMakeLists.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/Analyzer/tests/gtest_identifier.cpp b/src/Analyzer/tests/gtest_identifier.cpp new file mode 100644 index 00000000000..4f224dfbb07 --- /dev/null +++ b/src/Analyzer/tests/gtest_identifier.cpp @@ -0,0 +1,227 @@ +#include + +#include + +using namespace DB; + +TEST(Identifier, IdentifierBasics) +{ + { + Identifier identifier; + + ASSERT_TRUE(identifier.empty()); + ASSERT_TRUE(identifier.isEmpty()); + ASSERT_EQ(identifier.getPartsSize(), 0); + ASSERT_FALSE(identifier.isShort()); + ASSERT_FALSE(identifier.isCompound()); + ASSERT_FALSE(identifier.startsWith("test")); + ASSERT_FALSE(identifier.endsWith("test")); + ASSERT_EQ(identifier.begin(), 
identifier.end()); + ASSERT_EQ(identifier.getFullName(), ""); + } + { + Identifier identifier("value"); + + ASSERT_FALSE(identifier.empty()); + ASSERT_FALSE(identifier.isEmpty()); + ASSERT_EQ(identifier.getPartsSize(), 1); + ASSERT_TRUE(identifier.isShort()); + ASSERT_FALSE(identifier.isCompound()); + ASSERT_EQ(identifier.front(), "value"); + ASSERT_EQ(identifier.back(), "value"); + ASSERT_FALSE(identifier.startsWith("test")); + ASSERT_FALSE(identifier.endsWith("test")); + ASSERT_TRUE(identifier.startsWith("value")); + ASSERT_TRUE(identifier.endsWith("value")); + ASSERT_EQ(identifier[0], "value"); + ASSERT_NE(identifier.begin(), identifier.end()); + ASSERT_EQ(identifier.getFullName(), "value"); + } + { + Identifier identifier("value1.value2"); + + ASSERT_FALSE(identifier.empty()); + ASSERT_FALSE(identifier.isEmpty()); + ASSERT_EQ(identifier.getPartsSize(), 2); + ASSERT_FALSE(identifier.isShort()); + ASSERT_TRUE(identifier.isCompound()); + ASSERT_EQ(identifier.front(), "value1"); + ASSERT_EQ(identifier.back(), "value2"); + ASSERT_FALSE(identifier.startsWith("test")); + ASSERT_FALSE(identifier.endsWith("test")); + ASSERT_TRUE(identifier.startsWith("value1")); + ASSERT_TRUE(identifier.endsWith("value2")); + ASSERT_EQ(identifier[0], "value1"); + ASSERT_EQ(identifier[1], "value2"); + ASSERT_NE(identifier.begin(), identifier.end()); + ASSERT_EQ(identifier.getFullName(), "value1.value2"); + } + { + Identifier identifier1("value1.value2"); + Identifier identifier2("value1.value2"); + + ASSERT_EQ(identifier1, identifier2); + } + { + Identifier identifier1("value1.value2"); + Identifier identifier2("value1.value3"); + + ASSERT_NE(identifier1, identifier2); + } +} + +TEST(Identifier, IdentifierPopParts) +{ + { + Identifier identifier("value1.value2.value3"); + + ASSERT_EQ(identifier.getFullName(), "value1.value2.value3"); + identifier.popLast(); + ASSERT_EQ(identifier.getFullName(), "value1.value2"); + identifier.popLast(); + ASSERT_EQ(identifier.getFullName(), "value1"); + identifier.popLast(); + ASSERT_EQ(identifier.getFullName(), ""); + ASSERT_TRUE(identifier.isEmpty()); + } + { + Identifier identifier("value1.value2.value3"); + + ASSERT_EQ(identifier.getFullName(), "value1.value2.value3"); + identifier.popFirst(); + ASSERT_EQ(identifier.getFullName(), "value2.value3"); + identifier.popFirst(); + ASSERT_EQ(identifier.getFullName(), "value3"); + identifier.popFirst(); + ASSERT_EQ(identifier.getFullName(), ""); + ASSERT_TRUE(identifier.isEmpty()); + } + { + Identifier identifier("value1.value2.value3"); + + ASSERT_EQ(identifier.getFullName(), "value1.value2.value3"); + identifier.popLast(); + ASSERT_EQ(identifier.getFullName(), "value1.value2"); + identifier.popFirst(); + ASSERT_EQ(identifier.getFullName(), "value2"); + identifier.popLast(); + ASSERT_EQ(identifier.getFullName(), ""); + ASSERT_TRUE(identifier.isEmpty()); + } +} + +TEST(Identifier, IdentifierViewBasics) +{ + { + Identifier identifier; + IdentifierView identifier_view(identifier); + + ASSERT_TRUE(identifier_view.empty()); + ASSERT_TRUE(identifier_view.isEmpty()); + ASSERT_EQ(identifier_view.getPartsSize(), 0); + ASSERT_FALSE(identifier_view.isShort()); + ASSERT_FALSE(identifier_view.isCompound()); + ASSERT_FALSE(identifier_view.startsWith("test")); + ASSERT_FALSE(identifier_view.endsWith("test")); + ASSERT_EQ(identifier_view.begin(), identifier_view.end()); + ASSERT_EQ(identifier_view.getFullName(), ""); + } + { + Identifier identifier("value"); + IdentifierView identifier_view(identifier); + + ASSERT_FALSE(identifier_view.empty()); + 
ASSERT_FALSE(identifier_view.isEmpty()); + ASSERT_EQ(identifier_view.getPartsSize(), 1); + ASSERT_TRUE(identifier_view.isShort()); + ASSERT_FALSE(identifier_view.isCompound()); + ASSERT_EQ(identifier_view.front(), "value"); + ASSERT_EQ(identifier_view.back(), "value"); + ASSERT_FALSE(identifier_view.startsWith("test")); + ASSERT_FALSE(identifier_view.endsWith("test")); + ASSERT_TRUE(identifier_view.startsWith("value")); + ASSERT_TRUE(identifier_view.endsWith("value")); + ASSERT_EQ(identifier_view[0], "value"); + ASSERT_NE(identifier_view.begin(), identifier_view.end()); + ASSERT_EQ(identifier_view.getFullName(), "value"); + } + { + Identifier identifier("value1.value2"); + IdentifierView identifier_view(identifier); + + ASSERT_FALSE(identifier_view.empty()); + ASSERT_FALSE(identifier_view.isEmpty()); + ASSERT_EQ(identifier_view.getPartsSize(), 2); + ASSERT_FALSE(identifier_view.isShort()); + ASSERT_TRUE(identifier_view.isCompound()); + ASSERT_FALSE(identifier_view.startsWith("test")); + ASSERT_FALSE(identifier_view.endsWith("test")); + ASSERT_TRUE(identifier_view.startsWith("value1")); + ASSERT_TRUE(identifier_view.endsWith("value2")); + ASSERT_EQ(identifier_view[0], "value1"); + ASSERT_EQ(identifier_view[1], "value2"); + ASSERT_NE(identifier_view.begin(), identifier_view.end()); + ASSERT_EQ(identifier_view.getFullName(), "value1.value2"); + } + { + Identifier identifier1("value1.value2"); + IdentifierView identifier_view1(identifier1); + + Identifier identifier2("value1.value2"); + IdentifierView identifier_view2(identifier2); + + ASSERT_EQ(identifier_view1, identifier_view2); + } + { + Identifier identifier1("value1.value2"); + IdentifierView identifier_view1(identifier1); + + Identifier identifier2("value1.value3"); + IdentifierView identifier_view2(identifier2); + + ASSERT_NE(identifier_view1, identifier_view2); + } +} + +TEST(Identifier, IdentifierViewPopParts) +{ + { + Identifier identifier("value1.value2.value3"); + IdentifierView identifier_view(identifier); + + ASSERT_EQ(identifier_view.getFullName(), "value1.value2.value3"); + identifier_view.popLast(); + ASSERT_EQ(identifier_view.getFullName(), "value1.value2"); + identifier_view.popLast(); + ASSERT_EQ(identifier_view.getFullName(), "value1"); + identifier_view.popLast(); + ASSERT_EQ(identifier_view.getFullName(), ""); + ASSERT_TRUE(identifier_view.isEmpty()); + } + { + Identifier identifier("value1.value2.value3"); + IdentifierView identifier_view(identifier); + + ASSERT_EQ(identifier_view.getFullName(), "value1.value2.value3"); + identifier_view.popFirst(); + ASSERT_EQ(identifier_view.getFullName(), "value2.value3"); + identifier_view.popFirst(); + ASSERT_EQ(identifier_view.getFullName(), "value3"); + identifier_view.popFirst(); + ASSERT_EQ(identifier_view.getFullName(), ""); + ASSERT_TRUE(identifier_view.isEmpty()); + } + { + Identifier identifier("value1.value2.value3"); + IdentifierView identifier_view(identifier); + + ASSERT_EQ(identifier_view.getFullName(), "value1.value2.value3"); + identifier_view.popLast(); + ASSERT_EQ(identifier_view.getFullName(), "value1.value2"); + identifier_view.popFirst(); + ASSERT_EQ(identifier_view.getFullName(), "value2"); + identifier_view.popLast(); + ASSERT_EQ(identifier_view.getFullName(), ""); + ASSERT_TRUE(identifier_view.isEmpty()); + } +} diff --git a/src/Analyzer/tests/gtest_query_tree_node.cpp b/src/Analyzer/tests/gtest_query_tree_node.cpp new file mode 100644 index 00000000000..079869b2a53 --- /dev/null +++ b/src/Analyzer/tests/gtest_query_tree_node.cpp @@ -0,0 +1,86 @@ +#include + 
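// Editor's illustrative sketch (not part of the patch): minimal usage of the Identifier /
// IdentifierView API exercised by gtest_identifier.cpp above. Only member functions that appear
// in those tests are used; the include paths below are assumptions.
#include <Analyzer/Identifier.h>
#include <cassert>

static void identifierUsageSketch()
{
    DB::Identifier identifier("db.table.column");
    assert(identifier.getPartsSize() == 3 && identifier.isCompound());
    assert(identifier.front() == "db" && identifier.back() == "column");

    identifier.popFirst();                               // drops "db"
    assert(identifier.getFullName() == "table.column");

    DB::IdentifierView view(identifier);                 // non-owning view over the same parts
    assert(view.startsWith("table") && view.endsWith("column"));
}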
+#include + +#include +#include +#include + +using namespace DB; + +class SourceNode final : public IQueryTreeNode +{ +public: + SourceNode() : IQueryTreeNode(0 /*children_size*/) {} + + QueryTreeNodeType getNodeType() const override + { + return QueryTreeNodeType::TABLE; + } + + void dumpTreeImpl(WriteBuffer &, FormatState &, size_t) const override + { + } + + bool isEqualImpl(const IQueryTreeNode &) const override + { + return true; + } + + void updateTreeHashImpl(HashState &) const override + { + } + + QueryTreeNodePtr cloneImpl() const override + { + return std::make_shared(); + } + + ASTPtr toASTImpl() const override + { + return nullptr; + } +}; + +TEST(QueryTreeNode, Clone) +{ + { + auto source_node = std::make_shared(); + + NameAndTypePair column_name_and_type("value", std::make_shared()); + auto column_node = std::make_shared(column_name_and_type, source_node); + + ASSERT_EQ(column_node->getColumnSource().get(), source_node.get()); + + auto cloned_column_node = column_node->clone(); + + /// If in subtree source was not cloned, source pointer must remain same + ASSERT_NE(column_node.get(), cloned_column_node.get()); + ASSERT_EQ(cloned_column_node->as().getColumnSource().get(), source_node.get()); + } + { + auto root_node = std::make_shared(); + auto source_node = std::make_shared(); + + NameAndTypePair column_name_and_type("value", std::make_shared()); + auto column_node = std::make_shared(column_name_and_type, source_node); + + root_node->getNodes().push_back(source_node); + root_node->getNodes().push_back(column_node); + + ASSERT_EQ(column_node->getColumnSource().get(), source_node.get()); + + auto cloned_root_node = std::static_pointer_cast(root_node->clone()); + auto cloned_source_node = cloned_root_node->getNodes()[0]; + auto cloned_column_node = std::static_pointer_cast(cloned_root_node->getNodes()[1]); + + /** If in subtree source was cloned. + * Source pointer for node that was cloned must remain same. + * Source pointer for cloned node must be updated. 
+ */ + ASSERT_NE(column_node.get(), cloned_column_node.get()); + ASSERT_NE(source_node.get(), cloned_source_node.get()); + ASSERT_EQ(column_node->getColumnSource().get(), source_node.get()); + ASSERT_EQ(cloned_column_node->getColumnSource().get(), cloned_source_node.get()); + } +} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 419a34fc2ab..ce2cc862b32 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -66,6 +66,8 @@ add_subdirectory (Storages) add_subdirectory (Parsers) add_subdirectory (IO) add_subdirectory (Functions) +add_subdirectory (Analyzer) +add_subdirectory (Planner) add_subdirectory (Interpreters) add_subdirectory (AggregateFunctions) add_subdirectory (Client) @@ -254,6 +256,9 @@ add_object_library(clickhouse_datatypes_serializations DataTypes/Serializations) add_object_library(clickhouse_databases Databases) add_object_library(clickhouse_databases_mysql Databases/MySQL) add_object_library(clickhouse_disks Disks) +add_object_library(clickhouse_analyzer Analyzer) +add_object_library(clickhouse_analyzer_passes Analyzer/Passes) +add_object_library(clickhouse_planner Planner) add_object_library(clickhouse_interpreters Interpreters) add_object_library(clickhouse_interpreters_cache Interpreters/Cache) add_object_library(clickhouse_interpreters_access Interpreters/Access) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 19a6e6bfa87..30e7423fde0 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -12,12 +12,14 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -25,6 +27,10 @@ # include #endif +#if USE_MULTITARGET_CODE +# include +#endif + #if USE_EMBEDDED_COMPILER #include #include @@ -471,6 +477,128 @@ void ColumnVector::insertRangeFrom(const IColumn & src, size_t start, size_t memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0])); } +static inline UInt64 blsr(UInt64 mask) +{ +#ifdef __BMI__ + return _blsr_u64(mask); +#else + return mask & (mask-1); +#endif +} + +DECLARE_DEFAULT_CODE( +template +inline void doFilterAligned(const UInt8 *& filt_pos, const UInt8 *& filt_end_aligned, const T *& data_pos, Container & res_data) +{ + while (filt_pos < filt_end_aligned) + { + UInt64 mask = bytes64MaskToBits64Mask(filt_pos); + + if (0xffffffffffffffff == mask) + { + res_data.insert(data_pos, data_pos + SIMD_ELEMENTS); + } + else + { + while (mask) + { + size_t index = std::countr_zero(mask); + res_data.push_back(data_pos[index]); + mask = blsr(mask); + } + } + + filt_pos += SIMD_ELEMENTS; + data_pos += SIMD_ELEMENTS; + } +} +) + +namespace +{ +template +void resize(Container & res_data, size_t reserve_size) +{ +#if defined(MEMORY_SANITIZER) + res_data.resize_fill(reserve_size, static_cast(0)); // MSan doesn't recognize that all allocated memory is written by AVX-512 intrinsics. 
+#else + res_data.resize(reserve_size); +#endif +} +} + +DECLARE_AVX512VBMI2_SPECIFIC_CODE( +template +inline void compressStoreAVX512(const void *src, void *dst, const UInt64 mask) +{ + __m512i vsrc = _mm512_loadu_si512(src); + if constexpr (ELEMENT_WIDTH == 1) + _mm512_mask_compressstoreu_epi8(dst, static_cast<__mmask64>(mask), vsrc); + else if constexpr (ELEMENT_WIDTH == 2) + _mm512_mask_compressstoreu_epi16(dst, static_cast<__mmask32>(mask), vsrc); + else if constexpr (ELEMENT_WIDTH == 4) + _mm512_mask_compressstoreu_epi32(dst, static_cast<__mmask16>(mask), vsrc); + else if constexpr (ELEMENT_WIDTH == 8) + _mm512_mask_compressstoreu_epi64(dst, static_cast<__mmask8>(mask), vsrc); +} + +template +inline void doFilterAligned(const UInt8 *& filt_pos, const UInt8 *& filt_end_aligned, const T *& data_pos, Container & res_data) +{ + static constexpr size_t VEC_LEN = 64; /// AVX512 vector length - 64 bytes + static constexpr size_t ELEMENT_WIDTH = sizeof(T); + static constexpr size_t ELEMENTS_PER_VEC = VEC_LEN / ELEMENT_WIDTH; + static constexpr UInt64 KMASK = 0xffffffffffffffff >> (64 - ELEMENTS_PER_VEC); + + size_t current_offset = res_data.size(); + size_t reserve_size = res_data.size(); + size_t alloc_size = SIMD_ELEMENTS * 2; + + while (filt_pos < filt_end_aligned) + { + /// to avoid calling resize too frequently, resize to reserve buffer. + if (reserve_size - current_offset < SIMD_ELEMENTS) + { + reserve_size += alloc_size; + resize(res_data, reserve_size); + alloc_size *= 2; + } + + UInt64 mask = bytes64MaskToBits64Mask(filt_pos); + + if (0xffffffffffffffff == mask) + { + for (size_t i = 0; i < SIMD_ELEMENTS; i += ELEMENTS_PER_VEC) + _mm512_storeu_si512(reinterpret_cast(&res_data[current_offset + i]), + _mm512_loadu_si512(reinterpret_cast(data_pos + i))); + current_offset += SIMD_ELEMENTS; + } + else + { + if (mask) + { + for (size_t i = 0; i < SIMD_ELEMENTS; i += ELEMENTS_PER_VEC) + { + compressStoreAVX512(reinterpret_cast(data_pos + i), + reinterpret_cast(&res_data[current_offset]), mask & KMASK); + current_offset += std::popcount(mask & KMASK); + /// prepare mask for next iter, if ELEMENTS_PER_VEC = 64, no next iter + if (ELEMENTS_PER_VEC < 64) + { + mask >>= ELEMENTS_PER_VEC; + } + } + } + } + + filt_pos += SIMD_ELEMENTS; + data_pos += SIMD_ELEMENTS; + } + /// resize to the real size. + res_data.resize(current_offset); +} +) + template ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const { @@ -491,36 +619,18 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s /** A slightly more optimized version. * Based on the assumption that often pieces of consecutive values * completely pass or do not pass the filter. - * Therefore, we will optimistically check the parts of `SIMD_BYTES` values. + * Therefore, we will optimistically check the parts of `SIMD_ELEMENTS` values. 
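      * Editor's note (descriptive addition): bytes64MaskToBits64Mask() packs each group of 64
      * filter bytes into one UInt64 bit mask. If the mask is all ones, the whole group is copied
      * with a single insert; otherwise the generic path in TargetSpecific::Default walks the set
      * bits with std::countr_zero() and clears the lowest set bit via blsr(), while the
      * AVX512VBMI2 specialization above replaces that per-bit loop with masked compress-stores
      * (_mm512_mask_compressstoreu_epi8/16/32/64).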
*/ - static constexpr size_t SIMD_BYTES = 64; - const UInt8 * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES; + static constexpr size_t SIMD_ELEMENTS = 64; + const UInt8 * filt_end_aligned = filt_pos + size / SIMD_ELEMENTS * SIMD_ELEMENTS; - while (filt_pos < filt_end_aligned) - { - UInt64 mask = bytes64MaskToBits64Mask(filt_pos); - - if (0xffffffffffffffff == mask) - { - res_data.insert(data_pos, data_pos + SIMD_BYTES); - } - else - { - while (mask) - { - size_t index = std::countr_zero(mask); - res_data.push_back(data_pos[index]); - #ifdef __BMI__ - mask = _blsr_u64(mask); - #else - mask = mask & (mask-1); - #endif - } - } - - filt_pos += SIMD_BYTES; - data_pos += SIMD_BYTES; - } +#if USE_MULTITARGET_CODE + static constexpr bool VBMI2_CAPABLE = sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8; + if (VBMI2_CAPABLE && isArchSupported(TargetArch::AVX512VBMI2)) + TargetSpecific::AVX512VBMI2::doFilterAligned(filt_pos, filt_end_aligned, data_pos, res_data); + else +#endif + TargetSpecific::Default::doFilterAligned(filt_pos, filt_end_aligned, data_pos, res_data); while (filt_pos < filt_end) { diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 0f388ef8ac3..a601dd8b405 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -7,11 +7,15 @@ #include #include #include +#include #include #include #include "config.h" +#if USE_MULTITARGET_CODE +# include +#endif namespace DB { @@ -391,6 +395,127 @@ protected: Container data; }; +DECLARE_DEFAULT_CODE( +template +inline void vectorIndexImpl(const Container & data, const PaddedPODArray & indexes, size_t limit, Container & res_data) +{ + for (size_t i = 0; i < limit; ++i) + res_data[i] = data[indexes[i]]; +} +); + +DECLARE_AVX512VBMI_SPECIFIC_CODE( +template +inline void vectorIndexImpl(const Container & data, const PaddedPODArray & indexes, size_t limit, Container & res_data) +{ + static constexpr UInt64 MASK64 = 0xffffffffffffffff; + const size_t limit64 = limit & ~63; + size_t pos = 0; + size_t data_size = data.size(); + + auto data_pos = reinterpret_cast(data.data()); + auto indexes_pos = reinterpret_cast(indexes.data()); + auto res_pos = reinterpret_cast(res_data.data()); + + if (limit == 0) + return; /// nothing to do, just return + + if (data_size <= 64) + { + /// one single mask load for table size <= 64 + __mmask64 last_mask = MASK64 >> (64 - data_size); + __m512i table1 = _mm512_maskz_loadu_epi8(last_mask, data_pos); + + /// 64 bytes table lookup using one single permutexvar_epi8 + while (pos < limit64) + { + __m512i vidx = _mm512_loadu_epi8(indexes_pos + pos); + __m512i out = _mm512_permutexvar_epi8(vidx, table1); + _mm512_storeu_epi8(res_pos + pos, out); + pos += 64; + } + /// tail handling + if (limit > limit64) + { + __mmask64 tail_mask = MASK64 >> (limit64 + 64 - limit); + __m512i vidx = _mm512_maskz_loadu_epi8(tail_mask, indexes_pos + pos); + __m512i out = _mm512_permutexvar_epi8(vidx, table1); + _mm512_mask_storeu_epi8(res_pos + pos, tail_mask, out); + } + } + else if (data_size <= 128) + { + /// table size (64, 128] requires 2 zmm load + __mmask64 last_mask = MASK64 >> (128 - data_size); + __m512i table1 = _mm512_loadu_epi8(data_pos); + __m512i table2 = _mm512_maskz_loadu_epi8(last_mask, data_pos + 64); + + /// 128 bytes table lookup using one single permute2xvar_epi8 + while (pos < limit64) + { + __m512i vidx = _mm512_loadu_epi8(indexes_pos + pos); + __m512i out = _mm512_permutex2var_epi8(table1, vidx, table2); + _mm512_storeu_epi8(res_pos + pos, out); + pos += 
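            /// Editor's note (descriptive addition): each _mm512_permutex2var_epi8 selects among the
            /// 128 bytes held in two zmm tables using the low 7 bits of every index byte. In the
            /// 256-byte case below, two such lookups cover data[0..127] and data[128..255], and
            /// _mm512_movepi8_mask extracts the index MSBs so _mm512_mask_blend_epi8 can pick the
            /// correct half per element. Indexes are 8-bit here, so values never exceed 255, which
            /// is why data_size is clamped to 256.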
64; + } + if (limit > limit64) + { + __mmask64 tail_mask = MASK64 >> (limit64 + 64 - limit); + __m512i vidx = _mm512_maskz_loadu_epi8(tail_mask, indexes_pos + pos); + __m512i out = _mm512_permutex2var_epi8(table1, vidx, table2); + _mm512_mask_storeu_epi8(res_pos + pos, tail_mask, out); + } + } + else + { + if (data_size > 256) + { + /// byte index will not exceed 256 boundary. + data_size = 256; + } + + __m512i table1 = _mm512_loadu_epi8(data_pos); + __m512i table2 = _mm512_loadu_epi8(data_pos + 64); + __m512i table3, table4; + if (data_size <= 192) + { + /// only 3 tables need to load if size <= 192 + __mmask64 last_mask = MASK64 >> (192 - data_size); + table3 = _mm512_maskz_loadu_epi8(last_mask, data_pos + 128); + table4 = _mm512_setzero_si512(); + } + else + { + __mmask64 last_mask = MASK64 >> (256 - data_size); + table3 = _mm512_loadu_epi8(data_pos + 128); + table4 = _mm512_maskz_loadu_epi8(last_mask, data_pos + 192); + } + + /// 256 bytes table lookup can use: 2 permute2xvar_epi8 plus 1 blender with MSB + while (pos < limit64) + { + __m512i vidx = _mm512_loadu_epi8(indexes_pos + pos); + __m512i tmp1 = _mm512_permutex2var_epi8(table1, vidx, table2); + __m512i tmp2 = _mm512_permutex2var_epi8(table3, vidx, table4); + __mmask64 msb = _mm512_movepi8_mask(vidx); + __m512i out = _mm512_mask_blend_epi8(msb, tmp1, tmp2); + _mm512_storeu_epi8(res_pos + pos, out); + pos += 64; + } + if (limit > limit64) + { + __mmask64 tail_mask = MASK64 >> (limit64 + 64 - limit); + __m512i vidx = _mm512_maskz_loadu_epi8(tail_mask, indexes_pos + pos); + __m512i tmp1 = _mm512_permutex2var_epi8(table1, vidx, table2); + __m512i tmp2 = _mm512_permutex2var_epi8(table3, vidx, table4); + __mmask64 msb = _mm512_movepi8_mask(vidx); + __m512i out = _mm512_mask_blend_epi8(msb, tmp1, tmp2); + _mm512_mask_storeu_epi8(res_pos + pos, tail_mask, out); + } + } +} +); + template template ColumnPtr ColumnVector::indexImpl(const PaddedPODArray & indexes, size_t limit) const @@ -399,8 +524,18 @@ ColumnPtr ColumnVector::indexImpl(const PaddedPODArray & indexes, size_ auto res = this->create(limit); typename Self::Container & res_data = res->getData(); - for (size_t i = 0; i < limit; ++i) - res_data[i] = data[indexes[i]]; +#if USE_MULTITARGET_CODE + if constexpr (sizeof(T) == 1 && sizeof(Type) == 1) + { + /// VBMI optimization only applicable for (U)Int8 types + if (isArchSupported(TargetArch::AVX512VBMI)) + { + TargetSpecific::AVX512VBMI::vectorIndexImpl(data, indexes, limit, res_data); + return res; + } + } +#endif + TargetSpecific::Default::vectorIndexImpl(data, indexes, limit, res_data); return res; } diff --git a/src/Columns/ColumnVectorHelper.h b/src/Columns/ColumnVectorHelper.h index 36cbfbf640e..b8ea6ca427f 100644 --- a/src/Columns/ColumnVectorHelper.h +++ b/src/Columns/ColumnVectorHelper.h @@ -28,13 +28,17 @@ public: template const char * getRawDataBegin() const { - return reinterpret_cast, 15, 16> *>(reinterpret_cast(this) + sizeof(*this))->raw_data(); + return reinterpret_cast, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD> *>( + reinterpret_cast(this) + sizeof(*this)) + ->raw_data(); } template void insertRawData(const char * ptr) { - return reinterpret_cast, 15, 16> *>(reinterpret_cast(this) + sizeof(*this))->push_back_raw(ptr); + return reinterpret_cast, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD> *>( + reinterpret_cast(this) + sizeof(*this)) + ->push_back_raw(ptr); } }; diff --git a/src/Columns/ColumnsDateTime.h b/src/Columns/ColumnsDateTime.h new file mode 100644 index 00000000000..90d21ed5ff7 --- /dev/null +++ 
b/src/Columns/ColumnsDateTime.h @@ -0,0 +1,20 @@ +#pragma once + +#include +#include +#include +#include +#include + + +namespace DB +{ + +/** Convenience typedefs for columns of SQL types Date, Date32, DateTime and DateTime64. */ + +using ColumnDate = DataTypeDate::ColumnType; +using ColumnDate32 = DataTypeDate32::ColumnType; +using ColumnDateTime = DataTypeDateTime::ColumnType; +using ColumnDateTime64 = DataTypeDateTime64::ColumnType; + +} diff --git a/src/Columns/tests/gtest_column_vector.cpp b/src/Columns/tests/gtest_column_vector.cpp new file mode 100644 index 00000000000..5017d687791 --- /dev/null +++ b/src/Columns/tests/gtest_column_vector.cpp @@ -0,0 +1,157 @@ +#include +#include +#include +#include +#include +#include + +using namespace DB; + +static pcg64 rng(randomSeed()); +static constexpr int error_code = 12345; +static constexpr size_t TEST_RUNS = 500; +static constexpr size_t MAX_ROWS = 10000; +static const std::vector filter_ratios = {1, 2, 5, 11, 32, 64, 100, 1000}; +static const size_t K = filter_ratios.size(); + +template +static MutableColumnPtr createColumn(size_t n) +{ + auto column = ColumnVector::create(); + auto & values = column->getData(); + + for (size_t i = 0; i < n; ++i) + values.push_back(static_cast(i)); + + return column; +} + +bool checkFilter(const PaddedPODArray &flit, const IColumn & src, const IColumn & dst) +{ + size_t n = flit.size(); + size_t dst_size = dst.size(); + size_t j = 0; /// index of dest + for (size_t i = 0; i < n; ++i) + { + if (flit[i] != 0) + { + if ((dst_size <= j) || (src.compareAt(i, j, dst, 0) != 0)) + return false; + j++; + } + } + return dst_size == j; /// filtered size check +} + +template +static void testFilter() +{ + auto test_case = [&](size_t rows, size_t filter_ratio) + { + auto vector_column = createColumn(rows); + PaddedPODArray flit(rows); + for (size_t i = 0; i < rows; ++i) + flit[i] = rng() % filter_ratio == 0; + auto res_column = vector_column->filter(flit, -1); + + if (!checkFilter(flit, *vector_column, *res_column)) + throw Exception(error_code, "VectorColumn filter failure, type: {}", typeid(T).name()); + }; + + try + { + for (size_t i = 0; i < TEST_RUNS; ++i) + { + size_t rows = rng() % MAX_ROWS + 1; + size_t filter_ratio = filter_ratios[rng() % K]; + + test_case(rows, filter_ratio); + } + } + catch (const Exception & e) + { + FAIL() << e.displayText(); + } +} + +TEST(ColumnVector, Filter) +{ + testFilter(); + testFilter(); + testFilter(); + testFilter(); + testFilter(); + testFilter(); + testFilter(); + testFilter(); + testFilter(); +} + +template +static MutableColumnPtr createIndexColumn(size_t limit, size_t rows) +{ + auto column = ColumnVector::create(); + auto & values = column->getData(); + auto max = std::numeric_limits::max(); + limit = limit > max ? 
max : limit; + + for (size_t i = 0; i < rows; ++i) + { + T val = rng() % limit; + values.push_back(val); + } + + return column; +} + +template +static void testIndex() +{ + static const std::vector column_sizes = {64, 128, 196, 256, 512}; + + auto test_case = [&](size_t rows, size_t index_rows, size_t limit) + { + auto vector_column = createColumn(rows); + auto index_column = createIndexColumn(rows, index_rows); + auto res_column = vector_column->index(*index_column, limit); + if (limit == 0) + limit = index_column->size(); + + /// check results + if (limit != res_column->size()) + throw Exception(error_code, "ColumnVector index size not match to limit: {} {}", typeid(T).name(), typeid(IndexType).name()); + for (size_t i = 0; i < limit; ++i) + { + /// vector_column data is the same as index, so indexed column's value will equals to index_column. + if (res_column->get64(i) != index_column->get64(i)) + throw Exception(error_code, "ColumnVector index fail: {} {}", typeid(T).name(), typeid(IndexType).name()); + } + }; + + try + { + test_case(0, 0, 0); /// test for zero length index + for (size_t i = 0; i < TEST_RUNS; ++i) + { + /// make sure rows distribute in (column_sizes[r-1], colulmn_sizes[r]] + size_t row_idx = rng() % column_sizes.size(); + size_t row_base = row_idx > 0 ? column_sizes[row_idx - 1] : 0; + size_t rows = row_base + (rng() % (column_sizes[row_idx] - row_base) + 1); + size_t index_rows = rng() % MAX_ROWS + 1; + + test_case(rows, index_rows, 0); + test_case(rows, index_rows, static_cast(0.5 * index_rows)); + } + } + catch (const Exception & e) + { + FAIL() << e.displayText(); + } +} + +TEST(ColumnVector, Index) +{ + testIndex(); + testIndex(); + testIndex(); +} diff --git a/src/Common/Arena.h b/src/Common/Arena.h index b706f3b3413..17d53acd8f7 100644 --- a/src/Common/Arena.h +++ b/src/Common/Arena.h @@ -34,8 +34,7 @@ namespace DB class Arena : private boost::noncopyable { private: - /// Padding allows to use 'memcpySmallAllowReadWriteOverflow15' instead of 'memcpy'. - static constexpr size_t pad_right = 15; + static constexpr size_t pad_right = PADDING_FOR_SIMD - 1; /// Contiguous MemoryChunk of memory and pointer to free space inside it. Member of single-linked list. 
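    /// Editor's note (illustrative, not part of the patch): PADDING_FOR_SIMD is introduced as 64 in
    /// Core/Defines.h later in this patch, presumably so that a full 64-byte zmm register can be
    /// read or written past the last element instead of only a 16-byte xmm register. The block-size
    /// constants changed further below follow from the same arithmetic:
    static_assert(65536 - 64 - (64 - 1) == 65409);      /// new DEFAULT_BLOCK_SIZE
    static_assert(1048576 - 64 - (64 - 1) == 1048449);  /// new DEFAULT_INSERT_BLOCK_SIZE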
struct alignas(16) MemoryChunk : private Allocator /// empty base optimization diff --git a/src/Common/CpuId.h b/src/Common/CpuId.h index 167fa22faf6..1e54ccf62b3 100644 --- a/src/Common/CpuId.h +++ b/src/Common/CpuId.h @@ -82,6 +82,7 @@ inline bool cpuid(UInt32 op, UInt32 * res) noexcept /// NOLINT OP(AVX512BW) \ OP(AVX512VL) \ OP(AVX512VBMI) \ + OP(AVX512VBMI2) \ OP(PREFETCHWT1) \ OP(SHA) \ OP(ADX) \ @@ -302,6 +303,11 @@ bool haveAVX512VBMI() noexcept return haveAVX512F() && ((CpuInfo(0x7, 0).registers.ecx >> 1) & 1u); } +bool haveAVX512VBMI2() noexcept +{ + return haveAVX512F() && ((CpuInfo(0x7, 0).registers.ecx >> 6) & 1u); +} + bool haveRDRAND() noexcept { return CpuInfo(0x0).registers.eax >= 0x7 && ((CpuInfo(0x1).registers.ecx >> 30) & 1u); diff --git a/src/Common/PODArray.cpp b/src/Common/PODArray.cpp index c1edc5bafad..07c3cf1af1a 100644 --- a/src/Common/PODArray.cpp +++ b/src/Common/PODArray.cpp @@ -6,14 +6,13 @@ namespace DB /// Used for left padding of PODArray when empty const char empty_pod_array[empty_pod_array_size]{}; -template class PODArray, 15, 16>; -template class PODArray, 15, 16>; -template class PODArray, 15, 16>; -template class PODArray, 15, 16>; - -template class PODArray, 15, 16>; -template class PODArray, 15, 16>; -template class PODArray, 15, 16>; -template class PODArray, 15, 16>; +template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; +template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; +template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; +template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; +template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; +template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; +template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; +template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; } diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h index 0baefad39e2..ea3115677fc 100644 --- a/src/Common/PODArray.h +++ b/src/Common/PODArray.h @@ -502,7 +502,7 @@ public: template void insertSmallAllowReadWriteOverflow15(It1 from_begin, It2 from_end, TAllocatorParams &&... 
allocator_params) { - static_assert(pad_right_ >= 15); + static_assert(pad_right_ >= PADDING_FOR_SIMD - 1); static_assert(sizeof(T) == sizeof(*from_begin)); insertPrepare(from_begin, from_end, std::forward(allocator_params)...); size_t bytes_to_copy = this->byte_size(from_end - from_begin); @@ -778,14 +778,13 @@ void swap(PODArray & lhs, P /// Prevent implicit template instantiation of PODArray for common numeric types -extern template class PODArray, 15, 16>; -extern template class PODArray, 15, 16>; -extern template class PODArray, 15, 16>; -extern template class PODArray, 15, 16>; - -extern template class PODArray, 15, 16>; -extern template class PODArray, 15, 16>; -extern template class PODArray, 15, 16>; -extern template class PODArray, 15, 16>; +extern template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; +extern template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; +extern template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; +extern template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; +extern template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; +extern template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; +extern template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; +extern template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; } diff --git a/src/Common/PODArray_fwd.h b/src/Common/PODArray_fwd.h index ec7b3bf4331..56e84d68285 100644 --- a/src/Common/PODArray_fwd.h +++ b/src/Common/PODArray_fwd.h @@ -4,6 +4,7 @@ * PODArray. */ +#include #include #include @@ -22,7 +23,7 @@ class PODArray; /** For columns. Padding is enough to read and write xmm-register at the address of the last element. */ template > -using PaddedPODArray = PODArray; +using PaddedPODArray = PODArray; /** A helper for declaring PODArray that uses inline memory. 
* The initial size is set to use all the inline bytes, since using less would diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index e217d23cc27..6f31009b1d2 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -99,23 +99,25 @@ void updateResources(ElfW(Addr) base_address, std::string_view object_name, std: name = name.substr((name[0] == '_') + strlen("binary_")); name = name.substr(0, name.size() - strlen("_start")); - resources.emplace(name, SymbolIndex::ResourcesBlob{ - base_address, - object_name, - std::string_view{char_address, 0}, // NOLINT - }); + auto & resource = resources[name]; + if (!resource.base_address || resource.base_address == base_address) + { + resource.base_address = base_address; + resource.start = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor) + resource.object_name = object_name; + } } - else if (name.ends_with("_end")) + if (name.ends_with("_end")) { name = name.substr((name[0] == '_') + strlen("binary_")); name = name.substr(0, name.size() - strlen("_end")); - auto it = resources.find(name); - if (it != resources.end() && it->second.base_address == base_address && it->second.data.empty()) + auto & resource = resources[name]; + if (!resource.base_address || resource.base_address == base_address) { - const char * start = it->second.data.data(); - assert(char_address >= start); - it->second.data = std::string_view{start, static_cast(char_address - start)}; + resource.base_address = base_address; + resource.end = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor) + resource.object_name = object_name; } } } diff --git a/src/Common/SymbolIndex.h b/src/Common/SymbolIndex.h index f2b40f02ead..47162331946 100644 --- a/src/Common/SymbolIndex.h +++ b/src/Common/SymbolIndex.h @@ -51,7 +51,7 @@ public: std::string_view getResource(String name) const { if (auto it = data.resources.find(name); it != data.resources.end()) - return it->second.data; + return it->second.data(); return {}; } @@ -63,11 +63,18 @@ public: { /// Symbol can be presented in multiple shared objects, /// base_address will be used to compare only symbols from the same SO. - ElfW(Addr) base_address; + ElfW(Addr) base_address = 0; /// Just a human name of the SO. std::string_view object_name; /// Data blob. 
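        /// Editor's note (descriptive addition): the blob is now tracked as two independent views
        /// because updateResources() above may see the "_start" and "_end" symbols separately and in
        /// either order; the base_address check keeps both views from the same shared object, and
        /// data() recomputes the final span from them on demand.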
- std::string_view data; + std::string_view start; + std::string_view end; + + std::string_view data() const + { + assert(end.data() >= start.data()); + return std::string_view{start.data(), static_cast(end.data() - start.data())}; + } }; using Resources = std::unordered_map; diff --git a/src/Common/TargetSpecific.cpp b/src/Common/TargetSpecific.cpp index 9a445ea0fc1..1ab499027bf 100644 --- a/src/Common/TargetSpecific.cpp +++ b/src/Common/TargetSpecific.cpp @@ -21,6 +21,8 @@ UInt32 getSupportedArchs() result |= static_cast(TargetArch::AVX512BW); if (Cpu::CpuFlagsCache::have_AVX512VBMI) result |= static_cast(TargetArch::AVX512VBMI); + if (Cpu::CpuFlagsCache::have_AVX512VBMI2) + result |= static_cast(TargetArch::AVX512VBMI2); return result; } @@ -39,8 +41,9 @@ String toString(TargetArch arch) case TargetArch::AVX: return "avx"; case TargetArch::AVX2: return "avx2"; case TargetArch::AVX512F: return "avx512f"; - case TargetArch::AVX512BW: return "avx512bw"; - case TargetArch::AVX512VBMI: return "avx512vbmi"; + case TargetArch::AVX512BW: return "avx512bw"; + case TargetArch::AVX512VBMI: return "avx512vbmi"; + case TargetArch::AVX512VBMI2: return "avx512vbmi"; } UNREACHABLE(); diff --git a/src/Common/TargetSpecific.h b/src/Common/TargetSpecific.h index f078c0e3ffc..250642f6ee4 100644 --- a/src/Common/TargetSpecific.h +++ b/src/Common/TargetSpecific.h @@ -31,7 +31,7 @@ * int funcImpl() { * return 2; * } - * ) // DECLARE_DEFAULT_CODE + * ) // DECLARE_AVX2_SPECIFIC_CODE * * int func() { * #if USE_MULTITARGET_CODE @@ -80,8 +80,9 @@ enum class TargetArch : UInt32 AVX = (1 << 1), AVX2 = (1 << 2), AVX512F = (1 << 3), - AVX512BW = (1 << 4), - AVX512VBMI = (1 << 5), + AVX512BW = (1 << 4), + AVX512VBMI = (1 << 5), + AVX512VBMI2 = (1 << 6), }; /// Runtime detection. 
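// Editor's illustrative sketch (not part of the patch): how the DECLARE_*_SPECIFIC_CODE macros and
// isArchSupported() combine, mirroring the runtime dispatch added to ColumnVector::filter above.
// The names sumImpl/sum are hypothetical; only macros and helpers visible in this patch are used,
// and the same body is simply compiled once per target namespace.

namespace DB
{

DECLARE_DEFAULT_CODE(
    UInt64 sumImpl(const UInt8 * data, size_t size)
    {
        UInt64 res = 0;
        for (size_t i = 0; i < size; ++i)
            res += data[i];
        return res;
    }
)

DECLARE_AVX512VBMI2_SPECIFIC_CODE(
    /// Identical body; the compiler is allowed to use AVX-512 (incl. VBMI2) instructions here.
    UInt64 sumImpl(const UInt8 * data, size_t size)
    {
        UInt64 res = 0;
        for (size_t i = 0; i < size; ++i)
            res += data[i];
        return res;
    }
)

UInt64 sum(const UInt8 * data, size_t size)
{
#if USE_MULTITARGET_CODE
    if (isArchSupported(TargetArch::AVX512VBMI2))
        return TargetSpecific::AVX512VBMI2::sumImpl(data, size);
#endif
    return TargetSpecific::Default::sumImpl(data, size);
}

}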
@@ -100,6 +101,7 @@ String toString(TargetArch arch); #if defined(__clang__) +#define AVX512VBMI2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2"))) #define AVX512VBMI_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi"))) #define AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw"))) #define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f"))) @@ -108,6 +110,8 @@ String toString(TargetArch arch); #define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt"))) #define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE +# define BEGIN_AVX512VBMI2_SPECIFIC_CODE \ + _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2\"))),apply_to=function)") # define BEGIN_AVX512VBMI_SPECIFIC_CODE \ _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi\"))),apply_to=function)") # define BEGIN_AVX512BW_SPECIFIC_CODE \ @@ -129,6 +133,7 @@ String toString(TargetArch arch); # define DUMMY_FUNCTION_DEFINITION [[maybe_unused]] void _dummy_function_definition(); #else +#define AVX512VBMI2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2,tune=native"))) #define AVX512VBMI_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,tune=native"))) #define AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,tune=native"))) #define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,tune=native"))) @@ -137,6 +142,9 @@ String toString(TargetArch arch); #define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt",tune=native))) #define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE +# define BEGIN_AVX512VBMI2_SPECIFIC_CODE \ + _Pragma("GCC push_options") \ + _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2,tune=native\")") # define BEGIN_AVX512VBMI_SPECIFIC_CODE \ _Pragma("GCC push_options") \ _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,tune=native\")") @@ -217,6 +225,16 @@ namespace TargetSpecific::AVX512VBMI { \ } \ END_TARGET_SPECIFIC_CODE +#define DECLARE_AVX512VBMI2_SPECIFIC_CODE(...) \ +BEGIN_AVX512VBMI2_SPECIFIC_CODE \ +namespace TargetSpecific::AVX512VBMI2 { \ + DUMMY_FUNCTION_DEFINITION \ + using namespace DB::TargetSpecific::AVX512VBMI2; \ + __VA_ARGS__ \ +} \ +END_TARGET_SPECIFIC_CODE + + #else #define USE_MULTITARGET_CODE 0 @@ -229,6 +247,7 @@ END_TARGET_SPECIFIC_CODE #define DECLARE_AVX512F_SPECIFIC_CODE(...) #define DECLARE_AVX512BW_SPECIFIC_CODE(...) #define DECLARE_AVX512VBMI_SPECIFIC_CODE(...) +#define DECLARE_AVX512VBMI2_SPECIFIC_CODE(...) 
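/// Editor's note (descriptive addition): in this branch (USE_MULTITARGET_CODE == 0) the
/// architecture-specific DECLARE_..._SPECIFIC_CODE macros above expand to nothing, so only the
/// TargetSpecific::Default implementations are compiled and the runtime dispatch (see the sketch
/// above) always falls through to them.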
#endif @@ -245,8 +264,9 @@ DECLARE_SSE42_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX2_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX512F_SPECIFIC_CODE(__VA_ARGS__) \ -DECLARE_AVX512BW_SPECIFIC_CODE(__VA_ARGS__) \ -DECLARE_AVX512VBMI_SPECIFIC_CODE(__VA_ARGS__) +DECLARE_AVX512BW_SPECIFIC_CODE (__VA_ARGS__) \ +DECLARE_AVX512VBMI_SPECIFIC_CODE (__VA_ARGS__) \ +DECLARE_AVX512VBMI2_SPECIFIC_CODE (__VA_ARGS__) DECLARE_DEFAULT_CODE( constexpr auto BuildArch = TargetArch::Default; /// NOLINT @@ -276,6 +296,9 @@ DECLARE_AVX512VBMI_SPECIFIC_CODE( constexpr auto BuildArch = TargetArch::AVX512VBMI; /// NOLINT ) // DECLARE_AVX512VBMI_SPECIFIC_CODE +DECLARE_AVX512VBMI2_SPECIFIC_CODE( + constexpr auto BuildArch = TargetArch::AVX512VBMI2; /// NOLINT +) // DECLARE_AVX512VBMI2_SPECIFIC_CODE /** Runtime Dispatch helpers for class members. * diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index 6742a554a85..f19a412db37 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -178,7 +178,10 @@ public: func = std::forward(func), args = std::make_tuple(std::forward(args)...)]() mutable /// mutable is needed to destroy capture { - SCOPE_EXIT(state->event.set()); + SCOPE_EXIT( + state->thread_id = std::thread::id(); + state->event.set(); + ); state->thread_id = std::this_thread::get_id(); diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 406d8b27c39..ab5d918e1f0 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -777,19 +777,34 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition & return false; } -void ZooKeeper::waitForEphemeralToDisappearIfAny(const std::string & path) +void ZooKeeper::handleEphemeralNodeExistence(const std::string & path, const std::string & fast_delete_if_equal_value) { zkutil::EventPtr eph_node_disappeared = std::make_shared(); String content; - if (!tryGet(path, content, nullptr, eph_node_disappeared)) + Coordination::Stat stat; + if (!tryGet(path, content, &stat, eph_node_disappeared)) return; - int32_t timeout_ms = 3 * args.session_timeout_ms; - if (!eph_node_disappeared->tryWait(timeout_ms)) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, - "Ephemeral node {} still exists after {}s, probably it's owned by someone else. " - "Either session_timeout_ms in client's config is different from server's config or it's a bug. " - "Node data: '{}'", path, timeout_ms / 1000, content); + if (content == fast_delete_if_equal_value) + { + auto code = tryRemove(path, stat.version); + if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNONODE) + throw Coordination::Exception(code, path); + } + else + { + LOG_WARNING(log, "Ephemeral node ('{}') already exists but it isn't owned by us. Will wait until it disappears", path); + int32_t timeout_ms = 3 * args.session_timeout_ms; + if (!eph_node_disappeared->tryWait(timeout_ms)) + throw DB::Exception( + DB::ErrorCodes::LOGICAL_ERROR, + "Ephemeral node {} still exists after {}s, probably it's owned by someone else. " + "Either session_timeout_ms in client's config is different from server's config or it's a bug. 
" + "Node data: '{}'", + path, + timeout_ms / 1000, + content); + } } ZooKeeperPtr ZooKeeper::startNewSession() const diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index f7d5bccc0e0..968d10ad9a5 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -393,9 +393,11 @@ public: /// The function returns true if waited and false if waiting was interrupted by condition. bool waitForDisappear(const std::string & path, const WaitCondition & condition = {}); - /// Wait for the ephemeral node created in previous session to disappear. - /// Throws LOGICAL_ERROR if node still exists after 2x session_timeout. - void waitForEphemeralToDisappearIfAny(const std::string & path); + /// Checks if a the ephemeral node exists. These nodes are removed automatically by ZK when the session ends + /// If the node exists and its value is equal to fast_delete_if_equal_value it will remove it + /// If the node exists and its value is different, it will wait for it to disappear. It will throw a LOGICAL_ERROR if the node doesn't + /// disappear automatically after 3x session_timeout. + void handleEphemeralNodeExistence(const std::string & path, const std::string & fast_delete_if_equal_value); /// Async interface (a small subset of operations is implemented). /// @@ -609,7 +611,7 @@ public: catch (...) { ProfileEvents::increment(ProfileEvents::CannotRemoveEphemeralNode); - DB::tryLogCurrentException(__PRETTY_FUNCTION__, "Cannot remove " + path + ": "); + DB::tryLogCurrentException(__PRETTY_FUNCTION__, "Cannot remove " + path); } } diff --git a/src/Common/tests/gtest_pod_array.cpp b/src/Common/tests/gtest_pod_array.cpp index 82a6f7589b8..b0c1aab0732 100644 --- a/src/Common/tests/gtest_pod_array.cpp +++ b/src/Common/tests/gtest_pod_array.cpp @@ -532,7 +532,7 @@ TEST(Common, PODNoOverallocation) } } - EXPECT_EQ(capacities, (std::vector{4065, 8161, 16353, 32737, 65505, 131041, 262113, 524257, 1048545})); + EXPECT_EQ(capacities, (std::vector{3969, 8065, 16257, 32641, 65409, 130945, 262017, 524161, 1048449})); } template diff --git a/src/Core/Defines.h b/src/Core/Defines.h index 80efe4f77bf..ae3701c18f0 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -14,17 +14,20 @@ /// The size of the I/O buffer by default. #define DBMS_DEFAULT_BUFFER_SIZE 1048576ULL +#define PADDING_FOR_SIMD 64 + /** Which blocks by default read the data (by number of rows). * Smaller values give better cache locality, less consumption of RAM, but more overhead to process the query. */ -#define DEFAULT_BLOCK_SIZE 65505 /// 65536 minus 16 + 15 bytes padding that we usually have in arrays +#define DEFAULT_BLOCK_SIZE 65409 /// 65536 - PADDING_FOR_SIMD - (PADDING_FOR_SIMD - 1) bytes padding that we usually have in arrays /** Which blocks should be formed for insertion into the table, if we control the formation of blocks. * (Sometimes the blocks are inserted exactly such blocks that have been read / transmitted from the outside, and this parameter does not affect their size.) * More than DEFAULT_BLOCK_SIZE, because in some tables a block of data on the disk is created for each block (quite a big thing), * and if the parts were small, then it would be costly then to combine them. */ -#define DEFAULT_INSERT_BLOCK_SIZE 1048545 /// 1048576 minus 16 + 15 bytes padding that we usually have in arrays +#define DEFAULT_INSERT_BLOCK_SIZE \ + 1048449 /// 1048576 - PADDING_FOR_SIMD - (PADDING_FOR_SIMD - 1) bytes padding that we usually have in arrays /** The same, but for merge operations. 
Less DEFAULT_BLOCK_SIZE for saving RAM (since all the columns are read). * Significantly less, since there are 10-way mergers. diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0b8d24b1abc..fd86afe1bc0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -93,6 +93,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \ M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \ M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \ + M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \ M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \ M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \ M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \ @@ -302,6 +303,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \ M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \ + M(Bool, use_analyzer, false, "Use analyzer", 0) \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. 
It's also useful to reduce the need to access the external sources joining external tables.", 0) \ \ \ diff --git a/src/Disks/IDisk.cpp b/src/Disks/IDisk.cpp index 3704a511478..8a6bea2565b 100644 --- a/src/Disks/IDisk.cpp +++ b/src/Disks/IDisk.cpp @@ -24,13 +24,13 @@ bool IDisk::isDirectoryEmpty(const String & path) const return !iterateDirectory(path)->isValid(); } -void IDisk::copyFile(const String & from_file_path, IDisk & to_disk, const String & to_file_path) +void IDisk::copyFile(const String & from_file_path, IDisk & to_disk, const String & to_file_path, const WriteSettings & settings) /// NOLINT { LOG_DEBUG(&Poco::Logger::get("IDisk"), "Copying from {} (path: {}) {} to {} (path: {}) {}.", getName(), getPath(), from_file_path, to_disk.getName(), to_disk.getPath(), to_file_path); auto in = readFile(from_file_path); - auto out = to_disk.writeFile(to_file_path); + auto out = to_disk.writeFile(to_file_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, settings); copyData(*in, *out); out->finalize(); } @@ -56,15 +56,15 @@ void IDisk::removeSharedFiles(const RemoveBatchRequest & files, bool keep_all_ba using ResultsCollector = std::vector>; -void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_path, Executor & exec, ResultsCollector & results, bool copy_root_dir) +void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_path, Executor & exec, ResultsCollector & results, bool copy_root_dir, const WriteSettings & settings) { if (from_disk.isFile(from_path)) { auto result = exec.execute( - [&from_disk, from_path, &to_disk, to_path]() + [&from_disk, from_path, &to_disk, to_path, &settings]() { setThreadName("DiskCopier"); - from_disk.copyFile(from_path, to_disk, fs::path(to_path) / fileName(from_path)); + from_disk.copyFile(from_path, to_disk, fs::path(to_path) / fileName(from_path), settings); }); results.push_back(std::move(result)); @@ -80,7 +80,7 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p } for (auto it = from_disk.iterateDirectory(from_path); it->isValid(); it->next()) - asyncCopy(from_disk, it->path(), to_disk, dest, exec, results, true); + asyncCopy(from_disk, it->path(), to_disk, dest, exec, results, true, settings); } } @@ -89,7 +89,12 @@ void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptrgetExecutor(); ResultsCollector results; - asyncCopy(*this, from_path, *to_disk, to_path, exec, results, copy_root_dir); + WriteSettings settings; + /// Disable parallel write. We already copy in parallel. + /// Avoid high memory usage. See test_s3_zero_copy_ttl/test.py::test_move_and_s3_memory_usage + settings.s3_allow_parallel_part_upload = false; + + asyncCopy(*this, from_path, *to_disk, to_path, exec, results, copy_root_dir, settings); for (auto & result : results) result.wait(); diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 4a7be740ccf..66a5c55f7f7 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -181,7 +181,11 @@ public: virtual void copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir); /// Copy file `from_file_path` to `to_file_path` located at `to_disk`. 
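    /// Editor's note (illustrative, not part of the patch): the WriteSettings parameter added below
    /// lets the caller tune how the destination disk writes the data; copyThroughBuffers() above
    /// uses it to set s3_allow_parallel_part_upload = false because the copy itself already runs in
    /// parallel. A hypothetical caller (disk variables and file names invented for illustration,
    /// both disks being IDisk references):
    ///
    ///     WriteSettings settings;
    ///     settings.s3_allow_parallel_part_upload = false;
    ///     from_disk.copyFile("data.bin", to_disk, "data.bin", settings);
    ///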
- virtual void copyFile(const String & from_file_path, IDisk & to_disk, const String & to_file_path); + virtual void copyFile( /// NOLINT + const String & from_file_path, + IDisk & to_disk, + const String & to_file_path, + const WriteSettings & settings = {}); /// List files at `path` and add their names to `file_names` virtual void listFiles(const String & path, std::vector & file_names) const = 0; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index e627a807bd8..83908d02f48 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -230,7 +230,9 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 doesn't support append to files"); auto settings_ptr = s3_settings.get(); - auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "VFSWrite"); + ThreadPoolCallbackRunner scheduler; + if (write_settings.s3_allow_parallel_part_upload) + scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "VFSWrite"); auto s3_buffer = std::make_unique( client.get(), diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index ad36c51447f..c84e23da85b 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -22,6 +22,7 @@ list (APPEND PUBLIC_LIBS ch_contrib::metrohash ch_contrib::murmurhash ch_contrib::hashidsxx + ch_contrib::morton_nd ) list (APPEND PRIVATE_LIBS diff --git a/src/Functions/FunctionsCodingUUID.cpp b/src/Functions/FunctionsCodingUUID.cpp index 9309c4cdbeb..f6dacc77045 100644 --- a/src/Functions/FunctionsCodingUUID.cpp +++ b/src/Functions/FunctionsCodingUUID.cpp @@ -13,36 +13,151 @@ #include #include -namespace DB -{ +#include -namespace ErrorCodes +namespace DB::ErrorCodes { - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int ILLEGAL_COLUMN; +extern const int ARGUMENT_OUT_OF_BOUND; +extern const int ILLEGAL_COLUMN; +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int LOGICAL_ERROR; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +namespace +{ +enum class Representation +{ + BigEndian, + LittleEndian +}; + +std::pair determineBinaryStartIndexWithIncrement(const ptrdiff_t num_bytes, const Representation representation) +{ + if (representation == Representation::BigEndian) + return {0, 1}; + else if (representation == Representation::LittleEndian) + return {num_bytes - 1, -1}; + + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "{} is not handled yet", magic_enum::enum_name(representation)); +} + +void formatHex(const std::span src, UInt8 * dst, const Representation representation) +{ + const auto src_size = std::ssize(src); + const auto [src_start_index, src_increment] = determineBinaryStartIndexWithIncrement(src_size, representation); + for (int src_pos = src_start_index, dst_pos = 0; src_pos >= 0 && src_pos < src_size; src_pos += src_increment, dst_pos += 2) + writeHexByteLowercase(src[src_pos], dst + dst_pos); +} + +void parseHex(const UInt8 * __restrict src, const std::span dst, const Representation representation) +{ + const auto dst_size = std::ssize(dst); + const auto [dst_start_index, dst_increment] = determineBinaryStartIndexWithIncrement(dst_size, representation); + const auto * src_as_char = reinterpret_cast(src); + for (auto dst_pos = dst_start_index, src_pos = 0; dst_pos >= 0 && dst_pos < dst_size; dst_pos += dst_increment, src_pos += 2) + dst[dst_pos] = unhex2(src_as_char + src_pos); +} + +class UUIDSerializer +{ +public: + 
enum class Variant + { + Default = 1, + Microsoft = 2 + }; + + explicit UUIDSerializer(const Variant variant) + : first_half_binary_representation(variant == Variant::Microsoft ? Representation::LittleEndian : Representation::BigEndian) + { + if (variant != Variant::Default && variant != Variant::Microsoft) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "{} is not handled yet", magic_enum::enum_name(variant)); + } + + void deserialize(const UInt8 * src16, UInt8 * dst36) const + { + formatHex({src16, 4}, &dst36[0], first_half_binary_representation); + dst36[8] = '-'; + formatHex({src16 + 4, 2}, &dst36[9], first_half_binary_representation); + dst36[13] = '-'; + formatHex({src16 + 6, 2}, &dst36[14], first_half_binary_representation); + dst36[18] = '-'; + formatHex({src16 + 8, 2}, &dst36[19], Representation::BigEndian); + dst36[23] = '-'; + formatHex({src16 + 10, 6}, &dst36[24], Representation::BigEndian); + } + + void serialize(const UInt8 * src36, UInt8 * dst16) const + { + /// If string is not like UUID - implementation specific behaviour. + parseHex(&src36[0], {dst16 + 0, 4}, first_half_binary_representation); + parseHex(&src36[9], {dst16 + 4, 2}, first_half_binary_representation); + parseHex(&src36[14], {dst16 + 6, 2}, first_half_binary_representation); + parseHex(&src36[19], {dst16 + 8, 2}, Representation::BigEndian); + parseHex(&src36[24], {dst16 + 10, 6}, Representation::BigEndian); + } + +private: + Representation first_half_binary_representation; +}; + +void checkArgumentCount(const DB::DataTypes & arguments, const std::string_view function_name) +{ + if (const auto argument_count = std::ssize(arguments); argument_count < 1 || argument_count > 2) + throw DB::Exception( + DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2", + function_name, + argument_count); +} + +void checkFormatArgument(const DB::DataTypes & arguments, const std::string_view function_name) +{ + if (const auto argument_count = std::ssize(arguments); + argument_count > 1 && !DB::WhichDataType(arguments[1]).isInt8() && !DB::WhichDataType(arguments[1]).isUInt8()) + throw DB::Exception( + DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of second argument of function {}, expected Int8 or UInt8 type", + arguments[1]->getName(), + function_name); +} + +UUIDSerializer::Variant parseVariant(const DB::ColumnsWithTypeAndName & arguments) +{ + if (arguments.size() < 2) + return UUIDSerializer::Variant::Default; + + const auto representation = static_cast>(arguments[1].column->getInt(0)); + const auto as_enum = magic_enum::enum_cast(representation); + if (!as_enum) + throw DB::Exception(DB::ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Expected UUID variant, got {}", representation); + + return *as_enum; +} +} + +namespace DB +{ constexpr size_t uuid_bytes_length = 16; constexpr size_t uuid_text_length = 36; class FunctionUUIDNumToString : public IFunction { - public: static constexpr auto name = "UUIDNumToString"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool isVariadic() const 
override { return true; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { + checkArgumentCount(arguments, name); + const auto * ptr = checkAndGetDataType(arguments[0].get()); if (!ptr || ptr->getN() != uuid_bytes_length) throw Exception("Illegal type " + arguments[0]->getName() + @@ -50,6 +165,8 @@ public: ", expected FixedString(" + toString(uuid_bytes_length) + ")", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + checkFormatArgument(arguments, name); + return std::make_shared(); } @@ -59,7 +176,7 @@ public: { const ColumnWithTypeAndName & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; - + const auto variant = parseVariant(arguments); if (const auto * col_in = checkAndGetColumn(column.get())) { if (col_in->getN() != uuid_bytes_length) @@ -82,9 +199,10 @@ public: size_t src_offset = 0; size_t dst_offset = 0; + const UUIDSerializer uuid_serializer(variant); for (size_t i = 0; i < size; ++i) { - formatUUID(&vec_in[src_offset], &vec_res[dst_offset]); + uuid_serializer.deserialize(&vec_in[src_offset], &vec_res[dst_offset]); src_offset += uuid_bytes_length; dst_offset += uuid_text_length; vec_res[dst_offset] = 0; @@ -104,55 +222,33 @@ public: class FunctionUUIDStringToNum : public IFunction { -private: - static void parseHex(const UInt8 * __restrict src, UInt8 * __restrict dst, const size_t num_bytes) - { - size_t src_pos = 0; - size_t dst_pos = 0; - for (; dst_pos < num_bytes; ++dst_pos) - { - dst[dst_pos] = unhex2(reinterpret_cast(&src[src_pos])); - src_pos += 2; - } - } - - static void parseUUID(const UInt8 * src36, UInt8 * dst16) - { - /// If string is not like UUID - implementation specific behaviour. - - parseHex(&src36[0], &dst16[0], 4); - parseHex(&src36[9], &dst16[4], 2); - parseHex(&src36[14], &dst16[6], 2); - parseHex(&src36[19], &dst16[8], 2); - parseHex(&src36[24], &dst16[10], 6); - } - public: static constexpr auto name = "UUIDStringToNum"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool isVariadic() const override { return true; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { + checkArgumentCount(arguments, name); + /// String or FixedString(36) if (!isString(arguments[0])) { const auto * ptr = checkAndGetDataType(arguments[0].get()); if (!ptr || ptr->getN() != uuid_text_length) throw Exception("Illegal type " + arguments[0]->getName() + - " of argument of function " + getName() + + " of first argument of function " + getName() + ", expected FixedString(" + toString(uuid_text_length) + ")", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } + checkFormatArgument(arguments, name); + return std::make_shared(uuid_bytes_length); } @@ -163,6 +259,7 @@ public: const ColumnWithTypeAndName & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; + const UUIDSerializer uuid_serializer(parseVariant(arguments)); if (const auto * col_in = checkAndGetColumn(column.get())) { const auto & vec_in = col_in->getChars(); @@ -184,7 +281,7 @@ public: size_t string_size = offsets_in[i] - src_offset; if (string_size == uuid_text_length + 1) - 
parseUUID(&vec_in[src_offset], &vec_res[dst_offset]); + uuid_serializer.serialize(&vec_in[src_offset], &vec_res[dst_offset]); else memset(&vec_res[dst_offset], 0, uuid_bytes_length); @@ -216,7 +313,7 @@ public: for (size_t i = 0; i < size; ++i) { - parseUUID(&vec_in[src_offset], &vec_res[dst_offset]); + uuid_serializer.serialize(&vec_in[src_offset], &vec_res[dst_offset]); src_offset += uuid_text_length; dst_offset += uuid_bytes_length; } diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index 40522fa1a6e..6078312537f 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -124,7 +124,7 @@ void RandImpl::execute(char * output, size_t size) char * end = output + size; constexpr int vec_size = 4; - constexpr int safe_overwrite = 15; + constexpr int safe_overwrite = PADDING_FOR_SIMD - 1; constexpr int bytes_per_write = 4 * sizeof(UInt64x4); UInt64 rand_seed = randomSeed(); diff --git a/src/Functions/FunctionsTimeWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp index 286ed4a729d..a47fc71c335 100644 --- a/src/Functions/FunctionsTimeWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -157,7 +158,7 @@ struct TimeWindowImpl const auto & interval_column = arguments[1]; const auto & from_datatype = *time_column.type.get(); const auto which_type = WhichDataType(from_datatype); - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0); if (!which_type.isDateTime() || !time_column_vec) throw Exception( @@ -198,7 +199,7 @@ struct TimeWindowImpl } template - static ColumnPtr executeTumble(const ColumnUInt32 & time_column, UInt64 num_units, const DateLUTImpl & time_zone) + static ColumnPtr executeTumble(const ColumnDateTime & time_column, UInt64 num_units, const DateLUTImpl & time_zone) { const auto & time_data = time_column.getData(); size_t size = time_column.size(); @@ -342,7 +343,7 @@ struct TimeWindowImpl const auto & hop_interval_column = arguments[1]; const auto & window_interval_column = arguments[2]; const auto & from_datatype = *time_column.type.get(); - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, 3, 0); if (!WhichDataType(from_datatype).isDateTime() || !time_column_vec) throw Exception( @@ -402,7 +403,7 @@ struct TimeWindowImpl template static ColumnPtr - executeHop(const ColumnUInt32 & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone) + executeHop(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone) { const auto & time_data = time_column.getData(); size_t size = time_column.size(); @@ -491,7 +492,7 @@ struct TimeWindowImpl const auto & hop_interval_column = arguments[1]; const auto & window_interval_column = arguments[2]; const auto & from_datatype = *time_column.type.get(); - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, 3, 0); if (!WhichDataType(from_datatype).isDateTime() || !time_column_vec) 
throw Exception( @@ -551,7 +552,7 @@ struct TimeWindowImpl template static ColumnPtr - executeHopSlice(const ColumnUInt32 & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone) + executeHopSlice(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone) { Int64 gcd_num_units = std::gcd(hop_num_units, window_num_units); diff --git a/src/Functions/URL/ExtractFirstSignificantSubdomain.h b/src/Functions/URL/ExtractFirstSignificantSubdomain.h index 73137da474f..0d1b1cac8ef 100644 --- a/src/Functions/URL/ExtractFirstSignificantSubdomain.h +++ b/src/Functions/URL/ExtractFirstSignificantSubdomain.h @@ -16,7 +16,7 @@ struct FirstSignificantSubdomainDefaultLookup } }; -template +template struct ExtractFirstSignificantSubdomain { static size_t getReserveLengthForElement() { return 10; } @@ -35,7 +35,7 @@ struct ExtractFirstSignificantSubdomain Pos tmp; size_t domain_length; - ExtractDomain::execute(data, size, tmp, domain_length); + ExtractDomain::execute(data, size, tmp, domain_length); if (domain_length == 0) return; @@ -105,7 +105,7 @@ struct ExtractFirstSignificantSubdomain Pos tmp; size_t domain_length; - ExtractDomain::execute(data, size, tmp, domain_length); + ExtractDomain::execute(data, size, tmp, domain_length); if (domain_length == 0) return; diff --git a/src/Functions/URL/cutToFirstSignificantSubdomain.cpp b/src/Functions/URL/cutToFirstSignificantSubdomain.cpp index dddfbe4f4dd..10c41b6a4c3 100644 --- a/src/Functions/URL/cutToFirstSignificantSubdomain.cpp +++ b/src/Functions/URL/cutToFirstSignificantSubdomain.cpp @@ -6,7 +6,7 @@ namespace DB { -template +template struct CutToFirstSignificantSubdomain { static size_t getReserveLengthForElement() { return 15; } @@ -19,7 +19,7 @@ struct CutToFirstSignificantSubdomain Pos tmp_data; size_t tmp_length; Pos domain_end; - ExtractFirstSignificantSubdomain::execute(data, size, tmp_data, tmp_length, &domain_end); + ExtractFirstSignificantSubdomain::execute(data, size, tmp_data, tmp_length, &domain_end); if (tmp_length == 0) return; @@ -30,15 +30,23 @@ struct CutToFirstSignificantSubdomain }; struct NameCutToFirstSignificantSubdomain { static constexpr auto name = "cutToFirstSignificantSubdomain"; }; -using FunctionCutToFirstSignificantSubdomain = FunctionStringToString>, NameCutToFirstSignificantSubdomain>; +using FunctionCutToFirstSignificantSubdomain = FunctionStringToString>, NameCutToFirstSignificantSubdomain>; struct NameCutToFirstSignificantSubdomainWithWWW { static constexpr auto name = "cutToFirstSignificantSubdomainWithWWW"; }; -using FunctionCutToFirstSignificantSubdomainWithWWW = FunctionStringToString>, NameCutToFirstSignificantSubdomainWithWWW>; +using FunctionCutToFirstSignificantSubdomainWithWWW = FunctionStringToString>, NameCutToFirstSignificantSubdomainWithWWW>; + +struct NameCutToFirstSignificantSubdomainRFC { static constexpr auto name = "cutToFirstSignificantSubdomainRFC"; }; +using FunctionCutToFirstSignificantSubdomainRFC = FunctionStringToString>, NameCutToFirstSignificantSubdomainRFC>; + +struct NameCutToFirstSignificantSubdomainWithWWWRFC { static constexpr auto name = "cutToFirstSignificantSubdomainWithWWWRFC"; }; +using FunctionCutToFirstSignificantSubdomainWithWWWRFC = FunctionStringToString>, NameCutToFirstSignificantSubdomainWithWWWRFC>; REGISTER_FUNCTION(CutToFirstSignificantSubdomain) { factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); } } diff --git 
a/src/Functions/URL/cutToFirstSignificantSubdomainCustom.cpp b/src/Functions/URL/cutToFirstSignificantSubdomainCustom.cpp index a2e51200910..521216c84a7 100644 --- a/src/Functions/URL/cutToFirstSignificantSubdomainCustom.cpp +++ b/src/Functions/URL/cutToFirstSignificantSubdomainCustom.cpp @@ -5,7 +5,7 @@ namespace DB { -template +template struct CutToFirstSignificantSubdomainCustom { static size_t getReserveLengthForElement() { return 15; } @@ -18,7 +18,7 @@ struct CutToFirstSignificantSubdomainCustom Pos tmp_data; size_t tmp_length; Pos domain_end; - ExtractFirstSignificantSubdomain::executeCustom(tld_lookup, data, size, tmp_data, tmp_length, &domain_end); + ExtractFirstSignificantSubdomain::executeCustom(tld_lookup, data, size, tmp_data, tmp_length, &domain_end); if (tmp_length == 0) return; @@ -29,15 +29,23 @@ struct CutToFirstSignificantSubdomainCustom }; struct NameCutToFirstSignificantSubdomainCustom { static constexpr auto name = "cutToFirstSignificantSubdomainCustom"; }; -using FunctionCutToFirstSignificantSubdomainCustom = FunctionCutToFirstSignificantSubdomainCustomImpl, NameCutToFirstSignificantSubdomainCustom>; +using FunctionCutToFirstSignificantSubdomainCustom = FunctionCutToFirstSignificantSubdomainCustomImpl, NameCutToFirstSignificantSubdomainCustom>; struct NameCutToFirstSignificantSubdomainCustomWithWWW { static constexpr auto name = "cutToFirstSignificantSubdomainCustomWithWWW"; }; -using FunctionCutToFirstSignificantSubdomainCustomWithWWW = FunctionCutToFirstSignificantSubdomainCustomImpl, NameCutToFirstSignificantSubdomainCustomWithWWW>; +using FunctionCutToFirstSignificantSubdomainCustomWithWWW = FunctionCutToFirstSignificantSubdomainCustomImpl, NameCutToFirstSignificantSubdomainCustomWithWWW>; + +struct NameCutToFirstSignificantSubdomainCustomRFC { static constexpr auto name = "cutToFirstSignificantSubdomainCustomRFC"; }; +using FunctionCutToFirstSignificantSubdomainCustomRFC = FunctionCutToFirstSignificantSubdomainCustomImpl, NameCutToFirstSignificantSubdomainCustomRFC>; + +struct NameCutToFirstSignificantSubdomainCustomWithWWWRFC { static constexpr auto name = "cutToFirstSignificantSubdomainCustomWithWWWRFC"; }; +using FunctionCutToFirstSignificantSubdomainCustomWithWWWRFC = FunctionCutToFirstSignificantSubdomainCustomImpl, NameCutToFirstSignificantSubdomainCustomWithWWWRFC>; REGISTER_FUNCTION(CutToFirstSignificantSubdomainCustom) { factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/URL/domain.cpp b/src/Functions/URL/domain.cpp index 1d781b37943..e7fead24dc9 100644 --- a/src/Functions/URL/domain.cpp +++ b/src/Functions/URL/domain.cpp @@ -7,12 +7,15 @@ namespace DB { struct NameDomain { static constexpr auto name = "domain"; }; -using FunctionDomain = FunctionStringToString>, NameDomain>; +using FunctionDomain = FunctionStringToString>, NameDomain>; +struct NameDomainRFC { static constexpr auto name = "domainRFC"; }; +using FunctionDomainRFC = FunctionStringToString>, NameDomainRFC>; REGISTER_FUNCTION(Domain) { factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/URL/domain.h b/src/Functions/URL/domain.h index 1245bb20182..64362edf2c3 100644 --- a/src/Functions/URL/domain.h +++ b/src/Functions/URL/domain.h @@ -20,6 +20,115 @@ inline std::string_view checkAndReturnHost(const Pos & pos, const Pos & dot_pos, return std::string_view(start_of_host, pos - start_of_host); } +/// Extracts host from given url (RPC). 
+/// +/// @return empty string view if the host is not valid (i.e. it does not have dot, or there no symbol after dot). +inline std::string_view getURLHostRFC(const char * data, size_t size) +{ + Pos pos = data; + Pos end = data + size; + + if (*pos == '/' && *(pos + 1) == '/') + { + pos += 2; + } + else + { + Pos scheme_end = data + std::min(size, 16UL); + for (++pos; pos < scheme_end; ++pos) + { + if (!isAlphaNumericASCII(*pos)) + { + switch (*pos) + { + case '.': + case '-': + case '+': + break; + case ' ': /// restricted symbols + case '\t': + case '<': + case '>': + case '%': + case '{': + case '}': + case '|': + case '\\': + case '^': + case '~': + case '[': + case ']': + case ';': + case '=': + case '&': + return std::string_view{}; + default: + goto exloop; + } + } + } +exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) == '/') + pos += 3; + else + pos = data; + } + + Pos dot_pos = nullptr; + Pos colon_pos = nullptr; + bool has_at_symbol = false; + bool has_terminator_after_colon = false; + const auto * start_of_host = pos; + for (; pos < end; ++pos) + { + switch (*pos) + { + case '.': + if (has_at_symbol || colon_pos == nullptr) + dot_pos = pos; + break; + case ':': + if (has_at_symbol || colon_pos) goto done; + colon_pos = pos; + break; + case '/': /// end symbols + case '?': + case '#': + goto done; + case '@': /// myemail@gmail.com + if (has_terminator_after_colon) return std::string_view{}; + if (has_at_symbol) goto done; + has_at_symbol = true; + start_of_host = pos + 1; + break; + case ' ': /// restricted symbols in whole URL + case '\t': + case '<': + case '>': + case '%': + case '{': + case '}': + case '|': + case '\\': + case '^': + case '~': + case '[': + case ']': + case ';': + case '=': + case '&': + if (colon_pos == nullptr) + return std::string_view{}; + else + has_terminator_after_colon = true; + } + } + +done: + if (!has_at_symbol) + pos = colon_pos ? colon_pos : pos; + return checkAndReturnHost(pos, dot_pos, start_of_host); +} + /// Extracts host from given url. /// /// @return empty string view if the host is not valid (i.e. it does not have dot, or there no symbol after dot). 
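The getURLHostRFC parser added above skips an optional scheme, drops any userinfo before '@', and cuts the host at ':', '/', '?' or '#'. The following deliberately naive stand-in only illustrates that slicing on a couple of sample URLs; it is not the function above and glosses over its handling of restricted characters and invalid schemes.

// Rough, illustrative approximation of the host slicing done above
// (not ClickHouse's implementation; no validation of restricted symbols).
#include <cassert>
#include <string_view>

std::string_view naive_host(std::string_view url)
{
    if (auto p = url.find("//"); p != std::string_view::npos)
        url.remove_prefix(p + 2);                       // skip scheme and "//"
    if (auto at = url.find('@'); at != std::string_view::npos)
        url.remove_prefix(at + 1);                      // drop userinfo (myemail@...)
    return url.substr(0, url.find_first_of(":/?#"));    // cut port, path, query, fragment
}

int main()
{
    assert(naive_host("https://user@www.example.com:8080/path?q=1") == "www.example.com");
    assert(naive_host("//clickhouse.com/docs") == "clickhouse.com");
}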
@@ -113,14 +222,18 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos return checkAndReturnHost(pos, dot_pos, start_of_host); } -template +template struct ExtractDomain { static size_t getReserveLengthForElement() { return 15; } static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size) { - std::string_view host = getURLHost(data, size); + std::string_view host; + if constexpr (conform_rfc) + host = getURLHostRFC(data, size); + else + host = getURLHost(data, size); if (host.empty()) { diff --git a/src/Functions/URL/domainWithoutWWW.cpp b/src/Functions/URL/domainWithoutWWW.cpp index 53ff5bc919e..2fa9159d7af 100644 --- a/src/Functions/URL/domainWithoutWWW.cpp +++ b/src/Functions/URL/domainWithoutWWW.cpp @@ -6,12 +6,16 @@ namespace DB { struct NameDomainWithoutWWW { static constexpr auto name = "domainWithoutWWW"; }; -using FunctionDomainWithoutWWW = FunctionStringToString>, NameDomainWithoutWWW>; +using FunctionDomainWithoutWWW = FunctionStringToString>, NameDomainWithoutWWW>; + +struct NameDomainWithoutWWWRFC { static constexpr auto name = "domainWithoutWWWRFC"; }; +using FunctionDomainWithoutWWWRFC = FunctionStringToString>, NameDomainWithoutWWWRFC>; REGISTER_FUNCTION(DomainWithoutWWW) { factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/URL/firstSignificantSubdomain.cpp b/src/Functions/URL/firstSignificantSubdomain.cpp index d3aeb90771f..902a4f43fba 100644 --- a/src/Functions/URL/firstSignificantSubdomain.cpp +++ b/src/Functions/URL/firstSignificantSubdomain.cpp @@ -7,12 +7,15 @@ namespace DB { struct NameFirstSignificantSubdomain { static constexpr auto name = "firstSignificantSubdomain"; }; +using FunctionFirstSignificantSubdomain = FunctionStringToString>, NameFirstSignificantSubdomain>; -using FunctionFirstSignificantSubdomain = FunctionStringToString>, NameFirstSignificantSubdomain>; +struct NameFirstSignificantSubdomainRFC { static constexpr auto name = "firstSignificantSubdomainRFC"; }; +using FunctionFirstSignificantSubdomainRFC = FunctionStringToString>, NameFirstSignificantSubdomainRFC>; REGISTER_FUNCTION(FirstSignificantSubdomain) { factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/URL/firstSignificantSubdomainCustom.cpp b/src/Functions/URL/firstSignificantSubdomainCustom.cpp index f43b42d0309..c07aa2b3ac8 100644 --- a/src/Functions/URL/firstSignificantSubdomainCustom.cpp +++ b/src/Functions/URL/firstSignificantSubdomainCustom.cpp @@ -7,12 +7,15 @@ namespace DB { struct NameFirstSignificantSubdomainCustom { static constexpr auto name = "firstSignificantSubdomainCustom"; }; +using FunctionFirstSignificantSubdomainCustom = FunctionCutToFirstSignificantSubdomainCustomImpl, NameFirstSignificantSubdomainCustom>; -using FunctionFirstSignificantSubdomainCustom = FunctionCutToFirstSignificantSubdomainCustomImpl, NameFirstSignificantSubdomainCustom>; +struct NameFirstSignificantSubdomainCustomRFC { static constexpr auto name = "firstSignificantSubdomainCustomRFC"; }; +using FunctionFirstSignificantSubdomainCustomRFC = FunctionCutToFirstSignificantSubdomainCustomImpl, NameFirstSignificantSubdomainCustomRFC>; REGISTER_FUNCTION(FirstSignificantSubdomainCustom) { factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/URL/port.cpp b/src/Functions/URL/port.cpp index 85b060ca987..f716f3e454b 100644 --- a/src/Functions/URL/port.cpp +++ b/src/Functions/URL/port.cpp @@ -18,12 +18,9 @@ namespace ErrorCodes extern const int 
NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -struct FunctionPort : public IFunction +template +struct FunctionPortImpl : public IFunction { - static constexpr auto name = "port"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override { return name; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } bool useDefaultImplementationForConstants() const override { return true; } @@ -94,7 +91,12 @@ private: const char * p = reinterpret_cast(buf.data()) + offset; const char * end = p + size; - std::string_view host = getURLHost(p, size); + std::string_view host; + if constexpr (conform_rfc) + host = getURLHostRFC(p, size); + else + host = getURLHost(p, size); + if (host.empty()) return default_port; if (host.size() == size) @@ -121,9 +123,24 @@ private: } }; +struct FunctionPort : public FunctionPortImpl +{ + static constexpr auto name = "port"; + String getName() const override { return name; } + static FunctionPtr create(ContextPtr) { return std::make_shared(); } +}; + +struct FunctionPortRFC : public FunctionPortImpl +{ + static constexpr auto name = "portRFC"; + String getName() const override { return name; } + static FunctionPtr create(ContextPtr) { return std::make_shared(); } +}; + REGISTER_FUNCTION(Port) { factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/URL/topLevelDomain.cpp b/src/Functions/URL/topLevelDomain.cpp index 9937618cae9..f5610ed93b7 100644 --- a/src/Functions/URL/topLevelDomain.cpp +++ b/src/Functions/URL/topLevelDomain.cpp @@ -5,13 +5,18 @@ namespace DB { +template struct ExtractTopLevelDomain { static size_t getReserveLengthForElement() { return 5; } static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size) { - std::string_view host = getURLHost(data, size); + std::string_view host; + if constexpr (conform_rfc) + host = getURLHostRFC(data, size); + else + host = getURLHost(data, size); res_data = data; res_size = 0; @@ -41,11 +46,15 @@ struct ExtractTopLevelDomain }; struct NameTopLevelDomain { static constexpr auto name = "topLevelDomain"; }; -using FunctionTopLevelDomain = FunctionStringToString, NameTopLevelDomain>; +using FunctionTopLevelDomain = FunctionStringToString>, NameTopLevelDomain>; + +struct NameTopLevelDomainRFC { static constexpr auto name = "topLevelDomainRFC"; }; +using FunctionTopLevelDomainRFC = FunctionStringToString>, NameTopLevelDomainRFC>; REGISTER_FUNCTION(TopLevelDomain) { factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/array/FunctionArrayMapped.h b/src/Functions/array/FunctionArrayMapped.h index 6d500cc15c4..dfed7cedcf0 100644 --- a/src/Functions/array/FunctionArrayMapped.h +++ b/src/Functions/array/FunctionArrayMapped.h @@ -185,8 +185,10 @@ public: const auto * data_type_function = checkAndGetDataType(arguments[0].type.get()); if (!data_type_function) - throw Exception("First argument for function " + getName() + " must be a function", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be a function. Actual {}", + getName(), + arguments[0].type->getName()); /// The types of the remaining arguments are already checked in getLambdaArgumentTypes. 
diff --git a/src/Functions/array/arrayElement.cpp b/src/Functions/array/arrayElement.cpp index acd8f89ffe5..59224096d3c 100644 --- a/src/Functions/array/arrayElement.cpp +++ b/src/Functions/array/arrayElement.cpp @@ -1025,12 +1025,14 @@ ColumnPtr FunctionArrayElement::executeMap( if (col_const_map) values_array = ColumnConst::create(values_array, input_rows_count); + const auto & type_map = assert_cast(*arguments[0].type); + /// Prepare arguments to call arrayElement for array with values and calculated indices at previous step. ColumnsWithTypeAndName new_arguments = { { values_array, - std::make_shared(result_type), + std::make_shared(type_map.getValueType()), "" }, { @@ -1086,7 +1088,9 @@ ColumnPtr FunctionArrayElement::executeImpl(const ColumnsWithTypeAndName & argum col_array = checkAndGetColumn(arguments[0].column.get()); if (col_array) + { is_array_of_nullable = isColumnNullable(col_array->getData()); + } else { col_const_array = checkAndGetColumnConstData(arguments[0].column.get()); diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index b33fcf32de1..f5a4b50fb54 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -44,7 +45,6 @@ namespace */ class FunctionDateDiff : public IFunction { - using ColumnDateTime64 = ColumnDecimal; public: static constexpr auto name = "dateDiff"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } @@ -141,19 +141,19 @@ private: const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, ColumnInt64::Container & result) const { - if (const auto * x_vec_16 = checkAndGetColumn(&x)) + if (const auto * x_vec_16 = checkAndGetColumn(&x)) dispatchForSecondColumn(*x_vec_16, y, timezone_x, timezone_y, result); - else if (const auto * x_vec_32 = checkAndGetColumn(&x)) + else if (const auto * x_vec_32 = checkAndGetColumn(&x)) dispatchForSecondColumn(*x_vec_32, y, timezone_x, timezone_y, result); - else if (const auto * x_vec_32_s = checkAndGetColumn(&x)) + else if (const auto * x_vec_32_s = checkAndGetColumn(&x)) dispatchForSecondColumn(*x_vec_32_s, y, timezone_x, timezone_y, result); else if (const auto * x_vec_64 = checkAndGetColumn(&x)) dispatchForSecondColumn(*x_vec_64, y, timezone_x, timezone_y, result); - else if (const auto * x_const_16 = checkAndGetColumnConst(&x)) + else if (const auto * x_const_16 = checkAndGetColumnConst(&x)) dispatchConstForSecondColumn(x_const_16->getValue(), y, timezone_x, timezone_y, result); - else if (const auto * x_const_32 = checkAndGetColumnConst(&x)) + else if (const auto * x_const_32 = checkAndGetColumnConst(&x)) dispatchConstForSecondColumn(x_const_32->getValue(), y, timezone_x, timezone_y, result); - else if (const auto * x_const_32_s = checkAndGetColumnConst(&x)) + else if (const auto * x_const_32_s = checkAndGetColumnConst(&x)) dispatchConstForSecondColumn(x_const_32_s->getValue(), y, timezone_x, timezone_y, result); else if (const auto * x_const_64 = checkAndGetColumnConst(&x)) dispatchConstForSecondColumn(x_const_64->getValue>(), y, timezone_x, timezone_y, result); @@ -169,19 +169,19 @@ private: const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, ColumnInt64::Container & result) const { - if (const auto * y_vec_16 = checkAndGetColumn(&y)) + if (const auto * y_vec_16 = checkAndGetColumn(&y)) vectorVector(x, *y_vec_16, timezone_x, timezone_y, result); - else if (const auto * y_vec_32 = checkAndGetColumn(&y)) + else if (const auto * y_vec_32 = checkAndGetColumn(&y)) vectorVector(x, 
*y_vec_32, timezone_x, timezone_y, result); - else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) + else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) vectorVector(x, *y_vec_32_s, timezone_x, timezone_y, result); else if (const auto * y_vec_64 = checkAndGetColumn(&y)) vectorVector(x, *y_vec_64, timezone_x, timezone_y, result); - else if (const auto * y_const_16 = checkAndGetColumnConst(&y)) + else if (const auto * y_const_16 = checkAndGetColumnConst(&y)) vectorConstant(x, y_const_16->getValue(), timezone_x, timezone_y, result); - else if (const auto * y_const_32 = checkAndGetColumnConst(&y)) + else if (const auto * y_const_32 = checkAndGetColumnConst(&y)) vectorConstant(x, y_const_32->getValue(), timezone_x, timezone_y, result); - else if (const auto * y_const_32_s = checkAndGetColumnConst(&y)) + else if (const auto * y_const_32_s = checkAndGetColumnConst(&y)) vectorConstant(x, y_const_32_s->getValue(), timezone_x, timezone_y, result); else if (const auto * y_const_64 = checkAndGetColumnConst(&y)) vectorConstant(x, y_const_64->getValue>(), timezone_x, timezone_y, result); @@ -197,11 +197,11 @@ private: const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, ColumnInt64::Container & result) const { - if (const auto * y_vec_16 = checkAndGetColumn(&y)) + if (const auto * y_vec_16 = checkAndGetColumn(&y)) constantVector(x, *y_vec_16, timezone_x, timezone_y, result); - else if (const auto * y_vec_32 = checkAndGetColumn(&y)) + else if (const auto * y_vec_32 = checkAndGetColumn(&y)) constantVector(x, *y_vec_32, timezone_x, timezone_y, result); - else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) + else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) constantVector(x, *y_vec_32_s, timezone_x, timezone_y, result); else if (const auto * y_vec_64 = checkAndGetColumn(&y)) constantVector(x, *y_vec_64, timezone_x, timezone_y, result); diff --git a/src/Functions/grouping.h b/src/Functions/grouping.h index b9ef6ffc107..830c509f1f5 100644 --- a/src/Functions/grouping.h +++ b/src/Functions/grouping.h @@ -13,6 +13,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; +} + class FunctionGroupingBase : public IFunction { protected: @@ -71,6 +76,22 @@ public: } }; +class FunctionGrouping : public FunctionGroupingBase +{ +public: + explicit FunctionGrouping(bool force_compatibility_) + : FunctionGroupingBase(ColumnNumbers(), force_compatibility_) + {} + + String getName() const override { return "grouping"; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t) const override + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Method executeImpl is not supported for 'grouping' function"); + } +}; + class FunctionGroupingOrdinary : public FunctionGroupingBase { public: diff --git a/src/Functions/makeDate.cpp b/src/Functions/makeDate.cpp index c9571a7333d..7ebca71af13 100644 --- a/src/Functions/makeDate.cpp +++ b/src/Functions/makeDate.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -149,7 +150,7 @@ struct MakeDateTraits { static constexpr auto name = "makeDate"; using ReturnDataType = DataTypeDate; - using ReturnColumnType = ColumnUInt16; + using ReturnColumnType = ColumnDate; static constexpr auto MIN_YEAR = 1970; static constexpr auto MAX_YEAR = 2149; @@ -162,7 +163,7 @@ struct MakeDate32Traits { static constexpr auto name = "makeDate32"; using ReturnDataType = DataTypeDate32; - using ReturnColumnType = ColumnInt32; + using ReturnColumnType = ColumnDate32; static constexpr auto 
MIN_YEAR = 1900; static constexpr auto MAX_YEAR = 2299; @@ -267,7 +268,7 @@ public: Columns converted_arguments; convertRequiredArguments(arguments, converted_arguments); - auto res_column = ColumnUInt32::create(input_rows_count); + auto res_column = ColumnDateTime::create(input_rows_count); auto & result_data = res_column->getData(); const auto & year_data = typeid_cast(*converted_arguments[0]).getData(); @@ -365,7 +366,7 @@ public: fraction_data = &typeid_cast(*converted_arguments[6]).getData(); } - auto res_column = ColumnDecimal::create(input_rows_count, static_cast(precision)); + auto res_column = ColumnDateTime64::create(input_rows_count, static_cast(precision)); auto & result_data = res_column->getData(); const auto & year_data = typeid_cast(*converted_arguments[0]).getData(); diff --git a/src/Functions/mortonDecode.cpp b/src/Functions/mortonDecode.cpp new file mode 100644 index 00000000000..337fd5e3a38 --- /dev/null +++ b/src/Functions/mortonDecode.cpp @@ -0,0 +1,433 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#if USE_MULTITARGET_CODE && defined(__BMI2__) +#include +#endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; + extern const int ARGUMENT_OUT_OF_BOUND; +} + +#define EXTRACT_VECTOR(INDEX) \ + auto col##INDEX = ColumnUInt64::create(); \ + auto & vec##INDEX = col##INDEX->getData(); \ + vec##INDEX.resize(input_rows_count); + +#define DECODE(ND, ...) \ + if (nd == (ND)) \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + auto res = MortonND_##ND##D_Dec.Decode(col_code->getUInt(i)); \ + __VA_ARGS__ \ + } \ + } + +#define MASK(IDX, ...) \ + ((mask) ? shrink(mask->getColumn((IDX)).getUInt(0), std::get(__VA_ARGS__)) : std::get(__VA_ARGS__)) + +#define EXECUTE() \ + size_t nd; \ + const auto * col_const = typeid_cast(arguments[0].column.get()); \ + const auto * mask = typeid_cast(col_const->getDataColumnPtr().get()); \ + if (mask) \ + nd = mask->tupleSize(); \ + else \ + nd = col_const->getUInt(0); \ + auto non_const_arguments = arguments; \ + non_const_arguments[1].column = non_const_arguments[1].column->convertToFullColumnIfConst(); \ + const ColumnPtr & col_code = non_const_arguments[1].column; \ + Columns tuple_columns(nd); \ + EXTRACT_VECTOR(0) \ + if (nd == 1) \ + { \ + if (mask) \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + vec0[i] = shrink(mask->getColumn(0).getUInt(0), col_code->getUInt(i)); \ + } \ + tuple_columns[0] = std::move(col0); \ + } \ + else \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + vec0[i] = col_code->getUInt(i); \ + } \ + tuple_columns[0] = std::move(col0); \ + } \ + return ColumnTuple::create(tuple_columns); \ + } \ + EXTRACT_VECTOR(1) \ + DECODE(2, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res);) \ + EXTRACT_VECTOR(2) \ + DECODE(3, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res); \ + vec2[i] = MASK(2, res);) \ + EXTRACT_VECTOR(3) \ + DECODE(4, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res); \ + vec2[i] = MASK(2, res); \ + vec3[i] = MASK(3, res);) \ + EXTRACT_VECTOR(4) \ + DECODE(5, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res); \ + vec2[i] = MASK(2, res); \ + vec3[i] = MASK(3, res); \ + vec4[i] = MASK(4, res);) \ + EXTRACT_VECTOR(5) \ + DECODE(6, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res); \ + vec2[i] = MASK(2, res); \ + vec3[i] = MASK(3, res); \ + vec4[i] = MASK(4, res); \ + vec5[i] = MASK(5, res);) \ + EXTRACT_VECTOR(6) \ + 
DECODE(7, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res); \ + vec2[i] = MASK(2, res); \ + vec3[i] = MASK(3, res); \ + vec4[i] = MASK(4, res); \ + vec5[i] = MASK(5, res); \ + vec6[i] = MASK(6, res);) \ + EXTRACT_VECTOR(7) \ + DECODE(8, \ + vec0[i] = MASK(0, res); \ + vec1[i] = MASK(1, res); \ + vec2[i] = MASK(2, res); \ + vec3[i] = MASK(3, res); \ + vec4[i] = MASK(4, res); \ + vec5[i] = MASK(5, res); \ + vec6[i] = MASK(6, res); \ + vec7[i] = MASK(7, res);) \ + switch (nd) \ + { \ + case 2: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + break; \ + case 3: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + tuple_columns[2] = std::move(col2); \ + return ColumnTuple::create(tuple_columns); \ + case 4: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + tuple_columns[2] = std::move(col2); \ + tuple_columns[3] = std::move(col3); \ + return ColumnTuple::create(tuple_columns); \ + case 5: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + tuple_columns[2] = std::move(col2); \ + tuple_columns[3] = std::move(col3); \ + tuple_columns[4] = std::move(col4); \ + return ColumnTuple::create(tuple_columns); \ + case 6: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + tuple_columns[2] = std::move(col2); \ + tuple_columns[3] = std::move(col3); \ + tuple_columns[4] = std::move(col4); \ + tuple_columns[5] = std::move(col5); \ + return ColumnTuple::create(tuple_columns); \ + case 7: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + tuple_columns[2] = std::move(col2); \ + tuple_columns[3] = std::move(col3); \ + tuple_columns[4] = std::move(col4); \ + tuple_columns[5] = std::move(col5); \ + tuple_columns[6] = std::move(col6); \ + return ColumnTuple::create(tuple_columns); \ + case 8: \ + tuple_columns[0] = std::move(col0); \ + tuple_columns[1] = std::move(col1); \ + tuple_columns[2] = std::move(col2); \ + tuple_columns[3] = std::move(col3); \ + tuple_columns[4] = std::move(col4); \ + tuple_columns[5] = std::move(col5); \ + tuple_columns[6] = std::move(col6); \ + tuple_columns[7] = std::move(col7); \ + return ColumnTuple::create(tuple_columns); \ + } \ + return ColumnTuple::create(tuple_columns); + +DECLARE_DEFAULT_CODE( +constexpr auto MortonND_2D_Dec = mortonnd::MortonNDLutDecoder<2, 32, 8>(); +constexpr auto MortonND_3D_Dec = mortonnd::MortonNDLutDecoder<3, 21, 8>(); +constexpr auto MortonND_4D_Dec = mortonnd::MortonNDLutDecoder<4, 16, 8>(); +constexpr auto MortonND_5D_Dec = mortonnd::MortonNDLutDecoder<5, 12, 8>(); +constexpr auto MortonND_6D_Dec = mortonnd::MortonNDLutDecoder<6, 10, 8>(); +constexpr auto MortonND_7D_Dec = mortonnd::MortonNDLutDecoder<7, 9, 8>(); +constexpr auto MortonND_8D_Dec = mortonnd::MortonNDLutDecoder<8, 8, 8>(); +class FunctionMortonDecode : public IFunction +{ +public: + static constexpr auto name = "mortonDecode"; + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override + { + return 2; + } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + UInt64 tuple_size = 0; + const auto * col_const = 
typeid_cast(arguments[0].column.get()); + if (!col_const) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column type {} of function {}, should be a constant (UInt or Tuple)", + arguments[0].type->getName(), getName()); + if (!WhichDataType(arguments[1].type).isNativeUInt()) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column type {} of function {}, should be a native UInt", + arguments[1].type->getName(), getName()); + const auto * mask = typeid_cast(col_const->getDataColumnPtr().get()); + if (mask) + { + tuple_size = mask->tupleSize(); + } + else if (WhichDataType(arguments[0].type).isNativeUInt()) + { + tuple_size = col_const->getUInt(0); + } + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column type {} of function {}, should be UInt or Tuple", + arguments[0].type->getName(), getName()); + if (tuple_size > 8 || tuple_size < 1) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Illegal first argument for function {}, should be a number in range 1-8 or a Tuple of such size", + getName()); + if (mask) + { + const auto * type_tuple = typeid_cast(arguments[0].type.get()); + for (size_t i = 0; i < tuple_size; i++) + { + if (!WhichDataType(type_tuple->getElement(i)).isNativeUInt()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument in tuple for function {}, should be a native UInt", + type_tuple->getElement(i)->getName(), getName()); + auto ratio = mask->getColumn(i).getUInt(0); + if (ratio > 8 || ratio < 1) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Illegal argument {} in tuple for function {}, should be a number in range 1-8", + ratio, getName()); + } + } + DataTypes types(tuple_size); + for (size_t i = 0; i < tuple_size; i++) + { + types[i] = std::make_shared(); + } + return std::make_shared(types); + } + + static UInt64 shrink(UInt64 ratio, UInt64 value) + { + switch (ratio) + { + case 1: + return value; + case 2: + return std::get<1>(MortonND_2D_Dec.Decode(value)); + case 3: + return std::get<2>(MortonND_3D_Dec.Decode(value)); + case 4: + return std::get<3>(MortonND_4D_Dec.Decode(value)); + case 5: + return std::get<4>(MortonND_5D_Dec.Decode(value)); + case 6: + return std::get<5>(MortonND_6D_Dec.Decode(value)); + case 7: + return std::get<6>(MortonND_7D_Dec.Decode(value)); + case 8: + return std::get<7>(MortonND_8D_Dec.Decode(value)); + } + return value; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + EXECUTE() + } +}; +) // DECLARE_DEFAULT_CODE + +#if defined(MORTON_ND_BMI2_ENABLED) +#undef DECODE +#define DECODE(ND, ...) 
\ + if (nd == (ND)) \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + auto res = MortonND_##ND##D::Decode(col_code->getUInt(i)); \ + __VA_ARGS__ \ + } \ + } + +DECLARE_AVX2_SPECIFIC_CODE( +using MortonND_2D = mortonnd::MortonNDBmi<2, uint64_t>; +using MortonND_3D = mortonnd::MortonNDBmi<3, uint64_t>; +using MortonND_4D = mortonnd::MortonNDBmi<4, uint64_t>; +using MortonND_5D = mortonnd::MortonNDBmi<5, uint64_t>; +using MortonND_6D = mortonnd::MortonNDBmi<6, uint64_t>; +using MortonND_7D = mortonnd::MortonNDBmi<7, uint64_t>; +using MortonND_8D = mortonnd::MortonNDBmi<8, uint64_t>; +class FunctionMortonDecode: public TargetSpecific::Default::FunctionMortonDecode +{ + static UInt64 shrink(UInt64 ratio, UInt64 value) + { + switch (ratio) + { + case 1: + return value; + case 2: + return std::get<1>(MortonND_2D::Decode(value)); + case 3: + return std::get<2>(MortonND_3D::Decode(value)); + case 4: + return std::get<3>(MortonND_4D::Decode(value)); + case 5: + return std::get<4>(MortonND_5D::Decode(value)); + case 6: + return std::get<5>(MortonND_6D::Decode(value)); + case 7: + return std::get<6>(MortonND_7D::Decode(value)); + case 8: + return std::get<7>(MortonND_8D::Decode(value)); + } + return value; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + EXECUTE() + } +}; +) +#endif // MORTON_ND_BMI2_ENABLED + +#undef DECODE +#undef MASK +#undef EXTRACT_VECTOR +#undef EXECUTE + +class FunctionMortonDecode: public TargetSpecific::Default::FunctionMortonDecode +{ +public: + explicit FunctionMortonDecode(ContextPtr context) : selector(context) + { + selector.registerImplementation(); + +#if USE_MULTITARGET_CODE && defined(MORTON_ND_BMI2_ENABLED) + selector.registerImplementation(); +#endif + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + return selector.selectAndExecute(arguments, result_type, input_rows_count); + } + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + +private: + ImplementationSelector selector; +}; + +REGISTER_FUNCTION(MortonDecode) +{ + factory.registerFunction({ + R"( +Decodes a Morton encoding (ZCurve) into the corresponding unsigned integer tuple + +The function has two modes of operation: +- Simple +- Expanded + +Simple: accepts a resulting tuple size as a first argument and the code as a second argument. +[example:simple] +Will decode into: `(1,2,3,4)` +The resulting tuple size cannot be more than 8 + +Expanded: accepts a range mask (tuple) as a first argument and the code as a second argument. +Each number in mask configures the amount of range shrink +1 - no shrink +2 - 2x shrink +3 - 3x shrink +.... +Up to 8x shrink. +[example:range_shrank] +Note: see mortonEncode() docs on why range change might be beneficial. +Still limited to 8 numbers at most. + +Morton code for one argument is always the argument itself (as a tuple). 
+[example:identity] +Produces: `(1)` + +You can shrink one argument too: +[example:identity_shrank] +Produces: `(128)` + +The function accepts a column of codes as a second argument: +[example:from_table] + +The range tuple must be a constant: +[example:from_table_range] +)", + Documentation::Examples{ + {"simple", "SELECT mortonDecode(4, 2149)"}, + {"range_shrank", "SELECT mortonDecode((1,2), 1572864)"}, + {"identity", "SELECT mortonDecode(1, 1)"}, + {"identity_shrank", "SELECT mortonDecode(tuple(2), 32768)"}, + {"from_table", "SELECT mortonDecode(2, code) FROM table"}, + {"from_table_range", "SELECT mortonDecode((1,2), code) FROM table"}, + }, + Documentation::Categories {"ZCurve", "Morton coding"} + }); +} + +} diff --git a/src/Functions/mortonEncode.cpp b/src/Functions/mortonEncode.cpp new file mode 100644 index 00000000000..4bdd237fa9c --- /dev/null +++ b/src/Functions/mortonEncode.cpp @@ -0,0 +1,393 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#if USE_MULTITARGET_CODE && defined(__BMI2__) +#include +#endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; +} + +#define EXTRACT_VECTOR(INDEX) \ + const ColumnPtr & col##INDEX = non_const_arguments[(INDEX) + vectorStartIndex].column; + +#define ENCODE(ND, ...) \ + if (nd == (ND)) \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + vec_res[i] = MortonND_##ND##D_Enc.Encode(__VA_ARGS__); \ + } \ + return col_res; \ + } + +#define EXPAND(IDX, ...) \ + (mask) ? expand(mask->getColumn(IDX).getUInt(0), __VA_ARGS__) : __VA_ARGS__ + +#define MASK(ND, IDX, ...) \ + (EXPAND(IDX, __VA_ARGS__) & MortonND_##ND##D_Enc.InputMask()) + +#define EXECUTE() \ + size_t nd = arguments.size(); \ + size_t vectorStartIndex = 0; \ + const auto * const_col = typeid_cast(arguments[0].column.get()); \ + const ColumnTuple * mask; \ + if (const_col) \ + mask = typeid_cast(const_col->getDataColumnPtr().get()); \ + else \ + mask = typeid_cast(arguments[0].column.get()); \ + if (mask) \ + { \ + nd = mask->tupleSize(); \ + vectorStartIndex = 1; \ + for (size_t i = 0; i < nd; i++) \ + { \ + auto ratio = mask->getColumn(i).getUInt(0); \ + if (ratio > 8 || ratio < 1) \ + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, \ + "Illegal argument {} of function {}, should be a number in range 1-8", \ + arguments[0].column->getName(), getName()); \ + } \ + } \ + \ + auto non_const_arguments = arguments; \ + for (auto & argument : non_const_arguments) \ + argument.column = argument.column->convertToFullColumnIfConst(); \ + \ + auto col_res = ColumnUInt64::create(); \ + ColumnUInt64::Container & vec_res = col_res->getData(); \ + vec_res.resize(input_rows_count); \ + \ + EXTRACT_VECTOR(0) \ + if (nd == 1) \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + vec_res[i] = EXPAND(0, col0->getUInt(i)); \ + } \ + return col_res; \ + } \ + \ + EXTRACT_VECTOR(1) \ + ENCODE(2, \ + MASK(2, 0, col0->getUInt(i)), \ + MASK(2, 1, col1->getUInt(i))) \ + EXTRACT_VECTOR(2) \ + ENCODE(3, \ + MASK(3, 0, col0->getUInt(i)), \ + MASK(3, 1, col1->getUInt(i)), \ + MASK(3, 2, col2->getUInt(i))) \ + EXTRACT_VECTOR(3) \ + ENCODE(4, \ + MASK(4, 0, col0->getUInt(i)), \ + MASK(4, 1, col1->getUInt(i)), \ + MASK(4, 2, col2->getUInt(i)), \ + MASK(4, 3, col3->getUInt(i))) \ + EXTRACT_VECTOR(4) \ + ENCODE(5, \ + MASK(5, 0, col0->getUInt(i)), \ + MASK(5, 1, col1->getUInt(i)), \ + MASK(5, 2, col2->getUInt(i)), 
\ + MASK(5, 3, col3->getUInt(i)), \ + MASK(5, 4, col4->getUInt(i))) \ + EXTRACT_VECTOR(5) \ + ENCODE(6, \ + MASK(6, 0, col0->getUInt(i)), \ + MASK(6, 1, col1->getUInt(i)), \ + MASK(6, 2, col2->getUInt(i)), \ + MASK(6, 3, col3->getUInt(i)), \ + MASK(6, 4, col4->getUInt(i)), \ + MASK(6, 5, col5->getUInt(i))) \ + EXTRACT_VECTOR(6) \ + ENCODE(7, \ + MASK(7, 0, col0->getUInt(i)), \ + MASK(7, 1, col1->getUInt(i)), \ + MASK(7, 2, col2->getUInt(i)), \ + MASK(7, 3, col3->getUInt(i)), \ + MASK(7, 4, col4->getUInt(i)), \ + MASK(7, 5, col5->getUInt(i)), \ + MASK(7, 6, col6->getUInt(i))) \ + EXTRACT_VECTOR(7) \ + ENCODE(8, \ + MASK(8, 0, col0->getUInt(i)), \ + MASK(8, 1, col1->getUInt(i)), \ + MASK(8, 2, col2->getUInt(i)), \ + MASK(8, 3, col3->getUInt(i)), \ + MASK(8, 4, col4->getUInt(i)), \ + MASK(8, 5, col5->getUInt(i)), \ + MASK(8, 6, col6->getUInt(i)), \ + MASK(8, 7, col7->getUInt(i))) \ + \ + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, \ + "Illegal number of UInt arguments of function {}, max: 8", \ + getName()); \ + +DECLARE_DEFAULT_CODE( +constexpr auto MortonND_2D_Enc = mortonnd::MortonNDLutEncoder<2, 32, 8>(); +constexpr auto MortonND_3D_Enc = mortonnd::MortonNDLutEncoder<3, 21, 8>(); +constexpr auto MortonND_4D_Enc = mortonnd::MortonNDLutEncoder<4, 16, 8>(); +constexpr auto MortonND_5D_Enc = mortonnd::MortonNDLutEncoder<5, 12, 8>(); +constexpr auto MortonND_6D_Enc = mortonnd::MortonNDLutEncoder<6, 10, 8>(); +constexpr auto MortonND_7D_Enc = mortonnd::MortonNDLutEncoder<7, 9, 8>(); +constexpr auto MortonND_8D_Enc = mortonnd::MortonNDLutEncoder<8, 8, 8>(); +class FunctionMortonEncode : public IFunction +{ +public: + static constexpr auto name = "mortonEncode"; + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + String getName() const override + { + return name; + } + + bool isVariadic() const override + { + return true; + } + + size_t getNumberOfArguments() const override + { + return 0; + } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DB::DataTypes & arguments) const override + { + size_t vectorStartIndex = 0; + if (arguments.empty()) + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, + "At least one UInt argument is required for function {}", + getName()); + if (WhichDataType(arguments[0]).isTuple()) + { + vectorStartIndex = 1; + const auto * type_tuple = typeid_cast(arguments[0].get()); + auto tuple_size = type_tuple->getElements().size(); + if (tuple_size != (arguments.size() - 1)) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Illegal argument {} for function {}, tuple size should be equal to number of UInt arguments", + arguments[0]->getName(), getName()); + for (size_t i = 0; i < tuple_size; i++) + { + if (!WhichDataType(type_tuple->getElement(i)).isNativeUInt()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument in tuple for function {}, should be a native UInt", + type_tuple->getElement(i)->getName(), getName()); + } + } + + for (size_t i = vectorStartIndex; i < arguments.size(); i++) + { + const auto & arg = arguments[i]; + if (!WhichDataType(arg).isNativeUInt()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}, should be a native UInt", + arg->getName(), getName()); + } + return std::make_shared(); + } + + static UInt64 expand(UInt64 
ratio, UInt64 value) + { + switch (ratio) + { + case 1: + return value; + case 2: + return MortonND_2D_Enc.Encode(0, value & MortonND_2D_Enc.InputMask()); + case 3: + return MortonND_3D_Enc.Encode(0, 0, value & MortonND_3D_Enc.InputMask()); + case 4: + return MortonND_4D_Enc.Encode(0, 0, 0, value & MortonND_4D_Enc.InputMask()); + case 5: + return MortonND_5D_Enc.Encode(0, 0, 0, 0, value & MortonND_5D_Enc.InputMask()); + case 6: + return MortonND_6D_Enc.Encode(0, 0, 0, 0, 0, value & MortonND_6D_Enc.InputMask()); + case 7: + return MortonND_7D_Enc.Encode(0, 0, 0, 0, 0, 0, value & MortonND_7D_Enc.InputMask()); + case 8: + return MortonND_8D_Enc.Encode(0, 0, 0, 0, 0, 0, 0, value & MortonND_8D_Enc.InputMask()); + } + return value; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + EXECUTE() + } +}; +) // DECLARE_DEFAULT_CODE + +#if defined(MORTON_ND_BMI2_ENABLED) +#undef ENCODE +#define ENCODE(ND, ...) \ + if (nd == (ND)) \ + { \ + for (size_t i = 0; i < input_rows_count; i++) \ + { \ + vec_res[i] = MortonND_##ND##D::Encode(__VA_ARGS__); \ + } \ + return col_res; \ + } + +#undef MASK +#define MASK(ND, IDX, ...) \ + (EXPAND(IDX, __VA_ARGS__)) + +DECLARE_AVX2_SPECIFIC_CODE( +using MortonND_2D = mortonnd::MortonNDBmi<2, uint64_t>; +using MortonND_3D = mortonnd::MortonNDBmi<3, uint64_t>; +using MortonND_4D = mortonnd::MortonNDBmi<4, uint64_t>; +using MortonND_5D = mortonnd::MortonNDBmi<5, uint64_t>; +using MortonND_6D = mortonnd::MortonNDBmi<6, uint64_t>; +using MortonND_7D = mortonnd::MortonNDBmi<7, uint64_t>; +using MortonND_8D = mortonnd::MortonNDBmi<8, uint64_t>; + +class FunctionMortonEncode : public TargetSpecific::Default::FunctionMortonEncode +{ +public: + static UInt64 expand(UInt64 ratio, UInt64 value) + { + switch (ratio) + { + case 1: + return value; + case 2: + return MortonND_2D::Encode(0, value); + case 3: + return MortonND_3D::Encode(0, 0, value); + case 4: + return MortonND_4D::Encode(0, 0, 0, value); + case 5: + return MortonND_5D::Encode(0, 0, 0, 0, value); + case 6: + return MortonND_6D::Encode(0, 0, 0, 0, 0, value); + case 7: + return MortonND_7D::Encode(0, 0, 0, 0, 0, 0, value); + case 8: + return MortonND_8D::Encode(0, 0, 0, 0, 0, 0, 0, value); + } + return value; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + EXECUTE() + } +}; +) // DECLARE_AVX2_SPECIFIC_CODE +#endif // MORTON_ND_BMI2_ENABLED + +#undef ENCODE +#undef MASK +#undef EXTRACT_VECTOR +#undef EXPAND +#undef EXECUTE + +class FunctionMortonEncode: public TargetSpecific::Default::FunctionMortonEncode +{ +public: + explicit FunctionMortonEncode(ContextPtr context) : selector(context) + { + selector.registerImplementation(); + +#if USE_MULTITARGET_CODE && defined(MORTON_ND_BMI2_ENABLED) + selector.registerImplementation(); +#endif + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + return selector.selectAndExecute(arguments, result_type, input_rows_count); + } + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + +private: + ImplementationSelector selector; +}; + +REGISTER_FUNCTION(MortonEncode) +{ + factory.registerFunction({ + R"( +Calculates Morton encoding (ZCurve) for a list of unsigned integers + +The function has two modes of operation: +- Simple +- Expanded + +Simple: accepts up to 8 unsigned 
integers as arguments and produces a UInt64 code.
+[example:simple]
+
+Expanded: accepts a range mask (tuple) as the first argument and up to 8 unsigned integers as the other arguments.
+Each number in the mask configures the amount of range expansion:
+1 - no expansion
+2 - 2x expansion
+3 - 3x expansion
+....
+Up to 8x expansion.
+[example:range_expanded]
+Note: the tuple size must be equal to the number of the other arguments.
+
+Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality).
+For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF)
+
+Morton encoding for one argument is always the argument itself.
+[example:identity]
+Produces: `1`
+
+You can expand one argument too:
+[example:identity_expanded]
+Produces: `32768`
+
+The function also accepts columns as arguments:
+[example:from_table]
+
+But the range tuple must still be a constant:
+[example:from_table_range]
+
+Please note that you can fit only as many bits of information into the Morton code as UInt64 has.
+Two arguments will have a maximum range of 2^32 (64/2) each.
+Three arguments: a maximum range of 2^21 (64/3) each.
+And so on; any overflow will be clamped to zero.
+)",
+ Documentation::Examples{
+ {"simple", "SELECT mortonEncode(1, 2, 3)"},
+ {"range_expanded", "SELECT mortonEncode((1,2), 1024, 16)"},
+ {"identity", "SELECT mortonEncode(1)"},
+ {"identity_expanded", "SELECT mortonEncode(tuple(2), 128)"},
+ {"from_table", "SELECT mortonEncode(n1, n2) FROM table"},
+ {"from_table_range", "SELECT mortonEncode((1,2), n1, n2) FROM table"},
+ },
+ Documentation::Categories {"ZCurve", "Morton coding"}
+ });
+}
+
+}
diff --git a/src/Functions/nowInBlock.cpp b/src/Functions/nowInBlock.cpp
index c771d83225a..b1764590fda 100644
--- a/src/Functions/nowInBlock.cpp
+++ b/src/Functions/nowInBlock.cpp
@@ -2,7 +2,7 @@ #include #include #include
-#include
+#include
namespace DB
@@ -74,7 +74,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override {
- return ColumnUInt32::create(input_rows_count, static_cast(time(nullptr)));
+ return ColumnDateTime::create(input_rows_count, static_cast(time(nullptr)));
} };
diff --git a/src/Functions/timeSlots.cpp b/src/Functions/timeSlots.cpp
index e986e32d76f..72d6059e0a1 100644
--- a/src/Functions/timeSlots.cpp
+++ b/src/Functions/timeSlots.cpp
@@ -3,6 +3,7 @@ #include #include #include
+#include
#include #include
@@ -300,11 +301,11 @@ public: throw Exception("Third argument for function " + getName() + " must be greater than zero", ErrorCodes::ILLEGAL_COLUMN); }
- const auto * dt_starts = checkAndGetColumn(arguments[0].column.get());
- const auto * dt_const_starts = checkAndGetColumnConst(arguments[0].column.get());
+ const auto * dt_starts = checkAndGetColumn(arguments[0].column.get());
+ const auto * dt_const_starts = checkAndGetColumnConst(arguments[0].column.get());
- const auto * durations = checkAndGetColumn(arguments[1].column.get());
- const auto * const_durations = checkAndGetColumnConst(arguments[1].column.get());
+ const auto * durations = checkAndGetColumn(arguments[1].column.get());
+ const auto * const_durations = checkAndGetColumnConst(arguments[1].column.get());
auto res = ColumnArray::create(ColumnUInt32::create());
ColumnUInt32::Container & res_values = typeid_cast(res->getData()).getData();
@@ -341,8 +342,8 @@ public: time_slot_scale = assert_cast(arguments[2].type.get())->getScale(); }
- const auto * starts =
checkAndGetColumn(arguments[0].column.get()); - const auto * const_starts = checkAndGetColumnConst(arguments[0].column.get()); + const auto * starts = checkAndGetColumn(arguments[0].column.get()); + const auto * const_starts = checkAndGetColumnConst(arguments[0].column.get()); const auto * durations = checkAndGetColumn>(arguments[1].column.get()); const auto * const_durations = checkAndGetColumnConst>(arguments[1].column.get()); diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 32fe574f56a..3054cf280d9 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -437,7 +438,7 @@ private: if (which_type.isDateTime64()) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); auto scale = assert_cast(from_datatype).getScale(); if (time_column_vec) @@ -445,19 +446,19 @@ private: } if (which_type.isDateTime()) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } if (which_type.isDate()) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } if (which_type.isDate32()) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } diff --git a/src/Functions/tupleElement.cpp b/src/Functions/tupleElement.cpp index 4f7ddda6b0b..6ac36dc80ed 100644 --- a/src/Functions/tupleElement.cpp +++ b/src/Functions/tupleElement.cpp @@ -82,7 +82,10 @@ public: const DataTypeTuple * tuple = checkAndGetDataType(tuple_col); if (!tuple) - throw Exception("First argument for function " + getName() + " must be tuple or array of tuple.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be tuple or array of tuple. Actual {}", + getName(), + arguments[0].type->getName()); auto index = getElementNum(arguments[1].column, *tuple, number_of_arguments); if (index.has_value()) @@ -137,7 +140,10 @@ public: const DataTypeTuple * tuple_type_concrete = checkAndGetDataType(tuple_type); const ColumnTuple * tuple_col_concrete = checkAndGetColumn(tuple_col); if (!tuple_type_concrete || !tuple_col_concrete) - throw Exception("First argument for function " + getName() + " must be tuple or array of tuple.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be tuple or array of tuple. 
Actual {}", + getName(), + first_arg.type->getName()); auto index = getElementNum(arguments[1].column, *tuple_type_concrete, arguments.size()); @@ -221,20 +227,18 @@ private: std::optional getElementNum(const ColumnPtr & index_column, const DataTypeTuple & tuple, const size_t argument_size) const { - if ( - checkAndGetColumnConst(index_column.get()) - || checkAndGetColumnConst(index_column.get()) - || checkAndGetColumnConst(index_column.get()) - || checkAndGetColumnConst(index_column.get()) - ) + if (checkAndGetColumnConst(index_column.get()) + || checkAndGetColumnConst(index_column.get()) + || checkAndGetColumnConst(index_column.get()) + || checkAndGetColumnConst(index_column.get())) { size_t index = index_column->getUInt(0); if (index == 0) - throw Exception("Indices in tuples are 1-based.", ErrorCodes::ILLEGAL_INDEX); + throw Exception(ErrorCodes::ILLEGAL_INDEX, "Indices in tuples are 1-based."); if (index > tuple.getElements().size()) - throw Exception("Index for tuple element is out of range.", ErrorCodes::ILLEGAL_INDEX); + throw Exception(ErrorCodes::ILLEGAL_INDEX, "Index for tuple element is out of range."); return std::optional(index - 1); } @@ -253,7 +257,9 @@ private: return std::nullopt; } else - throw Exception("Second argument to " + getName() + " must be a constant UInt or String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Second argument to {} must be a constant UInt or String", + getName()); } }; diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h index 2121747500b..2e451e0032e 100644 --- a/src/IO/BufferWithOwnMemory.h +++ b/src/IO/BufferWithOwnMemory.h @@ -34,8 +34,7 @@ namespace ErrorCodes template > struct Memory : boost::noncopyable, Allocator { - /// Padding is needed to allow usage of 'memcpySmallAllowReadWriteOverflow15' function with this buffer. - static constexpr size_t pad_right = 15; + static constexpr size_t pad_right = PADDING_FOR_SIMD - 1; size_t m_capacity = 0; /// With padding. 
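+    /// Rationale for the padding (a sketch; PADDING_FOR_SIMD is assumed to be 16, as the literal 15/16 constants it replaces suggest):
+    /// SIMD-oriented helpers such as memcpySmallAllowReadWriteOverflow15 may read and write
+    /// up to PADDING_FOR_SIMD - 1 bytes past the requested size, e.g.
+    ///     memcpySmallAllowReadWriteOverflow15(dst, memory.m_data, n); /// may touch up to 15 bytes beyond the n copied bytes
+    /// so pad_right reserves those extra bytes at the end of the allocation.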
size_t m_size = 0; diff --git a/src/IO/MMapReadBufferFromFileDescriptor.cpp b/src/IO/MMapReadBufferFromFileDescriptor.cpp index 5a636971fa0..c0eb73f8638 100644 --- a/src/IO/MMapReadBufferFromFileDescriptor.cpp +++ b/src/IO/MMapReadBufferFromFileDescriptor.cpp @@ -28,7 +28,7 @@ void MMapReadBufferFromFileDescriptor::init() BufferBase::set(mapped.getData(), length, 0); size_t page_size = static_cast(::getPageSize()); - ReadBuffer::padded = (length % page_size) > 0 && (length % page_size) <= (page_size - 15); + ReadBuffer::padded = (length % page_size) > 0 && (length % page_size) <= (page_size - (PADDING_FOR_SIMD - 1)); } diff --git a/src/IO/MMapReadBufferFromFileWithCache.cpp b/src/IO/MMapReadBufferFromFileWithCache.cpp index 0d31c29bdaa..503a58b65b9 100644 --- a/src/IO/MMapReadBufferFromFileWithCache.cpp +++ b/src/IO/MMapReadBufferFromFileWithCache.cpp @@ -17,7 +17,7 @@ void MMapReadBufferFromFileWithCache::init() BufferBase::set(mapped->getData(), length, 0); size_t page_size = static_cast(::getPageSize()); - ReadBuffer::padded = (length % page_size) > 0 && (length % page_size) <= (page_size - 15); + ReadBuffer::padded = (length % page_size) > 0 && (length % page_size) <= (page_size - (PADDING_FOR_SIMD - 1)); } diff --git a/src/IO/PeekableReadBuffer.h b/src/IO/PeekableReadBuffer.h index 15283793755..45763863437 100644 --- a/src/IO/PeekableReadBuffer.h +++ b/src/IO/PeekableReadBuffer.h @@ -99,7 +99,7 @@ private: /// creation (for example if PeekableReadBuffer is often created or if we need to remember small amount of /// data after checkpoint), at the beginning we will use small amount of memory on stack and allocate /// larger buffer only if reserved memory is not enough. - char stack_memory[16]; + char stack_memory[PADDING_FOR_SIMD]; bool use_stack_memory = true; }; diff --git a/src/IO/WriteHelpers.cpp b/src/IO/WriteHelpers.cpp index cb341e60a8b..a9788505995 100644 --- a/src/IO/WriteHelpers.cpp +++ b/src/IO/WriteHelpers.cpp @@ -18,19 +18,6 @@ void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes) } } -void formatUUID(const UInt8 * src16, UInt8 * dst36) -{ - formatHex(&src16[0], &dst36[0], 4); - dst36[8] = '-'; - formatHex(&src16[4], &dst36[9], 2); - dst36[13] = '-'; - formatHex(&src16[6], &dst36[14], 2); - dst36[18] = '-'; - formatHex(&src16[8], &dst36[19], 2); - dst36[23] = '-'; - formatHex(&src16[10], &dst36[24], 6); -} - /** Function used when byte ordering is important when parsing uuid * ex: When we create an UUID type */ diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 42d84e080af..39024b33eb1 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -624,9 +624,6 @@ inline void writeXMLStringForTextElement(std::string_view s, WriteBuffer & buf) writeXMLStringForTextElement(s.data(), s.data() + s.size(), buf); } -template -void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes); -void formatUUID(const UInt8 * src16, UInt8 * dst36); void formatUUID(std::reverse_iterator src16, UInt8 * dst36); inline void writeUUIDText(const UUID & uuid, WriteBuffer & buf) diff --git a/src/IO/WriteSettings.h b/src/IO/WriteSettings.h index 38a706997cf..a1f5b23fb97 100644 --- a/src/IO/WriteSettings.h +++ b/src/IO/WriteSettings.h @@ -15,6 +15,7 @@ struct WriteSettings bool enable_filesystem_cache_on_write_operations = false; bool enable_filesystem_cache_log = false; bool is_file_cache_persistent = false; + bool s3_allow_parallel_part_upload = true; /// Monitoring bool for_object_storage = false; // to choose which profile events should be incremented diff 
--git a/src/IO/tests/gtest_memory_resize.cpp b/src/IO/tests/gtest_memory_resize.cpp index 8619419a47a..d760a948075 100644 --- a/src/IO/tests/gtest_memory_resize.cpp +++ b/src/IO/tests/gtest_memory_resize.cpp @@ -79,24 +79,24 @@ TEST(MemoryResizeTest, SmallInitAndSmallResize) memory.resize(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); } { auto memory = Memory(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); memory.resize(0); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 0); memory.resize(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); } } @@ -116,52 +116,52 @@ TEST(MemoryResizeTest, SmallInitAndBigResizeOverflowWhenPadding) memory.resize(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); memory.resize(2); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 17); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD + 1); ASSERT_EQ(memory.m_size, 2); EXPECT_THROW_ERROR_CODE(memory.resize(std::numeric_limits::max()), Exception, ErrorCodes::ARGUMENT_OUT_OF_BOUND); ASSERT_TRUE(memory.m_data); // state is intact after exception - ASSERT_EQ(memory.m_capacity, 17); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD + 1); ASSERT_EQ(memory.m_size, 2); - memory.resize(0x8000000000000000ULL-16); + memory.resize(0x8000000000000000ULL - PADDING_FOR_SIMD); ASSERT_TRUE(memory.m_data); ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1); - ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - 16); + ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - PADDING_FOR_SIMD); #ifndef ABORT_ON_LOGICAL_ERROR - EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL-15), Exception, ErrorCodes::LOGICAL_ERROR); + EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)), Exception, ErrorCodes::LOGICAL_ERROR); ASSERT_TRUE(memory.m_data); // state is intact after exception ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1); - ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - 16); + ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - PADDING_FOR_SIMD); #endif } { auto memory = Memory(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); EXPECT_THROW_ERROR_CODE(memory.resize(std::numeric_limits::max()), Exception, ErrorCodes::ARGUMENT_OUT_OF_BOUND); ASSERT_TRUE(memory.m_data); // state is intact after exception - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); memory.resize(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); #ifndef ABORT_ON_LOGICAL_ERROR - EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL-15), Exception, ErrorCodes::LOGICAL_ERROR); + EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)), Exception, ErrorCodes::LOGICAL_ERROR); ASSERT_TRUE(memory.m_data); // state is intact after exception - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); #endif } @@ 
-201,7 +201,7 @@ TEST(MemoryResizeTest, BigInitAndSmallResizeOverflowWhenPadding) { EXPECT_THROW_ERROR_CODE( { - auto memory = Memory(std::numeric_limits::max() - 15); + auto memory = Memory(std::numeric_limits::max() - (PADDING_FOR_SIMD - 1)); } , Exception , ErrorCodes::LOGICAL_ERROR); @@ -210,7 +210,7 @@ TEST(MemoryResizeTest, BigInitAndSmallResizeOverflowWhenPadding) { EXPECT_THROW_ERROR_CODE( { - auto memory = Memory(0x8000000000000000ULL - 15); + auto memory = Memory(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)); } , Exception , ErrorCodes::LOGICAL_ERROR); @@ -218,10 +218,10 @@ TEST(MemoryResizeTest, BigInitAndSmallResizeOverflowWhenPadding) #endif { - auto memory = Memory(0x8000000000000000ULL - 16); - ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1); - ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - 16); + auto memory = Memory(0x8000000000000000ULL - PADDING_FOR_SIMD); + ASSERT_TRUE(memory.m_data); + ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1); + ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - PADDING_FOR_SIMD); memory.resize(1); ASSERT_TRUE(memory.m_data); @@ -240,32 +240,32 @@ TEST(MemoryResizeTest, AlignmentWithRealAllocator) memory.resize(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); memory.resize(2); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 17); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD + 1); ASSERT_EQ(memory.m_size, 2); memory.resize(3); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 18); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD + 2); ASSERT_EQ(memory.m_size, 3); memory.resize(4); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 19); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD + 3); ASSERT_EQ(memory.m_size, 4); memory.resize(0); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 19); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD + 3); ASSERT_EQ(memory.m_size, 0); memory.resize(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 19); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD + 3); ASSERT_EQ(memory.m_size, 1); } @@ -291,12 +291,12 @@ TEST(MemoryResizeTest, AlignmentWithRealAllocator) memory.resize(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); memory.resize(32); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 47); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD + 31); ASSERT_EQ(memory.m_size, 32); } } @@ -316,13 +316,12 @@ TEST(MemoryResizeTest, SomeAlignmentOverflowWhenAlignment) memory.resize(1); ASSERT_TRUE(memory.m_data); - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); EXPECT_THROW_ERROR_CODE(memory.resize(std::numeric_limits::max()), Exception, ErrorCodes::ARGUMENT_OUT_OF_BOUND); ASSERT_TRUE(memory.m_data); // state is intact after exception - ASSERT_EQ(memory.m_capacity, 16); + ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); } - } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 721d701c9a2..9c949a17e64 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -463,6 +463,18 @@ struct ContextSharedPart : boost::noncopyable std::unique_ptr delete_ddl_worker; std::unique_ptr delete_access_control; + /// Delete DDLWorker before zookeeper. 
+ /// Because it can call Context::getZooKeeper and resurrect it.
+
+ {
+ auto lock = std::lock_guard(mutex);
+ delete_ddl_worker = std::move(ddl_worker);
+ }
+
+ /// DDLWorker should be deleted without the lock, because its internal thread can
+ /// take it as well, which will cause a deadlock.
+ delete_ddl_worker.reset();
+
{ auto lock = std::lock_guard(mutex);
@@ -499,7 +511,6 @@ struct ContextSharedPart : boost::noncopyable delete_schedule_pool = std::move(schedule_pool); delete_distributed_schedule_pool = std::move(distributed_schedule_pool); delete_message_broker_schedule_pool = std::move(message_broker_schedule_pool);
- delete_ddl_worker = std::move(ddl_worker);
delete_access_control = std::move(access_control); /// Stop trace collector if any
@@ -528,7 +539,6 @@ struct ContextSharedPart : boost::noncopyable delete_schedule_pool.reset(); delete_distributed_schedule_pool.reset(); delete_message_broker_schedule_pool.reset();
- delete_ddl_worker.reset();
delete_access_control.reset(); total_memory_tracker.resetOvercommitTracker();
@@ -2061,7 +2071,12 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const if (!shared->zookeeper) shared->zookeeper = std::make_shared(config, "zookeeper", getZooKeeperLog()); else if (shared->zookeeper->expired())
+ {
+ Stopwatch watch;
+ LOG_DEBUG(shared->log, "Trying to establish a new connection with ZooKeeper");
shared->zookeeper = shared->zookeeper->startNewSession();
+ LOG_DEBUG(shared->log, "Establishing a new connection with ZooKeeper took {} ms", watch.elapsedMilliseconds());
+ }
return shared->zookeeper; }
@@ -3632,6 +3647,7 @@ WriteSettings Context::getWriteSettings() const res.enable_filesystem_cache_on_write_operations = settings.enable_filesystem_cache_on_write_operations; res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log;
+ res.s3_allow_parallel_part_upload = settings.s3_allow_parallel_part_upload;
res.remote_throttler = getRemoteWriteThrottler();
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index f1fa0955c34..6bfa9ecd591 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -26,6 +26,7 @@ #include #include #include
+#include
#include #include #include
@@ -532,7 +533,8 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) auto active_node = zkutil::EphemeralNodeHolder::existing(active_node_path, *zookeeper); /// Try fast path
- auto create_active_res = zookeeper->tryCreate(active_node_path, {}, zkutil::CreateMode::Ephemeral);
+ const String canary_value = Field(ServerUUID::get()).dump();
+ auto create_active_res = zookeeper->tryCreate(active_node_path, canary_value, zkutil::CreateMode::Ephemeral);
if (create_active_res != Coordination::Error::ZOK) { if (create_active_res != Coordination::Error::ZNONODE && create_active_res != Coordination::Error::ZNODEEXISTS)
@@ -563,10 +565,10 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) { /// Connection has been lost and now we are retrying,
/// but our previous ephemeral node still exists.
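+ /// The ephemeral node now stores canary_value (this server's UUID), presumably so that
+ /// handleEphemeralNodeExistence can distinguish a node left over from this server's previous
+ /// attempt from one owned by another replica before the node is recreated below.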
- zookeeper->waitForEphemeralToDisappearIfAny(active_node_path); + zookeeper->handleEphemeralNodeExistence(active_node_path, canary_value); } - zookeeper->create(active_node_path, {}, zkutil::CreateMode::Ephemeral); + zookeeper->create(active_node_path, canary_value, zkutil::CreateMode::Ephemeral); } /// We must hold the lock until task execution status is committed to ZooKeeper, diff --git a/src/Interpreters/DirectJoin.cpp b/src/Interpreters/DirectJoin.cpp index 02b3854a47b..e148db1d8e6 100644 --- a/src/Interpreters/DirectJoin.cpp +++ b/src/Interpreters/DirectJoin.cpp @@ -93,6 +93,16 @@ DirectKeyValueJoin::DirectKeyValueJoin(std::shared_ptr table_join_, LOG_TRACE(log, "Using direct join"); } +DirectKeyValueJoin::DirectKeyValueJoin( + std::shared_ptr table_join_, + const Block & right_sample_block_, + std::shared_ptr storage_, + const Block & right_sample_block_with_storage_column_names_) + : DirectKeyValueJoin(table_join_, right_sample_block_, storage_) +{ + right_sample_block_with_storage_column_names = right_sample_block_with_storage_column_names_; +} + bool DirectKeyValueJoin::addJoinedBlock(const Block &, bool) { throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unreachable code reached"); @@ -114,14 +124,15 @@ void DirectKeyValueJoin::joinBlock(Block & block, std::shared_ptr &) return; Block original_right_block = originalRightBlock(right_sample_block, *table_join); - const Names & attribute_names = original_right_block.getNames(); + Block right_block_to_use = right_sample_block_with_storage_column_names ? right_sample_block_with_storage_column_names : original_right_block; + const Names & attribute_names = right_block_to_use.getNames(); NullMap null_map; Chunk joined_chunk = storage->getByKeys({key_col}, null_map, attribute_names); /// Expected right block may differ from structure in storage, because of `join_use_nulls` or we just select not all joined attributes Block sample_storage_block = storage->getSampleBlock(attribute_names); - MutableColumns result_columns = convertBlockStructure(sample_storage_block, original_right_block, joined_chunk.mutateColumns(), null_map); + MutableColumns result_columns = convertBlockStructure(sample_storage_block, right_block_to_use, joined_chunk.mutateColumns(), null_map); for (size_t i = 0; i < result_columns.size(); ++i) { diff --git a/src/Interpreters/DirectJoin.h b/src/Interpreters/DirectJoin.h index 8e82b59da02..6a6f4505474 100644 --- a/src/Interpreters/DirectJoin.h +++ b/src/Interpreters/DirectJoin.h @@ -25,6 +25,12 @@ public: const Block & right_sample_block_, std::shared_ptr storage_); + DirectKeyValueJoin( + std::shared_ptr table_join_, + const Block & right_sample_block_, + std::shared_ptr storage_, + const Block & right_sample_block_with_storage_column_names_); + virtual const TableJoin & getTableJoin() const override { return *table_join; } virtual bool addJoinedBlock(const Block &, bool) override; @@ -52,6 +58,7 @@ private: std::shared_ptr table_join; std::shared_ptr storage; Block right_sample_block; + Block right_sample_block_with_storage_column_names; Block sample_block_with_columns_to_add; Poco::Logger * log; diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index b27df0f1c35..9b38072b5af 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -1073,8 +1073,8 @@ void ExpressionActionsChain::JoinStep::finalize(const NameSet & required_output_ } /// Result will also contain joined columns. 
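+        /// columnsAddedByJoin() provides full NameAndTypePair entries; only their names are needed in required_names.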
- for (const auto & column_name : analyzed_join->columnsAddedByJoin()) - required_names.emplace(column_name); + for (const auto & column : analyzed_join->columnsAddedByJoin()) + required_names.emplace(column.name); for (const auto & column : result_columns) { diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index 704dff325b7..ea2b9045120 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -714,7 +714,10 @@ public: /// Object was never loaded successfully and should be reloaded. startLoading(info); } - LOG_TRACE(log, "Object '{}' is neither loaded nor failed, so it will not be reloaded as outdated.", info.name); + else + { + LOG_TRACE(log, "Object '{}' is neither loaded nor failed, so it will not be reloaded as outdated.", info.name); + } } } } diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 0524feea1f6..60894d331a1 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -17,7 +18,6 @@ #include #include - namespace DB { @@ -60,10 +60,9 @@ Block InterpreterDescribeQuery::getSampleBlock(bool include_subcolumns) return block; } - BlockIO InterpreterDescribeQuery::execute() { - ColumnsDescription columns; + std::vector columns; StorageSnapshotPtr storage_snapshot; const auto & ast = query_ptr->as(); @@ -72,14 +71,34 @@ BlockIO InterpreterDescribeQuery::execute() if (table_expression.subquery) { - auto names_and_types = InterpreterSelectWithUnionQuery::getSampleBlock( - table_expression.subquery->children.at(0), getContext()).getNamesAndTypesList(); - columns = ColumnsDescription(std::move(names_and_types)); + NamesAndTypesList names_and_types; + auto select_query = table_expression.subquery->children.at(0); + auto current_context = getContext(); + + if (settings.use_analyzer) + { + SelectQueryOptions select_query_options; + names_and_types = InterpreterSelectQueryAnalyzer(select_query, select_query_options, current_context).getSampleBlock().getNamesAndTypesList(); + } + else + { + names_and_types = InterpreterSelectWithUnionQuery::getSampleBlock(select_query, current_context).getNamesAndTypesList(); + } + + for (auto && [name, type] : names_and_types) + { + ColumnDescription description; + description.name = std::move(name); + description.type = std::move(type); + columns.emplace_back(std::move(description)); + } } else if (table_expression.table_function) { TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression.table_function, getContext()); - columns = table_function_ptr->getActualTableStructure(getContext()); + auto table_function_column_descriptions = table_function_ptr->getActualTableStructure(getContext()); + for (const auto & table_function_column_description : table_function_column_descriptions) + columns.emplace_back(table_function_column_description); } else { @@ -90,7 +109,9 @@ BlockIO InterpreterDescribeQuery::execute() auto metadata_snapshot = table->getInMemoryMetadataPtr(); storage_snapshot = table->getStorageSnapshot(metadata_snapshot, getContext()); - columns = metadata_snapshot->getColumns(); + auto metadata_column_descriptions = metadata_snapshot->getColumns(); + for (const auto & metadata_column_description : metadata_column_descriptions) + columns.emplace_back(metadata_column_description); } bool extend_object_types = settings.describe_extend_object_types && 
storage_snapshot; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 4799970b6a1..6ea1b6a62b3 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,9 @@ #include +#include +#include + namespace DB { @@ -155,6 +159,30 @@ struct QueryASTSettings {"graph", graph}, {"optimize", optimize} }; + + std::unordered_map> integer_settings; +}; + +struct QueryTreeSettings +{ + bool run_passes = false; + bool dump_passes = false; + bool dump_ast = false; + Int64 passes = -1; + + constexpr static char name[] = "QUERY TREE"; + + std::unordered_map> boolean_settings = + { + {"run_passes", run_passes}, + {"dump_passes", dump_passes}, + {"dump_ast", dump_ast} + }; + + std::unordered_map> integer_settings = + { + {"passes", passes} + }; }; struct QueryPlanSettings @@ -177,6 +205,8 @@ struct QueryPlanSettings {"json", json}, {"sorting", query_plan_options.sorting}, }; + + std::unordered_map> integer_settings; }; struct QueryPipelineSettings @@ -193,18 +223,31 @@ struct QueryPipelineSettings {"graph", graph}, {"compact", compact}, }; + + std::unordered_map> integer_settings; }; template struct ExplainSettings : public Settings { using Settings::boolean_settings; + using Settings::integer_settings; bool has(const std::string & name_) const + { + return hasBooleanSetting(name_) || hasIntegerSetting(name_); + } + + bool hasBooleanSetting(const std::string & name_) const { return boolean_settings.count(name_) > 0; } + bool hasIntegerSetting(const std::string & name_) const + { + return integer_settings.count(name_) > 0; + } + void setBooleanSetting(const std::string & name_, bool value) { auto it = boolean_settings.find(name_); @@ -214,6 +257,15 @@ struct ExplainSettings : public Settings it->second.get() = value; } + void setIntegerSetting(const std::string & name_, Int64 value) + { + auto it = integer_settings.find(name_); + if (it == integer_settings.end()) + throw Exception("Unknown setting for ExplainSettings: " + name_, ErrorCodes::LOGICAL_ERROR); + + it->second.get() = value; + } + std::string getSettingsList() const { std::string res; @@ -224,6 +276,13 @@ struct ExplainSettings : public Settings res += setting.first; } + for (const auto & setting : integer_settings) + { + if (!res.empty()) + res += ", "; + + res += setting.first; + } return res; } @@ -246,15 +305,23 @@ ExplainSettings checkAndGetSettings(const ASTPtr & ast_settings) if (change.value.getType() != Field::Types::UInt64) throw Exception(ErrorCodes::INVALID_SETTING_VALUE, - "Invalid type {} for setting \"{}\" only boolean settings are supported", + "Invalid type {} for setting \"{}\" only integer settings are supported", change.value.getTypeName(), change.name); - auto value = change.value.get(); - if (value > 1) - throw Exception("Invalid value " + std::to_string(value) + " for setting \"" + change.name + - "\". Only boolean settings are supported", ErrorCodes::INVALID_SETTING_VALUE); + if (settings.hasBooleanSetting(change.name)) + { + auto value = change.value.get(); + if (value > 1) + throw Exception("Invalid value " + std::to_string(value) + " for setting \"" + change.name + + "\". 
Expected boolean type", ErrorCodes::INVALID_SETTING_VALUE); - settings.setBooleanSetting(change.name, value); + settings.setBooleanSetting(change.name, value); + } + else + { + auto value = change.value.get(); + settings.setIntegerSetting(change.name, value); + } } return settings; @@ -304,6 +371,46 @@ QueryPipeline InterpreterExplainQuery::executeImpl() ast.getExplainedQuery()->format(IAST::FormatSettings(buf, false)); break; } + case ASTExplainQuery::QueryTree: + { + if (ast.getExplainedQuery()->as() == nullptr) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Only SELECT is supported for EXPLAIN QUERY TREE query"); + + auto settings = checkAndGetSettings(ast.getSettings()); + auto query_tree = buildQueryTree(ast.getExplainedQuery(), getContext()); + + if (settings.run_passes) + { + auto query_tree_pass_manager = QueryTreePassManager(getContext()); + addQueryTreePasses(query_tree_pass_manager); + + size_t pass_index = settings.passes < 0 ? query_tree_pass_manager.getPasses().size() : static_cast(settings.passes); + + if (settings.dump_passes) + { + query_tree_pass_manager.dump(buf, pass_index); + if (pass_index > 0) + buf << '\n'; + } + + query_tree_pass_manager.run(query_tree, pass_index); + + query_tree->dumpTree(buf); + } + else + { + query_tree->dumpTree(buf); + } + + if (settings.dump_ast) + { + buf << '\n'; + buf << '\n'; + query_tree->toAST()->format(IAST::FormatSettings(buf, false)); + } + + break; + } case ASTExplainQuery::QueryPlan: { if (!dynamic_cast(ast.getExplainedQuery().get())) @@ -312,8 +419,16 @@ QueryPipeline InterpreterExplainQuery::executeImpl() auto settings = checkAndGetSettings(ast.getSettings()); QueryPlan plan; - InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), options); - interpreter.buildQueryPlan(plan); + if (getContext()->getSettingsRef().use_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter(ast.getExplainedQuery(), options, getContext()); + plan = std::move(interpreter).extractQueryPlan(); + } + else + { + InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), options); + interpreter.buildQueryPlan(plan); + } if (settings.optimize) plan.optimize(QueryPlanOptimizationSettings::fromContext(getContext())); @@ -347,8 +462,17 @@ QueryPipeline InterpreterExplainQuery::executeImpl() auto settings = checkAndGetSettings(ast.getSettings()); QueryPlan plan; - InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), options); - interpreter.buildQueryPlan(plan); + if (getContext()->getSettingsRef().use_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter(ast.getExplainedQuery(), options, getContext()); + plan = std::move(interpreter).extractQueryPlan(); + } + else + { + InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), options); + interpreter.buildQueryPlan(plan); + } + auto pipeline = plan.buildQueryPipeline( QueryPlanOptimizationSettings::fromContext(getContext()), BuildQueryPipelineSettings::fromContext(getContext())); diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index ca0a59c0c1a..3e074ec8713 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include @@ -118,6 +119,9 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut if (query->as()) { + if (context->getSettingsRef().use_analyzer) + return std::make_unique(query, options, context); + /// This is 
internal part of ASTSelectWithUnionQuery. /// Even if there is SELECT without union, it is represented by ASTSelectWithUnionQuery with single ASTSelectQuery as a child. return std::make_unique(query, context, options); @@ -125,6 +129,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut else if (query->as()) { ProfileEvents::increment(ProfileEvents::SelectQuery); + + if (context->getSettingsRef().use_analyzer) + return std::make_unique(query, options, context); + return std::make_unique(query, context, options); } else if (query->as()) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 79deb38317c..4139685c379 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -117,7 +117,8 @@ FilterDAGInfoPtr generateFilterActions( const StoragePtr & storage, const StorageSnapshotPtr & storage_snapshot, const StorageMetadataPtr & metadata_snapshot, - Names & prerequisite_columns) + Names & prerequisite_columns, + PreparedSetsPtr prepared_sets) { auto filter_info = std::make_shared(); @@ -155,7 +156,7 @@ FilterDAGInfoPtr generateFilterActions( /// Using separate expression analyzer to prevent any possible alias injection auto syntax_result = TreeRewriter(context).analyzeSelect(query_ast, TreeRewriterResult({}, storage, storage_snapshot)); - SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context, metadata_snapshot); + SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context, metadata_snapshot, {}, false, {}, prepared_sets); filter_info->actions = analyzer.simpleSelectActions(); filter_info->column_name = expr_list->children.at(0)->getColumnName(); @@ -615,7 +616,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (row_policy_filter) { filter_info = generateFilterActions( - table_id, row_policy_filter, context, storage, storage_snapshot, metadata_snapshot, required_columns); + table_id, row_policy_filter, context, storage, storage_snapshot, metadata_snapshot, required_columns, + prepared_sets); query_info.filter_asts.push_back(row_policy_filter); } @@ -623,7 +625,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (query_info.additional_filter_ast) { additional_filter_info = generateFilterActions( - table_id, query_info.additional_filter_ast, context, storage, storage_snapshot, metadata_snapshot, required_columns); + table_id, query_info.additional_filter_ast, context, storage, storage_snapshot, metadata_snapshot, required_columns, + prepared_sets); additional_filter_info->do_remove_column = true; diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp new file mode 100644 index 00000000000..61ec5932b7d --- /dev/null +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -0,0 +1,120 @@ +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; +} + +namespace +{ + +ASTPtr normalizeAndValidateQuery(const ASTPtr & query) +{ + if (query->as() || query->as()) + { + return query; + } + else if (auto * subquery = query->as()) + { + return subquery->children[0]; + } + else + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Expected ASTSelectWithUnionQuery or ASTSelectQuery. 
Actual {}", + query->formatForErrorMessage()); + } +} + +QueryTreeNodePtr buildQueryTreeAndRunPasses(const ASTPtr & query, const ContextPtr & context) +{ + auto query_tree = buildQueryTree(query, context); + + QueryTreePassManager query_tree_pass_manager(context); + addQueryTreePasses(query_tree_pass_manager); + query_tree_pass_manager.run(query_tree); + + return query_tree; +} + +} + +InterpreterSelectQueryAnalyzer::InterpreterSelectQueryAnalyzer( + const ASTPtr & query_, + const SelectQueryOptions & select_query_options_, + ContextPtr context_) + : WithContext(context_) + , query(normalizeAndValidateQuery(query_)) + , query_tree(buildQueryTreeAndRunPasses(query, context_)) + , select_query_options(select_query_options_) + , planner(query_tree, select_query_options, context_) +{ +} + +InterpreterSelectQueryAnalyzer::InterpreterSelectQueryAnalyzer( + const QueryTreeNodePtr & query_tree_, + const SelectQueryOptions & select_query_options_, + ContextPtr context_) + : WithContext(context_) + , query(query_tree_->toAST()) + , query_tree(query_tree_) + , select_query_options(select_query_options_) + , planner(query_tree, select_query_options, context_) +{ +} + +Block InterpreterSelectQueryAnalyzer::getSampleBlock() +{ + planner.buildQueryPlanIfNeeded(); + return planner.getQueryPlan().getCurrentDataStream().header; +} + +BlockIO InterpreterSelectQueryAnalyzer::execute() +{ + planner.buildQueryPlanIfNeeded(); + auto & query_plan = planner.getQueryPlan(); + + QueryPlanOptimizationSettings optimization_settings; + BuildQueryPipelineSettings build_pipeline_settings; + auto pipeline_builder = query_plan.buildQueryPipeline(optimization_settings, build_pipeline_settings); + + BlockIO result; + result.pipeline = QueryPipelineBuilder::getPipeline(std::move(*pipeline_builder)); + + if (!select_query_options.ignore_quota && (select_query_options.to_stage == QueryProcessingStage::Complete)) + result.pipeline.setQuota(getContext()->getQuota()); + + return result; +} + +QueryPlan && InterpreterSelectQueryAnalyzer::extractQueryPlan() && +{ + planner.buildQueryPlanIfNeeded(); + return std::move(planner).extractQueryPlan(); +} + +void InterpreterSelectQueryAnalyzer::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr) const +{ + elem.query_kind = "Select"; +} + +} diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.h b/src/Interpreters/InterpreterSelectQueryAnalyzer.h new file mode 100644 index 00000000000..e9884567ab0 --- /dev/null +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include + +#include +#include +#include + +#include + +namespace DB +{ + +class InterpreterSelectQueryAnalyzer : public IInterpreter, public WithContext +{ +public: + /// Initialize interpreter with query AST + InterpreterSelectQueryAnalyzer(const ASTPtr & query_, + const SelectQueryOptions & select_query_options_, + ContextPtr context_); + + /// Initialize interpreter with query tree + InterpreterSelectQueryAnalyzer(const QueryTreeNodePtr & query_tree_, + const SelectQueryOptions & select_query_options_, + ContextPtr context_); + + Block getSampleBlock(); + + BlockIO execute() override; + + QueryPlan && extractQueryPlan() &&; + + bool supportsTransactions() const override { return true; } + + bool ignoreLimits() const override { return select_query_options.ignore_limits; } + + bool ignoreQuota() const override { return select_query_options.ignore_quota; } + + void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr) const 
override; + +private: + ASTPtr query; + QueryTreeNodePtr query_tree; + SelectQueryOptions select_query_options; + Planner planner; +}; + +} diff --git a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp index a0fdafc976c..b3c2063c6f6 100644 --- a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp +++ b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp @@ -45,7 +45,7 @@ void NormalizeSelectWithUnionQueryMatcher::visit(ASTSelectWithUnionQuery & ast, SelectUnionModesSet current_set_of_modes; bool distinct_found = false; - for (ssize_t i = union_modes.size() - 1; i >= 0; --i) + for (Int64 i = union_modes.size() - 1; i >= 0; --i) { current_set_of_modes.insert(union_modes[i]); if (const auto * union_ast = typeid_cast(select_list[i + 1].get())) diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index 06600c49f13..a50e390ee5a 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -39,7 +39,6 @@ public: /// This is a temporary table for transferring to remote servers for distributed query processing. StoragePtr table; -private: /// The source is obtained using the InterpreterSelectQuery subquery. std::unique_ptr source; }; diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 3835ef77deb..316beccae80 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -55,15 +55,26 @@ public: ASTPtr on_filter_condition_left; ASTPtr on_filter_condition_right; + std::string analyzer_left_filter_condition_column_name; + std::string analyzer_right_filter_condition_column_name; + JoinOnClause() = default; std::pair condColumnNames() const { std::pair res; + + if (!analyzer_left_filter_condition_column_name.empty()) + res.first = analyzer_left_filter_condition_column_name; + + if (!analyzer_right_filter_condition_column_name.empty()) + res.second = analyzer_right_filter_condition_column_name; + if (on_filter_condition_left) res.first = on_filter_condition_left->getColumnName(); if (on_filter_condition_right) res.second = on_filter_condition_right->getColumnName(); + return res; } @@ -111,9 +122,6 @@ private: * to the subquery will be added expression `expr(t2 columns)`. * It's possible to use name `expr(t2 columns)`. 
*/ - - friend class TreeRewriter; - SizeLimits size_limits; const size_t default_max_bytes = 0; const bool join_use_nulls = false; @@ -124,9 +132,6 @@ private: const size_t max_files_to_merge = 0; const String temporary_files_codec = "LZ4"; - /// the limit has no technical reasons, it supposed to improve safety - const size_t MAX_DISJUNCTS = 16; /// NOLINT - ASTs key_asts_left; ASTs key_asts_right; @@ -160,6 +165,8 @@ private: std::string right_storage_name; + bool is_join_with_constant = false; + Names requiredJoinedNames() const; /// Create converting actions and change key column names if required @@ -178,6 +185,8 @@ private: NamesAndTypesList correctedColumnsAddedByJoin() const; + void deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix); + public: TableJoin() = default; @@ -217,8 +226,8 @@ public: bool allowParallelHashJoin() const; bool joinUseNulls() const { return join_use_nulls; } - bool forceNullableRight() const { return join_use_nulls && isLeftOrFull(table_join.kind); } - bool forceNullableLeft() const { return join_use_nulls && isRightOrFull(table_join.kind); } + bool forceNullableRight() const { return join_use_nulls && isLeftOrFull(kind()); } + bool forceNullableLeft() const { return join_use_nulls && isRightOrFull(kind()); } size_t defaultMaxBytes() const { return default_max_bytes; } size_t maxJoinedBlockRows() const { return max_joined_block_rows; } size_t maxRowsInRightBlock() const { return partial_merge_join_rows_in_right_blocks; } @@ -229,6 +238,9 @@ public: bool oneDisjunct() const; + ASTTableJoin & getTableJoin() { return table_join; } + const ASTTableJoin & getTableJoin() const { return table_join; } + JoinOnClause & getOnlyClause() { assertHasOneOnExpr(); return clauses[0]; } const JoinOnClause & getOnlyClause() const { assertHasOneOnExpr(); return clauses[0]; } @@ -266,13 +278,26 @@ public: NamesWithAliases getNamesWithAliases(const NameSet & required_columns) const; NamesWithAliases getRequiredColumns(const Block & sample, const Names & action_required_columns) const; - void deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix); size_t rightKeyInclusion(const String & name) const; NameSet requiredRightKeys() const; + bool isJoinWithConstant() const + { + return is_join_with_constant; + } + + void setIsJoinWithConstant(bool is_join_with_constant_value) + { + is_join_with_constant = is_join_with_constant_value; + } + bool leftBecomeNullable(const DataTypePtr & column_type) const; bool rightBecomeNullable(const DataTypePtr & column_type) const; void addJoinedColumn(const NameAndTypePair & joined_column); + void setColumnsAddedByJoin(const NamesAndTypesList & columns_added_by_join_value) + { + columns_added_by_join = columns_added_by_join_value; + } template void addJoinedColumnsAndCorrectTypesImpl(TColumns & left_columns, bool correct_nullability); @@ -294,15 +319,13 @@ public: ASTPtr leftKeysList() const; ASTPtr rightKeysList() const; /// For ON syntax only - const NamesAndTypesList & columnsFromJoinedTable() const { return columns_from_joined_table; } - - Names columnsAddedByJoin() const + void setColumnsFromJoinedTable(NamesAndTypesList columns_from_joined_table_value, const NameSet & left_table_columns, const String & right_table_prefix) { - Names res; - for (const auto & col : columns_added_by_join) - res.push_back(col.name); - return res; + columns_from_joined_table = std::move(columns_from_joined_table_value); + 
deduplicateAndQualifyColumnNames(left_table_columns, right_table_prefix); } + const NamesAndTypesList & columnsFromJoinedTable() const { return columns_from_joined_table; } + const NamesAndTypesList & columnsAddedByJoin() const { return columns_added_by_join; } /// StorageJoin overrides key names (cause of different names qualification) void setRightKeys(const Names & keys) { getOnlyClause().key_names_right = keys; } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index ac49d79c6ba..da12dccd8d8 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -611,7 +611,7 @@ void getArrayJoinedColumns(ASTPtr & query, TreeRewriterResult & result, const AS } } -void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_default_strictness, bool old_any, ASTTableJoin & out_table_join) +void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_default_strictness, bool old_any, std::shared_ptr & analyzed_join) { const ASTTablesInSelectQueryElement * node = select_query.join(); if (!node) @@ -649,7 +649,7 @@ void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_defaul throw Exception("ANY FULL JOINs are not implemented", ErrorCodes::NOT_IMPLEMENTED); } - out_table_join = table_join; + analyzed_join->getTableJoin() = table_join; } /// Evaluate expression and return boolean value if it can be interpreted as bool. @@ -1236,14 +1236,11 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( if (tables_with_columns.size() > 1) { const auto & right_table = tables_with_columns[1]; - auto & cols_from_joined = result.analyzed_join->columns_from_joined_table; - cols_from_joined = right_table.columns; + auto columns_from_joined_table = right_table.columns; /// query can use materialized or aliased columns from right joined table, /// we want to request it for right table - cols_from_joined.insert(cols_from_joined.end(), right_table.hidden_columns.begin(), right_table.hidden_columns.end()); - - result.analyzed_join->deduplicateAndQualifyColumnNames( - source_columns_set, right_table.table.getQualifiedNamePrefix()); + columns_from_joined_table.insert(columns_from_joined_table.end(), right_table.hidden_columns.begin(), right_table.hidden_columns.end()); + result.analyzed_join->setColumnsFromJoinedTable(std::move(columns_from_joined_table), source_columns_set, right_table.table.getQualifiedNamePrefix()); } translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns); @@ -1254,7 +1251,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( NameSet all_source_columns_set = source_columns_set; if (table_join) { - for (const auto & [name, _] : table_join->columns_from_joined_table) + for (const auto & [name, _] : table_join->columnsFromJoinedTable()) all_source_columns_set.insert(name); } @@ -1304,7 +1301,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( getArrayJoinedColumns(query, result, select_query, result.source_columns, source_columns_set); setJoinStrictness( - *select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, result.analyzed_join->table_join); + *select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, result.analyzed_join); auto * table_join_ast = select_query->join() ? 
select_query->join()->table_join->as() : nullptr; if (table_join_ast && tables_with_columns.size() >= 2) diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp index 335610b2be9..7ed7788cf1d 100644 --- a/src/Interpreters/WindowDescription.cpp +++ b/src/Interpreters/WindowDescription.cpp @@ -20,7 +20,8 @@ std::string WindowFunctionDescription::dump() const WriteBufferFromOwnString ss; ss << "window function '" << column_name << "\n"; - ss << "function node " << function_node->dumpTree() << "\n"; + if (function_node) + ss << "function node " << function_node->dumpTree() << "\n"; ss << "aggregate function '" << aggregate_function->getName() << "'\n"; if (!function_parameters.empty()) { diff --git a/src/Interpreters/WindowDescription.h b/src/Interpreters/WindowDescription.h index e7bc0473c26..3b9af6575e8 100644 --- a/src/Interpreters/WindowDescription.h +++ b/src/Interpreters/WindowDescription.h @@ -99,7 +99,6 @@ struct WindowDescription // The window functions that are calculated for this window. std::vector window_functions; - std::string dump() const; void checkValid() const; diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index e16647091ba..48acfb5512a 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -14,6 +14,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int UNSUPPORTED_METHOD; } bool hasJoin(const ASTSelectQuery & select) @@ -118,6 +119,10 @@ Block getHeaderForProcessingStage( case QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit: case QueryProcessingStage::MAX: { + /// TODO: Analyzer syntax analyzer result + if (!query_info.syntax_analyzer_result) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "getHeaderForProcessingStage is unsupported"); + auto query = query_info.query->clone(); TreeRewriterResult new_rewriter_result = *query_info.syntax_analyzer_result; removeJoin(*query->as(), new_rewriter_result, context); diff --git a/src/Parsers/ASTColumnsMatcher.cpp b/src/Parsers/ASTColumnsMatcher.cpp index 8f167f99b37..0fc6847de68 100644 --- a/src/Parsers/ASTColumnsMatcher.cpp +++ b/src/Parsers/ASTColumnsMatcher.cpp @@ -60,6 +60,11 @@ void ASTColumnsRegexpMatcher::setPattern(String pattern) DB::ErrorCodes::CANNOT_COMPILE_REGEXP); } +const std::shared_ptr & ASTColumnsRegexpMatcher::getMatcher() const +{ + return column_matcher; +} + bool ASTColumnsRegexpMatcher::isColumnMatching(const String & column_name) const { return RE2::PartialMatch(column_name, *column_matcher); @@ -114,4 +119,128 @@ void ASTColumnsListMatcher::formatImpl(const FormatSettings & settings, FormatSt } } +ASTPtr ASTQualifiedColumnsRegexpMatcher::clone() const +{ + auto clone = std::make_shared(*this); + clone->cloneChildren(); + return clone; +} + +void ASTQualifiedColumnsRegexpMatcher::appendColumnName(WriteBuffer & ostr) const +{ + const auto & qualifier = children.at(0); + qualifier->appendColumnName(ostr); + writeCString(".COLUMNS(", ostr); + writeQuotedString(original_pattern, ostr); + writeChar(')', ostr); +} + +void ASTQualifiedColumnsRegexpMatcher::setPattern(String pattern) +{ + original_pattern = std::move(pattern); + column_matcher = std::make_shared(original_pattern, RE2::Quiet); + if (!column_matcher->ok()) + throw DB::Exception( + "COLUMNS pattern " + original_pattern + " cannot be compiled: " + column_matcher->error(), + DB::ErrorCodes::CANNOT_COMPILE_REGEXP); +} + +void 
ASTQualifiedColumnsRegexpMatcher::setMatcher(std::shared_ptr matcher) +{ + column_matcher = std::move(matcher); +} + +const std::shared_ptr & ASTQualifiedColumnsRegexpMatcher::getMatcher() const +{ + return column_matcher; +} + +void ASTQualifiedColumnsRegexpMatcher::updateTreeHashImpl(SipHash & hash_state) const +{ + hash_state.update(original_pattern.size()); + hash_state.update(original_pattern); + IAST::updateTreeHashImpl(hash_state); +} + +void ASTQualifiedColumnsRegexpMatcher::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : ""); + + const auto & qualifier = children.at(0); + qualifier->formatImpl(settings, state, frame); + + settings.ostr << ".COLUMNS" << (settings.hilite ? hilite_none : "") << "("; + settings.ostr << quoteString(original_pattern); + settings.ostr << ")"; + + /// Format column transformers + size_t children_size = children.size(); + + for (size_t i = 1; i < children_size; ++i) + { + const auto & child = children[i]; + settings.ostr << ' '; + child->formatImpl(settings, state, frame); + } +} + +ASTPtr ASTQualifiedColumnsListMatcher::clone() const +{ + auto clone = std::make_shared(*this); + clone->column_list = column_list->clone(); + clone->cloneChildren(); + return clone; +} + +void ASTQualifiedColumnsListMatcher::appendColumnName(WriteBuffer & ostr) const +{ + const auto & qualifier = children.at(0); + qualifier->appendColumnName(ostr); + writeCString(".COLUMNS(", ostr); + + for (auto it = column_list->children.begin(); it != column_list->children.end(); ++it) + { + if (it != column_list->children.begin()) + writeCString(", ", ostr); + + (*it)->appendColumnName(ostr); + } + writeChar(')', ostr); +} + +void ASTQualifiedColumnsListMatcher::updateTreeHashImpl(SipHash & hash_state) const +{ + column_list->updateTreeHash(hash_state); + IAST::updateTreeHashImpl(hash_state); +} + +void ASTQualifiedColumnsListMatcher::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : ""); + + const auto & qualifier = children.at(0); + qualifier->formatImpl(settings, state, frame); + + settings.ostr << ".COLUMNS" << (settings.hilite ? hilite_none : "") << "("; + + for (ASTs::const_iterator it = column_list->children.begin(); it != column_list->children.end(); ++it) + { + if (it != column_list->children.begin()) + settings.ostr << ", "; + + (*it)->formatImpl(settings, state, frame); + } + settings.ostr << ")"; + + /// Format column transformers + size_t children_size = children.size(); + + for (size_t i = 1; i < children_size; ++i) + { + const auto & child = children[i]; + settings.ostr << ' '; + child->formatImpl(settings, state, frame); + } +} + } diff --git a/src/Parsers/ASTColumnsMatcher.h b/src/Parsers/ASTColumnsMatcher.h index 5aaf3cbe30d..7ce246608b9 100644 --- a/src/Parsers/ASTColumnsMatcher.h +++ b/src/Parsers/ASTColumnsMatcher.h @@ -24,6 +24,7 @@ public: void appendColumnName(WriteBuffer & ostr) const override; void setPattern(String pattern); + const std::shared_ptr & getMatcher() const; bool isColumnMatching(const String & column_name) const; void updateTreeHashImpl(SipHash & hash_state) const override; @@ -49,5 +50,39 @@ protected: void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; }; +/// Same as ASTColumnsRegexpMatcher. Qualified identifier is first child. 
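The `appendColumnName` implementations above print the qualifier first and then the same `.COLUMNS(...)` body as the unqualified matcher, either a quoted pattern or a comma-separated column list. A small self-contained sketch of that formatting, with `std::ostringstream` standing in for `WriteBuffer` and hypothetical helper names; the header declarations for both qualified matchers follow below:

```cpp
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Hypothetical helpers mirroring the two output shapes:
//   qualifier.COLUMNS('pattern')  and  qualifier.COLUMNS(a, b, c)
std::string formatColumnsRegexpMatcher(const std::string & qualifier, const std::string & pattern)
{
    std::ostringstream out;
    out << qualifier << ".COLUMNS('" << pattern << "')";
    return out.str();
}

std::string formatColumnsListMatcher(const std::string & qualifier, const std::vector<std::string> & columns)
{
    std::ostringstream out;
    out << qualifier << ".COLUMNS(";
    for (size_t i = 0; i < columns.size(); ++i)
    {
        if (i != 0)
            out << ", ";
        out << columns[i];
    }
    out << ')';
    return out.str();
}

int main()
{
    std::cout << formatColumnsRegexpMatcher("t1", "^id") << '\n';          // t1.COLUMNS('^id')
    std::cout << formatColumnsListMatcher("t1", {"id", "value"}) << '\n';  // t1.COLUMNS(id, value)
}
```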
+class ASTQualifiedColumnsRegexpMatcher : public IAST +{ +public: + String getID(char) const override { return "QualifiedColumnsRegexpMatcher"; } + ASTPtr clone() const override; + + void appendColumnName(WriteBuffer & ostr) const override; + const std::shared_ptr & getMatcher() const; + void setPattern(String pattern); + void setMatcher(std::shared_ptr matcher); + void updateTreeHashImpl(SipHash & hash_state) const override; + +protected: + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; + +private: + std::shared_ptr column_matcher; + String original_pattern; +}; + +/// Same as ASTColumnsListMatcher. Qualified identifier is first child. +class ASTQualifiedColumnsListMatcher : public IAST +{ +public: + String getID(char) const override { return "QualifiedColumnsListMatcher"; } + ASTPtr clone() const override; + void appendColumnName(WriteBuffer & ostr) const override; + void updateTreeHashImpl(SipHash & hash_state) const override; + + ASTPtr column_list; +protected: + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; +}; } diff --git a/src/Parsers/ASTColumnsTransformers.cpp b/src/Parsers/ASTColumnsTransformers.cpp index 71207724a89..118c22b463f 100644 --- a/src/Parsers/ASTColumnsTransformers.cpp +++ b/src/Parsers/ASTColumnsTransformers.cpp @@ -270,6 +270,11 @@ void ASTColumnsExceptTransformer::setPattern(String pattern) DB::ErrorCodes::CANNOT_COMPILE_REGEXP); } +const std::shared_ptr & ASTColumnsExceptTransformer::getMatcher() const +{ + return column_matcher; +} + bool ASTColumnsExceptTransformer::isColumnMatching(const String & column_name) const { return RE2::PartialMatch(column_name, *column_matcher); diff --git a/src/Parsers/ASTColumnsTransformers.h b/src/Parsers/ASTColumnsTransformers.h index 0f16f6b93e7..5179726e8cb 100644 --- a/src/Parsers/ASTColumnsTransformers.h +++ b/src/Parsers/ASTColumnsTransformers.h @@ -60,6 +60,7 @@ public: } void transform(ASTs & nodes) const override; void setPattern(String pattern); + const std::shared_ptr & getMatcher() const; bool isColumnMatching(const String & column_name) const; void appendColumnName(WriteBuffer & ostr) const override; void updateTreeHashImpl(SipHash & hash_state) const override; diff --git a/src/Parsers/ASTExplainQuery.h b/src/Parsers/ASTExplainQuery.h index ea9ccf5a4f4..156ffdeacb9 100644 --- a/src/Parsers/ASTExplainQuery.h +++ b/src/Parsers/ASTExplainQuery.h @@ -15,6 +15,7 @@ public: { ParsedAST, /// 'EXPLAIN AST SELECT ...' AnalyzedSyntax, /// 'EXPLAIN SYNTAX SELECT ...' + QueryTree, /// 'EXPLAIN QUERY TREE SELECT ...' QueryPlan, /// 'EXPLAIN SELECT ...' QueryPipeline, /// 'EXPLAIN PIPELINE ...' QueryEstimates, /// 'EXPLAIN ESTIMATE ...' @@ -109,6 +110,7 @@ private: { case ParsedAST: return "EXPLAIN AST"; case AnalyzedSyntax: return "EXPLAIN SYNTAX"; + case QueryTree: return "EXPLAIN QUERY TREE"; case QueryPlan: return "EXPLAIN"; case QueryPipeline: return "EXPLAIN PIPELINE"; case QueryEstimates: return "EXPLAIN ESTIMATE"; diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h index 6d5089f802e..5756fb9ba86 100644 --- a/src/Parsers/ASTFunction.h +++ b/src/Parsers/ASTFunction.h @@ -24,6 +24,8 @@ public: bool compute_after_window_functions = false; + bool is_lambda_function = false; + // We have to make these fields ASTPtr because this is what the visitors // expect. Some of them take const ASTPtr & (makes no sense), and some // take ASTPtr & and modify it. 
I don't understand how the latter is diff --git a/src/Parsers/ASTIdentifier.h b/src/Parsers/ASTIdentifier.h index 14e2fcef39d..c9712d578e0 100644 --- a/src/Parsers/ASTIdentifier.h +++ b/src/Parsers/ASTIdentifier.h @@ -49,9 +49,10 @@ public: void restoreTable(); // TODO(ilezhankin): get rid of this std::shared_ptr createTable() const; // returns |nullptr| if identifier is not table. -protected: String full_name; std::vector name_parts; + +protected: std::shared_ptr semantic; /// pimpl void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTSampleRatio.h b/src/Parsers/ASTSampleRatio.h index a3e70b7dab7..220f938335b 100644 --- a/src/Parsers/ASTSampleRatio.h +++ b/src/Parsers/ASTSampleRatio.h @@ -34,4 +34,14 @@ public: void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; }; +inline bool operator==(const ASTSampleRatio::Rational & lhs, const ASTSampleRatio::Rational & rhs) +{ + return lhs.numerator == rhs.numerator && lhs.denominator == rhs.denominator; +} + +inline bool operator!=(const ASTSampleRatio::Rational & lhs, const ASTSampleRatio::Rational & rhs) +{ + return !(lhs == rhs); +} + } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 88784329ece..c4e07ea2e15 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1192,54 +1192,6 @@ bool ParserAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; } - -bool ParserColumnsMatcher::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserKeyword columns("COLUMNS"); - ParserList columns_p(std::make_unique(false, true), std::make_unique(TokenType::Comma), false); - ParserStringLiteral regex; - - if (!columns.ignore(pos, expected)) - return false; - - if (pos->type != TokenType::OpeningRoundBracket) - return false; - ++pos; - - ASTPtr column_list; - ASTPtr regex_node; - if (!columns_p.parse(pos, column_list, expected) && !regex.parse(pos, regex_node, expected)) - return false; - - if (pos->type != TokenType::ClosingRoundBracket) - return false; - ++pos; - - ASTPtr res; - if (column_list) - { - auto list_matcher = std::make_shared(); - list_matcher->column_list = column_list; - res = list_matcher; - } - else - { - auto regexp_matcher = std::make_shared(); - regexp_matcher->setPattern(regex_node->as().value.get()); - res = regexp_matcher; - } - - ParserColumnsTransformers transformers_p(allowed_transformers); - ASTPtr transformer; - while (transformers_p.parse(pos, transformer, expected)) - { - res->children.push_back(transformer); - } - node = std::move(res); - return true; -} - - bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword apply("APPLY"); @@ -1488,6 +1440,122 @@ bool ParserQualifiedAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } +/// Parse (columns_list) or ('REGEXP'). 
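The helper that follows factors out the body shared by `COLUMNS(...)` and the new qualified `identifier.COLUMNS(...)`: an opening bracket, then either an identifier list or a single string-literal pattern, then the closing bracket, then any column transformers. A rough standalone analogue of that decision over a pre-tokenized input (hypothetical `parseBody`, transformers omitted), not the real `IParser` interface:

```cpp
#include <iostream>
#include <string>
#include <vector>

// Hypothetical token stream: tokens are already split, a pattern token keeps its quotes.
struct ParsedMatcher
{
    bool is_regexp = false;
    std::string pattern;                 // set when is_regexp
    std::vector<std::string> columns;    // set otherwise
};

// Rough analogue of the shared body: '(' (identifier list | 'pattern') ')'.
bool parseBody(const std::vector<std::string> & tokens, size_t & pos, ParsedMatcher & result)
{
    if (pos >= tokens.size() || tokens[pos] != "(")
        return false;
    ++pos;

    if (pos < tokens.size() && tokens[pos].size() >= 2 && tokens[pos].front() == '\'')
    {
        result.is_regexp = true;
        result.pattern = tokens[pos].substr(1, tokens[pos].size() - 2);
        ++pos;
    }
    else
    {
        while (pos < tokens.size() && tokens[pos] != ")")
        {
            if (tokens[pos] != ",")
                result.columns.push_back(tokens[pos]);
            ++pos;
        }
    }

    if (pos >= tokens.size() || tokens[pos] != ")")
        return false;
    ++pos;
    return true;
}

int main()
{
    std::vector<std::string> tokens{"(", "'^id'", ")"};
    size_t pos = 0;
    ParsedMatcher matcher;
    if (parseBody(tokens, pos, matcher))
        std::cout << (matcher.is_regexp ? "regexp: " + matcher.pattern : "list") << '\n';   // regexp: ^id
}
```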
+static bool parseColumnsMatcherBody(IParser::Pos & pos, ASTPtr & node, Expected & expected, ParserColumnsTransformers::ColumnTransformers allowed_transformers) +{ + if (pos->type != TokenType::OpeningRoundBracket) + return false; + ++pos; + + ParserList columns_p(std::make_unique(false, true), std::make_unique(TokenType::Comma), false); + ParserStringLiteral regex; + + ASTPtr column_list; + ASTPtr regex_node; + if (!columns_p.parse(pos, column_list, expected) && !regex.parse(pos, regex_node, expected)) + return false; + + if (pos->type != TokenType::ClosingRoundBracket) + return false; + ++pos; + + ASTPtr res; + if (column_list) + { + auto list_matcher = std::make_shared(); + list_matcher->column_list = column_list; + res = list_matcher; + } + else + { + auto regexp_matcher = std::make_shared(); + regexp_matcher->setPattern(regex_node->as().value.get()); + res = regexp_matcher; + } + + ParserColumnsTransformers transformers_p(allowed_transformers); + ASTPtr transformer; + while (transformers_p.parse(pos, transformer, expected)) + { + res->children.push_back(transformer); + } + + node = std::move(res); + return true; +} + +bool ParserColumnsMatcher::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword columns("COLUMNS"); + + if (!columns.ignore(pos, expected)) + return false; + + return parseColumnsMatcherBody(pos, node, expected, allowed_transformers); +} + +bool ParserQualifiedColumnsMatcher::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserCompoundIdentifier(true, true).parse(pos, node, expected)) + return false; + + auto identifier_node = node; + const auto & identifier_node_typed = identifier_node->as(); + + /// ParserCompoundIdentifier parse identifier.COLUMNS + if (identifier_node_typed.name_parts.size() == 1 || identifier_node_typed.name_parts.back() != "COLUMNS") + return false; + + /// TODO: ASTTableIdentifier can contain only 2 parts + + if (identifier_node_typed.name_parts.size() == 2) + { + auto table_name = identifier_node_typed.name_parts[0]; + identifier_node = std::make_shared(table_name); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected identifier to contain no more than 2 parts. 
Actual {}", + identifier_node_typed.full_name); + } + + if (!parseColumnsMatcherBody(pos, node, expected, allowed_transformers)) + return false; + + if (auto * columns_list_matcher = node->as()) + { + auto result = std::make_shared(); + result->column_list = std::move(columns_list_matcher->column_list); + + result->children.reserve(columns_list_matcher->children.size() + 1); + result->children.push_back(std::move(identifier_node)); + + for (auto && child : columns_list_matcher->children) + result->children.push_back(std::move(child)); + + node = result; + } + else if (auto * column_regexp_matcher = node->as()) + { + auto result = std::make_shared(); + result->setMatcher(column_regexp_matcher->getMatcher()); + + result->children.reserve(column_regexp_matcher->children.size() + 1); + result->children.push_back(std::move(identifier_node)); + + for (auto && child : column_regexp_matcher->children) + result->children.push_back(std::move(child)); + + node = result; + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Qualified COLUMNS matcher expected to be list or regexp"); + } + + return true; +} bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index f538555f0c1..8a9647dc86f 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -104,7 +104,7 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; -/** COLUMNS('') +/** COLUMNS(columns_names) or COLUMNS('') */ class ParserColumnsMatcher : public IParserBase { @@ -121,6 +121,23 @@ protected: ColumnTransformers allowed_transformers; }; +/** Qualified columns matcher identifier.COLUMNS(columns_names) or identifier.COLUMNS('') + */ +class ParserQualifiedColumnsMatcher : public IParserBase +{ +public: + using ColumnTransformers = ParserColumnsTransformers::ColumnTransformers; + explicit ParserQualifiedColumnsMatcher(ColumnTransformers allowed_transformers_ = ParserColumnsTransformers::AllTransformers) + : allowed_transformers(allowed_transformers_) + {} + +protected: + const char * getName() const override { return "qualified COLUMNS matcher"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + + ColumnTransformers allowed_transformers; +}; + // Allows to make queries like SELECT SUM() FILTER(WHERE ) FROM ... class ParserFilterClause : public IParserBase { diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 1249baf1859..4af4dabb12e 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -478,7 +478,10 @@ struct Operator { Operator() = default; - Operator(const std::string & function_name_, int priority_, int arity_ = 2, OperatorType type_ = OperatorType::None) + Operator(const std::string & function_name_, + int priority_, + int arity_, + OperatorType type_ = OperatorType::None) : type(type_), priority(priority_), arity(arity_), function_name(function_name_) {} OperatorType type; @@ -487,6 +490,14 @@ struct Operator std::string function_name; }; +template +static std::shared_ptr makeASTFunction(Operator & op, Args &&... 
args) +{ + auto ast_function = makeASTFunction(op.function_name, std::forward(args)...); + ast_function->is_lambda_function = op.type == OperatorType::Lambda; + return ast_function; +} + enum class Checkpoint { None, @@ -506,10 +517,8 @@ enum class Checkpoint class Layer { public: - explicit Layer(bool allow_alias_ = true, bool allow_alias_without_as_keyword_ = true) : - allow_alias(allow_alias_), allow_alias_without_as_keyword(allow_alias_without_as_keyword_) - { - } + explicit Layer(bool allow_alias_ = true, bool allow_alias_without_as_keyword_ = false) : + allow_alias(allow_alias_), allow_alias_without_as_keyword(allow_alias_without_as_keyword_) {} virtual ~Layer() = default; @@ -612,13 +621,17 @@ public: /// bool mergeElement(bool push_to_elements = true) { + parsed_alias = false; + Operator cur_op; while (popOperator(cur_op)) { ASTPtr function; - // Special case of ternary operator - if (cur_op.type == OperatorType::StartIf) + // We should not meet the starting part of the operator while finishing an element + if (cur_op.type == OperatorType::StartIf || + cur_op.type == OperatorType::StartBetween || + cur_op.type == OperatorType::StartNotBetween) return false; if (cur_op.type == OperatorType::FinishIf) @@ -628,10 +641,6 @@ public: return false; } - // Special case of a BETWEEN b AND c operator - if (cur_op.type == OperatorType::StartBetween || cur_op.type == OperatorType::StartNotBetween) - return false; - if (cur_op.type == OperatorType::FinishBetween) { Operator tmp_op; @@ -651,7 +660,7 @@ public: } else { - function = makeASTFunction(cur_op.function_name); + function = makeASTFunction(cur_op); if (!popLastNOperands(function->children[0]->children, cur_op.arity)) return false; @@ -727,6 +736,9 @@ public: /// In order to distinguish them we keep a counter of BETWEENs without matching ANDs. int between_counter = 0; + /// Flag we set when we parsed alias to avoid parsing next element as alias + bool parsed_alias = false; + bool allow_alias = true; bool allow_alias_without_as_keyword = true; @@ -776,16 +788,18 @@ public: } }; - /// Basic layer for a function with certain separator and end tokens: /// 1. If we parse a separator we should merge current operands and operators /// into one element and push in to 'elements' vector. /// 2. If we parse an ending token, we should merge everything as in (1) and /// also set 'finished' flag. 
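Two details of the expression-parser hunks above are worth a miniature model: `Operator` now always carries an explicit arity, and `mergeElement` folds the pending operators into function nodes by popping exactly that many operands (the `makeASTFunction(Operator &, ...)` wrapper additionally tags lambda operators via `is_lambda_function`). The sketch below uses hypothetical `MiniOperator`/`MiniLayer` types and plain strings instead of AST nodes; the separator-and-end-token layer introduced by the comment above follows right after it:

```cpp
#include <iostream>
#include <string>
#include <vector>

// Hypothetical miniature of the Layer bookkeeping: each operator carries an explicit
// arity, and merging an element pops each operator and folds that many operands into
// a single function-call string.
struct MiniOperator
{
    std::string function_name;
    int arity = 2;
};

struct MiniLayer
{
    std::vector<std::string> operands;
    std::vector<MiniOperator> operators;

    bool mergeElement()
    {
        while (!operators.empty())
        {
            MiniOperator op = operators.back();
            operators.pop_back();

            if (static_cast<int>(operands.size()) < op.arity)
                return false;   // not enough operands: malformed expression

            std::string call = op.function_name + "(";
            for (int i = op.arity; i > 0; --i)
            {
                call += operands[operands.size() - i];
                if (i != 1)
                    call += ", ";
            }
            call += ")";

            operands.resize(operands.size() - op.arity);
            operands.push_back(call);
        }
        return true;
    }
};

int main()
{
    MiniLayer layer;
    layer.operands = {"1", "2", "3"};
    layer.operators = {{"plus", 2}, {"multiply", 2}};   // innermost operator is merged first

    if (layer.mergeElement())
        std::cout << layer.operands.front() << '\n';    // prints "plus(1, multiply(2, 3))"
}
```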
template -class BaseLayer : public Layer +class LayerWithSeparator : public Layer { public: + explicit LayerWithSeparator(bool allow_alias_ = true, bool allow_alias_without_as_keyword_ = false) : + Layer(allow_alias_, allow_alias_without_as_keyword_) {} + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { if (ParserToken(separator).ignore(pos, expected)) @@ -809,11 +823,11 @@ public: } }; - -class OrdinaryFunctionLayer : public Layer +/// Layer for regular and aggregate functions without syntax sugar +class FunctionLayer : public Layer { public: - explicit OrdinaryFunctionLayer(String function_name_, bool allow_function_parameters_ = true) + explicit FunctionLayer(String function_name_, bool allow_function_parameters_ = true) : function_name(function_name_), allow_function_parameters(allow_function_parameters_){} bool parse(IParser::Pos & pos, Expected & expected, Action & action) override @@ -958,7 +972,7 @@ public: if (parameters) { - function_node->parameters = parameters; + function_node->parameters = std::move(parameters); function_node->children.push_back(function_node->parameters); } @@ -991,7 +1005,7 @@ public: return false; } - elements = {function_node}; + elements = {std::move(function_node)}; finished = true; } @@ -1060,7 +1074,7 @@ private: }; /// Layer for array square brackets operator -class ArrayLayer : public BaseLayer +class ArrayLayer : public LayerWithSeparator { public: bool getResult(ASTPtr & node) override @@ -1071,25 +1085,27 @@ public: bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { - return BaseLayer::parse(pos, expected, action); + return LayerWithSeparator::parse(pos, expected, action); } }; /// Layer for arrayElement square brackets operator /// This layer does not create a function, it is only needed to parse closing token /// and return only one element. 
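`LayerWithSeparator` (the renamed `BaseLayer`) implements exactly the behaviour described in its comment: a separator token closes the current element, and the end token closes the element and marks the layer finished. A reduced character-level model of that loop, with hypothetical names and no real tokens; the arrayElement layer described by the preceding comment continues below:

```cpp
#include <iostream>
#include <string>
#include <vector>

// Reduced model of LayerWithSeparator::parse: a separator closes the current
// element, the end token closes the element and finishes the whole layer.
struct MiniSeparatedLayer
{
    char separator = ',';
    char end_token = ']';

    std::vector<std::string> elements;
    std::string current;
    bool finished = false;

    void consume(char c)
    {
        if (c == separator || c == end_token)
        {
            if (!current.empty())
            {
                elements.push_back(current);   // "mergeElement"
                current.clear();
            }
            if (c == end_token)
                finished = true;
            return;
        }
        current += c;
    }
};

int main()
{
    MiniSeparatedLayer layer;
    for (char c : std::string("1,2,3]"))
        layer.consume(c);

    std::cout << "finished=" << layer.finished
              << " elements=" << layer.elements.size() << '\n';   // finished=1 elements=3
}
```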
-class ArrayElementLayer : public BaseLayer +class ArrayElementLayer : public LayerWithSeparator { public: bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { - return BaseLayer::parse(pos, expected, action); + return LayerWithSeparator::parse(pos, expected, action); } }; class CastLayer : public Layer { public: + CastLayer() : Layer(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true) {} + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { /// CAST(x [AS alias1], T [AS alias2]) or CAST(x [AS alias1] AS T) @@ -1185,9 +1201,11 @@ public: } }; -class ExtractLayer : public BaseLayer +class ExtractLayer : public LayerWithSeparator { public: + ExtractLayer() : LayerWithSeparator(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true) {} + bool getResult(ASTPtr & node) override { if (state == 2) @@ -1232,7 +1250,7 @@ public: if (state == 1) { - return BaseLayer::parse(pos, expected, action); + return LayerWithSeparator::parse(pos, expected, action); } if (state == 2) @@ -1257,6 +1275,8 @@ private: class SubstringLayer : public Layer { public: + SubstringLayer() : Layer(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true) {} + bool getResult(ASTPtr & node) override { node = makeASTFunction("substring", std::move(elements)); @@ -1317,6 +1337,8 @@ public: class PositionLayer : public Layer { public: + PositionLayer() : Layer(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true) {} + bool getResult(ASTPtr & node) override { if (state == 2) @@ -1382,10 +1404,11 @@ public: } }; - class ExistsLayer : public Layer { public: + ExistsLayer() : Layer(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true) {} + bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override { ASTPtr node; @@ -1410,9 +1433,8 @@ public: class TrimLayer : public Layer { public: - TrimLayer(bool trim_left_, bool trim_right_) : trim_left(trim_left_), trim_right(trim_right_) - { - } + TrimLayer(bool trim_left_, bool trim_right_) + : Layer(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true), trim_left(trim_left_), trim_right(trim_right_) {} bool getResult(ASTPtr & node) override { @@ -1570,13 +1592,11 @@ private: String function_name; }; - -class DateAddLayer : public BaseLayer +class DateAddLayer : public LayerWithSeparator { public: - explicit DateAddLayer(const char * function_name_) : function_name(function_name_) - { - } + explicit DateAddLayer(const char * function_name_) + : LayerWithSeparator(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true), function_name(function_name_) {} bool getResult(ASTPtr & node) override { @@ -1618,7 +1638,7 @@ public: if (state == 1) { - return BaseLayer::parse(pos, expected, action); + return LayerWithSeparator::parse(pos, expected, action); } return true; @@ -1630,10 +1650,11 @@ private: bool parsed_interval_kind = false; }; - -class DateDiffLayer : public BaseLayer +class DateDiffLayer : public LayerWithSeparator { public: + DateDiffLayer() : LayerWithSeparator(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true) {} + bool getResult(ASTPtr & node) override { if (parsed_interval_kind) @@ -1672,7 +1693,7 @@ public: if (state == 1) { - return BaseLayer::parse(pos, expected, action); + return LayerWithSeparator::parse(pos, expected, action); } return true; @@ -1683,10 +1704,11 @@ private: bool parsed_interval_kind = false; }; - class IntervalLayer : public Layer { public: + IntervalLayer() : Layer(/*allow_alias*/ true, 
/*allow_alias_without_as_keyword*/ true) {} + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { /// INTERVAL 1 HOUR or INTERVAL expr HOUR @@ -1761,86 +1783,11 @@ private: IntervalKind interval_kind; }; -/// Layer for table function 'view' and 'viewIfPermitted' -class ViewLayer : public Layer -{ -public: - explicit ViewLayer(bool if_permitted_) : if_permitted(if_permitted_) {} - - bool getResult(ASTPtr & node) override - { - if (if_permitted) - node = makeASTFunction("viewIfPermitted", std::move(elements)); - else - node = makeASTFunction("view", std::move(elements)); - - return true; - } - - bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override - { - /// view(SELECT ...) - /// viewIfPermitted(SELECT ... ELSE func(...)) - /// - /// 0. Parse the SELECT query and if 'if_permitted' parse 'ELSE' keyword (-> 1) else (finished) - /// 1. Parse closing token - - if (state == 0) - { - ASTPtr query; - - bool maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket; - - if (!ParserSelectWithUnionQuery().parse(pos, query, expected)) - return false; - - auto & select_ast = query->as(); - if (select_ast.list_of_selects->children.size() == 1 && maybe_an_subquery) - { - // It's an subquery. Bail out. - return false; - } - - pushResult(query); - - if (!if_permitted) - { - if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - return false; - - finished = true; - return true; - } - - if (!ParserKeyword{"ELSE"}.ignore(pos, expected)) - return false; - - state = 1; - return true; - } - - if (state == 1) - { - if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - { - if (!mergeElement()) - return false; - - finished = true; - } - } - - return true; - } - -private: - bool if_permitted; -}; - - class CaseLayer : public Layer { public: + CaseLayer() : Layer(/*allow_alias*/ true, /*allow_alias_without_as_keyword*/ true) {} + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { /// CASE [x] WHEN expr THEN expr [WHEN expr THEN expr [...]] [ELSE expr] END @@ -1929,6 +1876,82 @@ private: bool has_case_expr; }; +/// Layer for table function 'view' and 'viewIfPermitted' +class ViewLayer : public Layer +{ +public: + explicit ViewLayer(bool if_permitted_) : if_permitted(if_permitted_) {} + + bool getResult(ASTPtr & node) override + { + if (if_permitted) + node = makeASTFunction("viewIfPermitted", std::move(elements)); + else + node = makeASTFunction("view", std::move(elements)); + + return true; + } + + bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override + { + /// view(SELECT ...) + /// viewIfPermitted(SELECT ... ELSE func(...)) + /// + /// 0. Parse the SELECT query and if 'if_permitted' parse 'ELSE' keyword (-> 1) else (finished) + /// 1. Parse closing token + + if (state == 0) + { + ASTPtr query; + + bool maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket; + + if (!ParserSelectWithUnionQuery().parse(pos, query, expected)) + return false; + + auto & select_ast = query->as(); + if (select_ast.list_of_selects->children.size() == 1 && maybe_an_subquery) + { + // It's an subquery. Bail out. 
+ return false; + } + + pushResult(query); + + if (!if_permitted) + { + if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + return false; + + finished = true; + return true; + } + + if (!ParserKeyword{"ELSE"}.ignore(pos, expected)) + return false; + + state = 1; + return true; + } + + if (state == 1) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + finished = true; + } + } + + return true; + } + +private: + bool if_permitted; +}; + std::unique_ptr getFunctionLayer(ASTPtr identifier, bool is_table_function, bool allow_function_parameters_ = true) { @@ -1993,9 +2016,9 @@ std::unique_ptr getFunctionLayer(ASTPtr identifier, bool is_table_functio || function_name_lowercase == "timestampdiff" || function_name_lowercase == "timestamp_diff") return std::make_unique(); else if (function_name_lowercase == "grouping") - return std::make_unique(function_name_lowercase, allow_function_parameters_); + return std::make_unique(function_name_lowercase, allow_function_parameters_); else - return std::make_unique(function_name, allow_function_parameters_); + return std::make_unique(function_name, allow_function_parameters_); } @@ -2079,6 +2102,7 @@ struct ParserExpressionImpl // Recursion ParserQualifiedAsterisk qualified_asterisk_parser; ParserColumnsMatcher columns_matcher_parser; + ParserQualifiedColumnsMatcher qualified_columns_matcher_parser; ParserSubquery subquery_parser; bool parse(std::unique_ptr start, IParser::Pos & pos, ASTPtr & node, Expected & expected); @@ -2144,22 +2168,22 @@ std::vector> ParserExpressionImpl::operators_t {"<", Operator("less", 9, 2, OperatorType::Comparison)}, {">", Operator("greater", 9, 2, OperatorType::Comparison)}, {"=", Operator("equals", 9, 2, OperatorType::Comparison)}, - {"LIKE", Operator("like", 9)}, - {"ILIKE", Operator("ilike", 9)}, - {"NOT LIKE", Operator("notLike", 9)}, - {"NOT ILIKE", Operator("notILike", 9)}, - {"IN", Operator("in", 9)}, - {"NOT IN", Operator("notIn", 9)}, - {"GLOBAL IN", Operator("globalIn", 9)}, - {"GLOBAL NOT IN", Operator("globalNotIn", 9)}, + {"LIKE", Operator("like", 9, 2)}, + {"ILIKE", Operator("ilike", 9, 2)}, + {"NOT LIKE", Operator("notLike", 9, 2)}, + {"NOT ILIKE", Operator("notILike", 9, 2)}, + {"IN", Operator("in", 9, 2)}, + {"NOT IN", Operator("notIn", 9, 2)}, + {"GLOBAL IN", Operator("globalIn", 9, 2)}, + {"GLOBAL NOT IN", Operator("globalNotIn", 9, 2)}, {"||", Operator("concat", 10, 2, OperatorType::Mergeable)}, - {"+", Operator("plus", 11)}, - {"-", Operator("minus", 11)}, - {"*", Operator("multiply", 12)}, - {"/", Operator("divide", 12)}, - {"%", Operator("modulo", 12)}, - {"MOD", Operator("modulo", 12)}, - {"DIV", Operator("intDiv", 12)}, + {"+", Operator("plus", 11, 2)}, + {"-", Operator("minus", 11, 2)}, + {"*", Operator("multiply", 12, 2)}, + {"/", Operator("divide", 12, 2)}, + {"%", Operator("modulo", 12, 2)}, + {"MOD", Operator("modulo", 12, 2)}, + {"DIV", Operator("intDiv", 12, 2)}, {".", Operator("tupleElement", 14, 2, OperatorType::TupleElement)}, {"[", Operator("arrayElement", 14, 2, OperatorType::ArrayElement)}, {"::", Operator("CAST", 14, 2, OperatorType::Cast)}, @@ -2307,7 +2331,7 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos if (!layers.back()->popOperand(argument)) return Action::NONE; - function = makeASTFunction(prev_op.function_name, argument, tmp); + function = makeASTFunction(prev_op, argument, tmp); if (!modifyAST(function, subquery_function_type)) return Action::NONE; @@ 
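`getFunctionLayer` above dispatches on the lowercased function name: syntax-sugar functions such as `cast`, `extract`, `substring`, `position`, `exists`, the `trim` family, the dateAdd/dateSub/dateDiff aliases, `case` and `view` get specialized layers, while everything else falls back to the generic `FunctionLayer`. A condensed standalone sketch of that dispatch with hypothetical mini-layer types:

```cpp
#include <cctype>
#include <iostream>
#include <memory>
#include <string>

// Hypothetical miniature of getFunctionLayer: specialized handlers for syntax-sugar
// functions, a generic handler for everything else.
struct MiniLayer { virtual ~MiniLayer() = default; virtual std::string name() const = 0; };
struct CastMiniLayer : MiniLayer { std::string name() const override { return "cast"; } };
struct ExtractMiniLayer : MiniLayer { std::string name() const override { return "extract"; } };
struct GenericFunctionMiniLayer : MiniLayer
{
    explicit GenericFunctionMiniLayer(std::string fn) : function_name(std::move(fn)) {}
    std::string name() const override { return "function:" + function_name; }
    std::string function_name;
};

std::unique_ptr<MiniLayer> getMiniFunctionLayer(std::string function_name)
{
    std::string lower = function_name;
    for (char & c : lower)
        c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));

    if (lower == "cast")
        return std::make_unique<CastMiniLayer>();
    if (lower == "extract")
        return std::make_unique<ExtractMiniLayer>();
    return std::make_unique<GenericFunctionMiniLayer>(std::move(function_name));
}

int main()
{
    std::cout << getMiniFunctionLayer("CAST")->name() << '\n';    // cast
    std::cout << getMiniFunctionLayer("sumIf")->name() << '\n';   // function:sumIf
}
```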
-2356,7 +2380,8 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos literal_parser.parse(pos, tmp, expected) || asterisk_parser.parse(pos, tmp, expected) || qualified_asterisk_parser.parse(pos, tmp, expected) || - columns_matcher_parser.parse(pos, tmp, expected)) + columns_matcher_parser.parse(pos, tmp, expected) || + qualified_columns_matcher_parser.parse(pos, tmp, expected)) { layers.back()->pushOperand(std::move(tmp)); } @@ -2430,11 +2455,15 @@ Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & po if (cur_op == operators_table.end()) { + ParserAlias alias_parser(layers.back()->allow_alias_without_as_keyword); auto old_pos = pos; - if (layers.back()->allow_alias && ParserAlias(layers.back()->allow_alias_without_as_keyword).parse(pos, tmp, expected)) + if (layers.back()->allow_alias && + !layers.back()->parsed_alias && + alias_parser.parse(pos, tmp, expected) && + layers.back()->insertAlias(tmp)) { - if (layers.back()->insertAlias(tmp)) - return Action::OPERATOR; + layers.back()->parsed_alias = true; + return Action::OPERATOR; } pos = old_pos; return Action::NONE; @@ -2491,7 +2520,7 @@ Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & po } else { - function = makeASTFunction(prev_op.function_name); + function = makeASTFunction(prev_op); if (!layers.back()->popLastNOperands(function->children[0]->children, prev_op.arity)) return Action::NONE; diff --git a/src/Parsers/ParserDescribeTableQuery.cpp b/src/Parsers/ParserDescribeTableQuery.cpp index 0f768e22324..ad6d2c5bcc6 100644 --- a/src/Parsers/ParserDescribeTableQuery.cpp +++ b/src/Parsers/ParserDescribeTableQuery.cpp @@ -33,7 +33,8 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex if (!ParserTableExpression().parse(pos, table_expression, expected)) return false; - query->table_expression = table_expression; + query->children.push_back(std::move(table_expression)); + query->table_expression = query->children.back(); node = query; diff --git a/src/Parsers/ParserExplainQuery.cpp b/src/Parsers/ParserExplainQuery.cpp index d32d4444c36..7fc997f9548 100644 --- a/src/Parsers/ParserExplainQuery.cpp +++ b/src/Parsers/ParserExplainQuery.cpp @@ -19,6 +19,7 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_ast("AST"); ParserKeyword s_explain("EXPLAIN"); ParserKeyword s_syntax("SYNTAX"); + ParserKeyword s_query_tree("QUERY TREE"); ParserKeyword s_pipeline("PIPELINE"); ParserKeyword s_plan("PLAN"); ParserKeyword s_estimates("ESTIMATE"); @@ -33,6 +34,8 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected kind = ASTExplainQuery::ExplainKind::ParsedAST; else if (s_syntax.ignore(pos, expected)) kind = ASTExplainQuery::ExplainKind::AnalyzedSyntax; + else if (s_query_tree.ignore(pos, expected)) + kind = ASTExplainQuery::ExplainKind::QueryTree; else if (s_pipeline.ignore(pos, expected)) kind = ASTExplainQuery::ExplainKind::QueryPipeline; else if (s_plan.ignore(pos, expected)) @@ -84,6 +87,13 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected explain_query->setTableFunction(table_function); explain_query->setTableOverride(table_override); } + else if (kind == ASTExplainQuery::ExplainKind::QueryTree) + { + if (select_p.parse(pos, query, expected)) + explain_query->setExplainedQuery(std::move(query)); + else + return false; + } else if (kind == ASTExplainQuery::ExplainKind::CurrentTransaction) { /// Nothing to parse @@ -103,7 +113,9 
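The `ParserDescribeTableQuery` hunk above stores the parsed table expression in `children` first and only then points the typed `table_expression` member at `children.back()`, presumably so that generic AST traversals (clone, format, visitors) see the child while typed code keeps a convenient handle. A minimal sketch of that ownership convention with a hypothetical `MiniAST` node:

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Sketch of the ownership convention: the node lives in `children` (which generic
// tree walks visit) and the typed member is just another handle to the same child.
struct MiniAST
{
    std::string name;
    std::vector<std::shared_ptr<MiniAST>> children;
    std::shared_ptr<MiniAST> table_expression;   // convenience alias into children
};

int main()
{
    auto query = std::make_shared<MiniAST>();
    query->name = "DescribeQuery";

    auto table_expression = std::make_shared<MiniAST>();
    table_expression->name = "TableExpression";

    // Push into children first, then point the typed member at children.back().
    query->children.push_back(std::move(table_expression));
    query->table_expression = query->children.back();

    std::cout << "children: " << query->children.size()
              << ", same node: " << (query->table_expression == query->children.back()) << '\n';
}
```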
@@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) explain_query->setExplainedQuery(std::move(query)); } else + { return false; + } node = std::move(explain_query); return true; diff --git a/src/Parsers/SelectUnionMode.cpp b/src/Parsers/SelectUnionMode.cpp new file mode 100644 index 00000000000..6d56a2b219f --- /dev/null +++ b/src/Parsers/SelectUnionMode.cpp @@ -0,0 +1,32 @@ +#include + + +namespace DB +{ + +const char * toString(SelectUnionMode mode) +{ + switch (mode) + { + case SelectUnionMode::UNION_DEFAULT: + return "UNION_DEFAULT"; + case SelectUnionMode::UNION_ALL: + return "UNION_ALL"; + case SelectUnionMode::UNION_DISTINCT: + return "UNION_DISTINCT"; + case SelectUnionMode::EXCEPT_DEFAULT: + return "EXCEPT_DEFAULT"; + case SelectUnionMode::EXCEPT_ALL: + return "EXCEPT_ALL"; + case SelectUnionMode::EXCEPT_DISTINCT: + return "EXCEPT_DISTINCT"; + case SelectUnionMode::INTERSECT_DEFAULT: + return "INTERSECT_DEFAULT"; + case SelectUnionMode::INTERSECT_ALL: + return "INTERSECT_ALL"; + case SelectUnionMode::INTERSECT_DISTINCT: + return "INTERSECT_DISTINCT"; + } +} + +} diff --git a/src/Parsers/SelectUnionMode.h b/src/Parsers/SelectUnionMode.h index ca3637612aa..5c72ce65eb2 100644 --- a/src/Parsers/SelectUnionMode.h +++ b/src/Parsers/SelectUnionMode.h @@ -18,6 +18,8 @@ enum class SelectUnionMode INTERSECT_DISTINCT }; +const char * toString(SelectUnionMode mode); + using SelectUnionModes = std::vector; using SelectUnionModesSet = std::unordered_set; diff --git a/src/Planner/ActionsChain.cpp b/src/Planner/ActionsChain.cpp new file mode 100644 index 00000000000..594d26a679c --- /dev/null +++ b/src/Planner/ActionsChain.cpp @@ -0,0 +1,170 @@ +#include + +#include +#include + +#include +#include +#include +#include + +namespace DB +{ + +ActionsChainStep::ActionsChainStep(ActionsDAGPtr actions_, AvailableOutputColumnsStrategy available_output_columns_stategy_) + : actions(std::move(actions_)) + , available_output_columns_strategy(available_output_columns_stategy_) +{ + initialize(); +} + +ActionsChainStep::ActionsChainStep(ActionsDAGPtr actions_, + AvailableOutputColumnsStrategy available_output_columns_stategy_, + ColumnsWithTypeAndName additional_output_columns_) + : actions(std::move(actions_)) + , available_output_columns_strategy(available_output_columns_stategy_) + , additional_output_columns(std::move(additional_output_columns_)) +{ + initialize(); +} + + +void ActionsChainStep::finalizeInputAndOutputColumns(const NameSet & child_input_columns) +{ + child_required_output_columns_names.clear(); + + auto child_input_columns_copy = child_input_columns; + + std::unordered_set output_nodes_names; + output_nodes_names.reserve(actions->getOutputs().size()); + + for (auto & output_node : actions->getOutputs()) + output_nodes_names.insert(output_node->result_name); + + for (const auto & node : actions->getNodes()) + { + auto it = child_input_columns_copy.find(node.result_name); + if (it == child_input_columns_copy.end()) + continue; + + child_input_columns_copy.erase(it); + child_required_output_columns_names.insert(node.result_name); + + if (output_nodes_names.contains(node.result_name)) + continue; + + actions->getOutputs().push_back(&node); + output_nodes_names.insert(node.result_name); + } + + actions->removeUnusedActions(); + /// TODO: Analyzer fix ActionsDAG input and constant nodes with same name + actions->projectInput(); + initialize(); +} + +void ActionsChainStep::dump(WriteBuffer & buffer) const +{ + buffer << "DAG" << '\n'; + buffer <<
actions->dumpDAG(); + + if (!additional_output_columns.empty()) + { + buffer << "Additional output columns " << additional_output_columns.size() << '\n'; + for (const auto & column : additional_output_columns) + buffer << "Name " << column.name << " type " << column.type->getName() << '\n'; + } + + if (!child_required_output_columns_names.empty()) + { + buffer << "Child required output columns " << boost::join(child_required_output_columns_names, ", "); + buffer << '\n'; + } +} + +String ActionsChainStep::dump() const +{ + WriteBufferFromOwnString buffer; + dump(buffer); + + return buffer.str(); +} + +void ActionsChainStep::initialize() +{ + auto required_columns_names = actions->getRequiredColumnsNames(); + input_columns_names = NameSet(required_columns_names.begin(), required_columns_names.end()); + + available_output_columns.clear(); + + /// TODO: Analyzer fix ActionsDAG input and constant nodes with same name + std::unordered_set available_output_columns_names; + + if (available_output_columns_strategy == AvailableOutputColumnsStrategy::ALL_NODES) + { + for (const auto & node : actions->getNodes()) + { + if (available_output_columns_names.contains(node.result_name)) + continue; + + available_output_columns.emplace_back(node.column, node.result_type, node.result_name); + available_output_columns_names.insert(node.result_name); + } + } + else if (available_output_columns_strategy == AvailableOutputColumnsStrategy::OUTPUT_NODES) + { + for (const auto & node : actions->getOutputs()) + { + if (available_output_columns_names.contains(node->result_name)) + continue; + + available_output_columns.emplace_back(node->column, node->result_type, node->result_name); + available_output_columns_names.insert(node->result_name); + } + } + + available_output_columns.insert(available_output_columns.end(), additional_output_columns.begin(), additional_output_columns.end()); +} + +void ActionsChain::finalize() +{ + if (steps.empty()) + return; + + /// For last chain step there are no columns required in child nodes + NameSet empty_child_input_columns; + steps.back().get()->finalizeInputAndOutputColumns(empty_child_input_columns); + + Int64 steps_last_index = steps.size() - 1; + for (Int64 i = steps_last_index; i >= 1; --i) + { + auto & current_step = steps[i]; + auto & previous_step = steps[i - 1]; + + previous_step->finalizeInputAndOutputColumns(current_step->getInputColumnNames()); + } +} + +void ActionsChain::dump(WriteBuffer & buffer) const +{ + size_t steps_size = steps.size(); + + for (size_t i = 0; i < steps_size; ++i) + { + const auto & step = steps[i]; + buffer << "Step " << i << '\n'; + step->dump(buffer); + + buffer << '\n'; + } +} + +String ActionsChain::dump() const +{ + WriteBufferFromOwnString buffer; + dump(buffer); + + return buffer.str(); +} + +} diff --git a/src/Planner/ActionsChain.h b/src/Planner/ActionsChain.h new file mode 100644 index 00000000000..e2791ab7e35 --- /dev/null +++ b/src/Planner/ActionsChain.h @@ -0,0 +1,239 @@ +#pragma once + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/** Chain of query actions steps. This class is needed to eliminate unnecessary actions calculations. + * Each step is represented by actions DAG. + * + * Consider such example query: + * SELECT expr(id) FROM test_table WHERE expr(id) > 0. + * + * We want to reuse expr(id) from previous expressions step, and not recalculate it in projection. + * To do this we build a chain of all query action steps. + * For example: + * 1. Before where. + * 2. 
Before order by. + * 3. Projection. + * + * Initially root of chain is initialized with join tree query plan header. + * Each next chain step, must be initialized with previous step available output columns. + * That way we forward all available output columns (functions, columns, aliases) from first step of the chain to the + * last step. After chain is build we can finalize it. + * + * Each step has input columns (some of them are not necessary) and output columns. Before chain finalize output columns + * contain only necessary actions for step output calculation. + * For each step starting from last (i), we add columns that are necessary for this step to previous step (i - 1), + * and remove unused input columns of previous step(i - 1). + * That way we reuse already calculated expressions from first step to last step. + */ + +class ActionsChainStep; +using ActionsChainStepPtr = std::unique_ptr; +using ActionsChainSteps = std::vector; + +/// Actions chain step represent single step in actions chain. +class ActionsChainStep +{ +public: + /// Available output columns strategy for actions chain step + enum class AvailableOutputColumnsStrategy + { + ALL_NODES, + OUTPUT_NODES + }; + + /** Initialize actions step with actions dag. + * Input column names initialized using actions dag nodes with INPUT type. + * + * If available output columns strategy is ALL_NODES, then available output columns initialized using actions dag nodes. + * If available output columns strategy is OUTPUT_NODES, then available output columns initialized using actions dag output nodes. + */ + explicit ActionsChainStep(ActionsDAGPtr actions_, AvailableOutputColumnsStrategy available_output_columns_stategy_ = AvailableOutputColumnsStrategy::ALL_NODES); + + explicit ActionsChainStep(ActionsDAGPtr actions_, + AvailableOutputColumnsStrategy available_output_columns_stategy_, + ColumnsWithTypeAndName additional_output_columns_); + + /// Get actions + ActionsDAGPtr & getActions() + { + return actions; + } + + /// Get actions + const ActionsDAGPtr & getActions() const + { + return actions; + } + + /// Get available output columns + const ColumnsWithTypeAndName & getAvailableOutputColumns() const + { + return available_output_columns; + } + + /// Get input column names + const NameSet & getInputColumnNames() const + { + return input_columns_names; + } + + /** Get child required output columns names. + * Initialized during finalizeOutputColumns method call. + */ + const NameSet & getChildRequiredOutputColumnsNames() const + { + return child_required_output_columns_names; + } + + /** Finalize step output columns and remove unnecessary input columns. + * If actions dag node has same name as child input column, it is added to actions output nodes. 
+ */ + void finalizeInputAndOutputColumns(const NameSet & child_input_columns); + + /// Dump step into buffer + void dump(WriteBuffer & buffer) const; + + /// Dump step + String dump() const; + +private: + void initialize(); + + ActionsDAGPtr actions; + + AvailableOutputColumnsStrategy available_output_columns_strategy; + + NameSet input_columns_names; + + NameSet child_required_output_columns_names; + + ColumnsWithTypeAndName available_output_columns; + + ColumnsWithTypeAndName additional_output_columns; +}; + +/// Query actions chain +class ActionsChain +{ +public: + /// Add step into actions chain + void addStep(ActionsChainStepPtr step) + { + steps.emplace_back(std::move(step)); + } + + /// Get steps + const ActionsChainSteps & getSteps() const + { + return steps; + } + + /// Get steps size + size_t getStepsSize() const + { + return steps.size(); + } + + const ActionsChainStepPtr & at(size_t index) const + { + if (index >= steps.size()) + throw std::out_of_range("actions chain access is out of range"); + + return steps[index]; + } + + ActionsChainStepPtr & at(size_t index) + { + if (index >= steps.size()) + throw std::out_of_range("actions chain access is out of range"); + + return steps[index]; + } + + ActionsChainStepPtr & operator[](size_t index) + { + return steps[index]; + } + + const ActionsChainStepPtr & operator[](size_t index) const + { + return steps[index]; + } + + /// Get last step + ActionsChainStep * getLastStep() + { + return steps.back().get(); + } + + /// Get last step or throw exception if chain is empty + ActionsChainStep * getLastStepOrThrow() + { + if (steps.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ActionsChain is empty"); + + return steps.back().get(); + } + + /// Get last step index + size_t getLastStepIndex() + { + return steps.size() - 1; + } + + /// Get last step index or throw exception if chain is empty + size_t getLastStepIndexOrThrow() + { + if (steps.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ActionsChain is empty"); + + return steps.size() - 1; + } + + /// Get last step available output columns + const ColumnsWithTypeAndName & getLastStepAvailableOutputColumns() const + { + return steps.back()->getAvailableOutputColumns(); + } + + /// Get last step available output columns or throw exception if chain is empty + const ColumnsWithTypeAndName & getLastStepAvailableOutputColumnsOrThrow() const + { + if (steps.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ActionsChain is empty"); + + return steps.back()->getAvailableOutputColumns(); + } + + /// Get last step available output columns or null if chain is empty + const ColumnsWithTypeAndName * getLastStepAvailableOutputColumnsOrNull() const + { + if (steps.empty()) + return nullptr; + + return &steps.back()->getAvailableOutputColumns(); + } + + /// Finalize chain + void finalize(); + + /// Dump chain into buffer + void dump(WriteBuffer & buffer) const; + + /// Dump chain + String dump() const; + +private: + ActionsChainSteps steps; +}; + +} diff --git a/src/Planner/CMakeLists.txt b/src/Planner/CMakeLists.txt new file mode 100644 index 00000000000..766767b5c13 --- /dev/null +++ b/src/Planner/CMakeLists.txt @@ -0,0 +1,7 @@ +if (ENABLE_TESTS) + add_subdirectory(tests) +endif() + +if (ENABLE_EXAMPLES) + add_subdirectory(examples) +endif() diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp new file mode 100644 index 00000000000..aa7014aba48 --- /dev/null +++ b/src/Planner/CollectSets.cpp @@ -0,0 +1,101 @@ +#include + +#include + +#include + +#include +#include 
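The `ActionsChain` documentation above describes a backward pass: `finalize()` walks the steps from last to first and gives each step the input columns of its child, so a step keeps only the outputs that are actually consumed downstream. A reduced standalone model of that pruning, reusing the `SELECT expr(id) ... WHERE expr(id) > 0` example from the comment (hypothetical `MiniStep`, no ActionsDAG):

```cpp
#include <iostream>
#include <set>
#include <string>
#include <vector>

// Reduced model of ActionsChain::finalize: walk the steps backwards and keep in
// each step only the outputs that the next step actually reads as inputs.
struct MiniStep
{
    std::set<std::string> inputs;        // columns this step reads
    std::set<std::string> outputs;       // columns this step can produce
    std::set<std::string> kept_outputs;  // filled by finalize()
};

void finalize(std::vector<MiniStep> & steps)
{
    for (size_t i = steps.size(); i-- > 1;)
    {
        const auto & child_inputs = steps[i].inputs;
        auto & previous = steps[i - 1];
        for (const auto & column : previous.outputs)
            if (child_inputs.count(column))
                previous.kept_outputs.insert(column);
    }
    if (!steps.empty())
        steps.back().kept_outputs = steps.back().outputs;   // the last step keeps its own result
}

int main()
{
    // expr(id) is computed once in the WHERE step and reused by the projection.
    std::vector<MiniStep> steps(2);
    steps[0].outputs = {"id", "expr(id)", "greater(expr(id), 0)"};
    steps[1].inputs = {"expr(id)"};
    steps[1].outputs = {"expr(id)"};

    finalize(steps);
    for (const auto & column : steps[0].kept_outputs)
        std::cout << column << '\n';   // only expr(id) survives from step 0
}
```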
+#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; +} + +namespace +{ + +class CollectSetsVisitor : public ConstInDepthQueryTreeVisitor +{ +public: + explicit CollectSetsVisitor(PlannerContext & planner_context_) + : planner_context(planner_context_) + {} + + void visitImpl(const QueryTreeNodePtr & node) + { + auto * function_node = node->as(); + if (!function_node || !isNameOfInFunction(function_node->getFunctionName())) + return; + + auto in_first_argument = function_node->getArguments().getNodes().at(0); + auto in_second_argument = function_node->getArguments().getNodes().at(1); + auto in_second_argument_node_type = in_second_argument->getNodeType(); + + const auto & settings = planner_context.getQueryContext()->getSettingsRef(); + + String set_key = planner_context.createSetKey(in_second_argument); + + if (planner_context.hasSet(set_key)) + return; + + /// Tables and table functions are replaced with subquery at Analysis stage, except special Set table. + auto * second_argument_table = in_second_argument->as(); + StorageSet * storage_set = second_argument_table != nullptr ? dynamic_cast(second_argument_table->getStorage().get()) : nullptr; + + if (storage_set) + { + planner_context.registerSet(set_key, PlannerSet(storage_set->getSet())); + } + else if (auto constant_value = in_second_argument->getConstantValueOrNull()) + { + auto set = makeSetForConstantValue( + in_first_argument->getResultType(), + constant_value->getValue(), + constant_value->getType(), + settings); + + planner_context.registerSet(set_key, PlannerSet(std::move(set))); + } + else if (in_second_argument_node_type == QueryTreeNodeType::QUERY || + in_second_argument_node_type == QueryTreeNodeType::UNION) + { + SizeLimits size_limits_for_set = {settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode}; + bool tranform_null_in = settings.transform_null_in; + auto set = std::make_shared(size_limits_for_set, false /*fill_set_elements*/, tranform_null_in); + + planner_context.registerSet(set_key, PlannerSet(std::move(set), in_second_argument)); + } + else + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Function '{}' is supported only if second argument is constant or table expression", + function_node->getFunctionName()); + } + } + + static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node) + { + return !(child_node->getNodeType() == QueryTreeNodeType::QUERY || child_node->getNodeType() == QueryTreeNodeType::UNION); + } + +private: + PlannerContext & planner_context; +}; + +} + +void collectSets(const QueryTreeNodePtr & node, PlannerContext & planner_context) +{ + CollectSetsVisitor visitor(planner_context); + visitor.visit(node); +} + +} diff --git a/src/Planner/CollectSets.h b/src/Planner/CollectSets.h new file mode 100644 index 00000000000..94f792e877b --- /dev/null +++ b/src/Planner/CollectSets.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +/** Collect prepared sets and sets for subqueries that are necessary to execute IN function and its variations. + * Collected sets are registered in planner context. 
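`CollectSetsVisitor` above distinguishes three shapes of the right-hand side of `IN`: a table with the `Set` engine (its already filled set is reused), a constant (the set is built immediately), and a subquery or union (an empty set is registered together with the subquery node to be filled later); anything else is rejected. A compact sketch of that three-way decision with a hypothetical enum standing in for query tree nodes:

```cpp
#include <iostream>
#include <stdexcept>
#include <string>

// Condensed model of the CollectSets decision for the second argument of IN.
enum class MiniInArgumentKind { SetStorage, Constant, Subquery, Other };

std::string registerSetFor(MiniInArgumentKind kind)
{
    switch (kind)
    {
        case MiniInArgumentKind::SetStorage:
            return "reuse the already filled set from the Set table";
        case MiniInArgumentKind::Constant:
            return "build the set immediately from the constant value";
        case MiniInArgumentKind::Subquery:
            return "register an empty set to be filled when the subquery runs";
        case MiniInArgumentKind::Other:
            throw std::runtime_error("IN is supported only for constants and table expressions");
    }
    return {};
}

int main()
{
    std::cout << registerSetFor(MiniInArgumentKind::Subquery) << '\n';
    std::cout << registerSetFor(MiniInArgumentKind::Constant) << '\n';
}
```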
+ */ +void collectSets(const QueryTreeNodePtr & node, PlannerContext & planner_context); + +} diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp new file mode 100644 index 00000000000..30ccc541507 --- /dev/null +++ b/src/Planner/CollectTableExpressionData.cpp @@ -0,0 +1,116 @@ +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int UNSUPPORTED_METHOD; +} + +namespace +{ + +class CollectSourceColumnsVisitor : public InDepthQueryTreeVisitor +{ +public: + explicit CollectSourceColumnsVisitor(PlannerContext & planner_context_) + : planner_context(planner_context_) + {} + + void visitImpl(QueryTreeNodePtr & node) + { + auto * column_node = node->as(); + if (!column_node) + return; + + auto column_source_node = column_node->getColumnSource(); + auto column_source_node_type = column_source_node->getNodeType(); + + if (column_source_node_type == QueryTreeNodeType::ARRAY_JOIN || + column_source_node_type == QueryTreeNodeType::LAMBDA) + return; + + /// JOIN using expression + if (column_node->hasExpression() && column_source_node->getNodeType() == QueryTreeNodeType::JOIN) + return; + + auto & table_expression_data = planner_context.getOrCreateTableExpressionData(column_source_node); + + if (column_node->hasExpression()) + { + /// Replace ALIAS column with expression + table_expression_data.addAliasColumnName(column_node->getColumnName()); + node = column_node->getExpression(); + visitImpl(node); + return; + } + + if (column_source_node_type != QueryTreeNodeType::TABLE && + column_source_node_type != QueryTreeNodeType::TABLE_FUNCTION && + column_source_node_type != QueryTreeNodeType::QUERY && + column_source_node_type != QueryTreeNodeType::UNION) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected table, table function, query or union column source. 
Actual {}", + column_source_node->formatASTForErrorMessage()); + + bool column_already_exists = table_expression_data.hasColumn(column_node->getColumnName()); + if (column_already_exists) + return; + + auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node); + table_expression_data.addColumn(column_node->getColumn(), column_identifier); + } + + static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node) + { + return !(child_node->getNodeType() == QueryTreeNodeType::QUERY || child_node->getNodeType() == QueryTreeNodeType::UNION); + } + +private: + PlannerContext & planner_context; +}; + +} + +void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContext & planner_context) +{ + auto & query_node_typed = query_node->as(); + auto table_expressions_nodes = extractTableExpressions(query_node_typed.getJoinTree()); + + for (auto & table_expression_node : table_expressions_nodes) + { + auto & table_expression_data = planner_context.getOrCreateTableExpressionData(table_expression_node); + + if (auto * table_node = table_expression_node->as()) + { + bool storage_is_remote = table_node->getStorage()->isRemote(); + table_expression_data.setIsRemote(storage_is_remote); + } + else if (auto * table_function_node = table_expression_node->as()) + { + bool storage_is_remote = table_function_node->getStorage()->isRemote(); + table_expression_data.setIsRemote(storage_is_remote); + } + + if (table_expression_data.isRemote()) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Remote storages are not supported"); + } + + CollectSourceColumnsVisitor collect_source_columns_visitor(planner_context); + collect_source_columns_visitor.visit(query_node); +} + +} diff --git a/src/Planner/CollectTableExpressionData.h b/src/Planner/CollectTableExpressionData.h new file mode 100644 index 00000000000..f4e2d579dca --- /dev/null +++ b/src/Planner/CollectTableExpressionData.h @@ -0,0 +1,17 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +/** Collect table expression data for query node. + * Collected table expression data is registered in planner context. + * + * ALIAS table column nodes are registered in table expression data and replaced in query tree with inner alias expression. + */ +void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContext & planner_context); + +} diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp new file mode 100644 index 00000000000..97f82d06463 --- /dev/null +++ b/src/Planner/Planner.cpp @@ -0,0 +1,873 @@ +#include + +#include + +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; + extern const int TOO_DEEP_SUBQUERIES; + extern const int NOT_IMPLEMENTED; +} + +/** ClickHouse query planner. + * + * TODO: Support JOIN with JOIN engine. + * TODO: Support VIEWs. 
+ * TODO: JOIN drop unnecessary columns after ON, USING section + * TODO: Support RBAC. Support RBAC for ALIAS columns + * TODO: Support distributed query processing + * TODO: Support PREWHERE + * TODO: Support DISTINCT + * TODO: Support trivial count optimization + * TODO: Support projections + * TODO: Support read in order optimization + * TODO: UNION storage limits + * TODO: Support max streams + * TODO: Support ORDER BY read in order optimization + * TODO: Support GROUP BY read in order optimization + * TODO: Support Key Condition. Support indexes for IN function. + * TODO: Better support for quota and limits. + */ + +namespace +{ + +/** Check that table and table function table expressions from planner context support transactions. + * + * There is precondition that table expression data for table expression nodes is collected in planner context. + */ +void checkStoragesSupportTransactions(const PlannerContextPtr & planner_context) +{ + const auto & query_context = planner_context->getQueryContext(); + if (query_context->getSettingsRef().throw_on_unsupported_query_inside_transaction) + return; + + if (!query_context->getCurrentTransaction()) + return; + + for (const auto & [table_expression, _] : planner_context->getTableExpressionNodeToData()) + { + StoragePtr storage; + if (auto * table_node = table_expression->as()) + storage = table_node->getStorage(); + else if (auto * table_function_node = table_expression->as()) + storage = table_function_node->getStorage(); + + if (storage->supportsTransactions()) + continue; + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Storage {} (table {}) does not support transactions", + storage->getName(), + storage->getStorageID().getNameForLogs()); + } +} + +void addBuildSubqueriesForSetsStepIfNeeded(QueryPlan & query_plan, const SelectQueryOptions & select_query_options, const PlannerContextPtr & planner_context) +{ + PreparedSets::SubqueriesForSets subqueries_for_sets; + const auto & set_key_to_planner_set = planner_context->getRegisteredSets(); + + for (const auto & [key, planner_set] : set_key_to_planner_set) + { + const auto subquery_node = planner_set.getSubqueryNode(); + if (!subquery_node) + continue; + + auto subquery_context = buildSubqueryContext(planner_context->getQueryContext()); + auto subquery_options = select_query_options.subquery(); + + Planner subquery_planner( + subquery_node, + subquery_options, + std::move(subquery_context), + planner_context->getGlobalPlannerContext()); + subquery_planner.buildQueryPlanIfNeeded(); + + SubqueryForSet subquery_for_set; + subquery_for_set.set = planner_set.getSet(); + subquery_for_set.source = std::make_unique(std::move(subquery_planner).extractQueryPlan()); + + subqueries_for_sets.emplace(key, std::move(subquery_for_set)); + } + + addCreatingSetsStep(query_plan, std::move(subqueries_for_sets), planner_context->getQueryContext()); +} + +/// Extend lifetime of query context, storages, and table locks +void extendQueryContextAndStoragesLifetime(QueryPlan & query_plan, const PlannerContextPtr & planner_context) +{ + query_plan.addInterpreterContext(planner_context->getQueryContext()); + + for (const auto & [table_expression, _] : planner_context->getTableExpressionNodeToData()) + { + if (auto * table_node = table_expression->as()) + { + query_plan.addStorageHolder(table_node->getStorage()); + query_plan.addTableLock(table_node->getStorageLock()); + } + else if (auto * table_function_node = table_expression->as()) + { + query_plan.addStorageHolder(table_function_node->getStorage()); + } + } +} + 
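`checkStoragesSupportTransactions` above walks the collected table expressions, resolves the storage behind each table or table function node, and throws `NOT_IMPLEMENTED` for any storage that does not support transactions; the check only matters when a transaction is actually active. A reduced standalone model of that loop (hypothetical `MiniStorage`, the settings-based early return is left out of the sketch):

```cpp
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

// Reduced model of checkStoragesSupportTransactions: inside a transaction, every
// storage taking part in the query must support transactions.
struct MiniStorage
{
    std::string name;
    bool supports_transactions = false;
};

void checkStoragesSupportTransactions(const std::vector<std::shared_ptr<MiniStorage>> & storages,
                                      bool inside_transaction)
{
    if (!inside_transaction)
        return;   // nothing to enforce outside of a transaction

    for (const auto & storage : storages)
    {
        if (storage->supports_transactions)
            continue;
        throw std::runtime_error("Storage " + storage->name + " does not support transactions");
    }
}

int main()
{
    auto merge_tree = std::make_shared<MiniStorage>(MiniStorage{"MergeTree", true});
    auto memory = std::make_shared<MiniStorage>(MiniStorage{"Memory", false});

    checkStoragesSupportTransactions({merge_tree}, /*inside_transaction=*/true);   // passes
    try
    {
        checkStoragesSupportTransactions({merge_tree, memory}, /*inside_transaction=*/true);
    }
    catch (const std::exception & e)
    {
        std::cout << e.what() << '\n';   // Storage Memory does not support transactions
    }
}
```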
+} + +Planner::Planner(const QueryTreeNodePtr & query_tree_, + const SelectQueryOptions & select_query_options_, + ContextPtr context_) + : query_tree(query_tree_) + , select_query_options(select_query_options_) + , planner_context(std::make_shared(std::move(context_), std::make_shared())) +{ + initialize(); +} + +Planner::Planner(const QueryTreeNodePtr & query_tree_, + const SelectQueryOptions & select_query_options_, + ContextPtr context_, + GlobalPlannerContextPtr global_planner_context_) + : query_tree(query_tree_) + , select_query_options(select_query_options_) + , planner_context(std::make_shared(std::move(context_), std::move(global_planner_context_))) +{ + initialize(); +} + +void Planner::initialize() +{ + checkStackSize(); + + if (query_tree->getNodeType() != QueryTreeNodeType::QUERY && + query_tree->getNodeType() != QueryTreeNodeType::UNION) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Expected QUERY or UNION node. Actual {}", + query_tree->formatASTForErrorMessage()); + + auto & query_context = planner_context->getQueryContext(); + + size_t max_subquery_depth = query_context->getSettingsRef().max_subquery_depth; + if (max_subquery_depth && select_query_options.subquery_depth > max_subquery_depth) + throw Exception(ErrorCodes::TOO_DEEP_SUBQUERIES, + "Too deep subqueries. Maximum: {}", + max_subquery_depth); + + auto * query_node = query_tree->as(); + if (!query_node) + return; + + bool need_apply_query_settings = query_node->hasSettingsChanges(); + + const auto & client_info = query_context->getClientInfo(); + auto min_major = static_cast(DBMS_MIN_MAJOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD); + auto min_minor = static_cast(DBMS_MIN_MINOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD); + + bool need_to_disable_two_level_aggregation = client_info.query_kind == ClientInfo::QueryKind::SECONDARY_QUERY && + client_info.connection_client_version_major < min_major && + client_info.connection_client_version_minor < min_minor; + + if (need_apply_query_settings || need_to_disable_two_level_aggregation) + { + auto updated_context = Context::createCopy(query_context); + + if (need_apply_query_settings) + updated_context->applySettingsChanges(query_node->getSettingsChanges()); + + /// Disable two-level aggregation due to version incompatibility + if (need_to_disable_two_level_aggregation) + { + updated_context->setSetting("group_by_two_level_threshold", Field(0)); + updated_context->setSetting("group_by_two_level_threshold_bytes", Field(0)); + } + + query_context = std::move(updated_context); + } +} + +void Planner::buildQueryPlanIfNeeded() +{ + if (query_plan.isInitialized()) + return; + + auto query_context = planner_context->getQueryContext(); + + if (auto * union_query_tree = query_tree->as()) + { + auto union_mode = union_query_tree->getUnionMode(); + if (union_mode == SelectUnionMode::UNION_DEFAULT || + union_mode == SelectUnionMode::EXCEPT_DEFAULT || + union_mode == SelectUnionMode::INTERSECT_DEFAULT) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "UNION mode must be initialized"); + + size_t queries_size = union_query_tree->getQueries().getNodes().size(); + + std::vector> query_plans; + query_plans.reserve(queries_size); + + Blocks query_plans_headers; + query_plans_headers.reserve(queries_size); + + for (auto & query_node : union_query_tree->getQueries().getNodes()) + { + Planner query_planner(query_node, select_query_options, query_context); + query_planner.buildQueryPlanIfNeeded(); + auto query_node_plan = 
std::make_unique(std::move(query_planner).extractQueryPlan()); + query_plans_headers.push_back(query_node_plan->getCurrentDataStream().header); + query_plans.push_back(std::move(query_node_plan)); + } + + Block union_common_header = buildCommonHeaderForUnion(query_plans_headers); + DataStreams query_plans_streams; + query_plans_streams.reserve(query_plans.size()); + + for (auto & query_node_plan : query_plans) + { + if (blocksHaveEqualStructure(query_node_plan->getCurrentDataStream().header, union_common_header)) + { + query_plans_streams.push_back(query_node_plan->getCurrentDataStream()); + continue; + } + + auto actions_dag = ActionsDAG::makeConvertingActions( + query_node_plan->getCurrentDataStream().header.getColumnsWithTypeAndName(), + union_common_header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto converting_step = std::make_unique(query_node_plan->getCurrentDataStream(), std::move(actions_dag)); + converting_step->setStepDescription("Conversion before UNION"); + query_node_plan->addStep(std::move(converting_step)); + + query_plans_streams.push_back(query_node_plan->getCurrentDataStream()); + } + + const auto & settings = query_context->getSettingsRef(); + auto max_threads = settings.max_threads; + + bool is_distinct = union_mode == SelectUnionMode::UNION_DISTINCT || union_mode == SelectUnionMode::INTERSECT_DISTINCT || + union_mode == SelectUnionMode::EXCEPT_DISTINCT; + + if (union_mode == SelectUnionMode::UNION_ALL || union_mode == SelectUnionMode::UNION_DISTINCT) + { + auto union_step = std::make_unique(std::move(query_plans_streams), max_threads); + query_plan.unitePlans(std::move(union_step), std::move(query_plans)); + } + else if (union_mode == SelectUnionMode::INTERSECT_ALL || union_mode == SelectUnionMode::INTERSECT_DISTINCT || + union_mode == SelectUnionMode::EXCEPT_ALL || union_mode == SelectUnionMode::EXCEPT_DISTINCT) + { + IntersectOrExceptStep::Operator intersect_or_except_operator = IntersectOrExceptStep::Operator::UNKNOWN; + + if (union_mode == SelectUnionMode::INTERSECT_ALL) + intersect_or_except_operator = IntersectOrExceptStep::Operator::INTERSECT_ALL; + else if (union_mode == SelectUnionMode::INTERSECT_DISTINCT) + intersect_or_except_operator = IntersectOrExceptStep::Operator::INTERSECT_DISTINCT; + else if (union_mode == SelectUnionMode::EXCEPT_ALL) + intersect_or_except_operator = IntersectOrExceptStep::Operator::EXCEPT_ALL; + else if (union_mode == SelectUnionMode::EXCEPT_DISTINCT) + intersect_or_except_operator = IntersectOrExceptStep::Operator::EXCEPT_DISTINCT; + + auto union_step = std::make_unique(std::move(query_plans_streams), intersect_or_except_operator, max_threads); + query_plan.unitePlans(std::move(union_step), std::move(query_plans)); + } + + if (is_distinct) + { + /// Add distinct transform + SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode); + + auto distinct_step = std::make_unique( + query_plan.getCurrentDataStream(), + limits, + 0 /*limit hint*/, + query_plan.getCurrentDataStream().header.getNames(), + false /*pre distinct*/, + settings.optimize_distinct_in_order); + + query_plan.addStep(std::move(distinct_step)); + } + + return; + } + + auto & query_node = query_tree->as(); + + if (query_node.hasPrewhere()) + { + if (query_node.hasWhere()) + { + auto function_node = std::make_shared("and"); + auto and_function = FunctionFactory::instance().get("and", query_context); + function_node->resolveAsFunction(std::move(and_function), std::make_shared()); + 
function_node->getArguments().getNodes() = {query_node.getPrewhere(), query_node.getWhere()}; + query_node.getWhere() = std::move(function_node); + query_node.getPrewhere() = {}; + } + else + { + query_node.getWhere() = query_node.getPrewhere(); + } + } + + SelectQueryInfo select_query_info; + select_query_info.original_query = queryNodeToSelectQuery(query_tree); + select_query_info.query = select_query_info.original_query; + select_query_info.planner_context = planner_context; + + StorageLimitsList storage_limits; + storage_limits.push_back(buildStorageLimits(*query_context, select_query_options)); + select_query_info.storage_limits = std::make_shared(storage_limits); + + collectTableExpressionData(query_tree, *planner_context); + checkStoragesSupportTransactions(planner_context); + + collectSets(query_tree, *planner_context); + + query_plan = buildQueryPlanForJoinTreeNode(query_node.getJoinTree(), select_query_info, select_query_options, planner_context); + auto expression_analysis_result = buildExpressionAnalysisResult(query_tree, query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(), planner_context); + + if (expression_analysis_result.hasWhere()) + { + const auto & where_analysis_result = expression_analysis_result.getWhere(); + auto where_step = std::make_unique(query_plan.getCurrentDataStream(), + where_analysis_result.filter_actions, + where_analysis_result.filter_column_name, + where_analysis_result.remove_filter_column); + where_step->setStepDescription("WHERE"); + query_plan.addStep(std::move(where_step)); + } + + bool having_executed = false; + + if (expression_analysis_result.hasAggregation()) + { + const auto & aggregation_analysis_result = expression_analysis_result.getAggregation(); + + if (aggregation_analysis_result.before_aggregation_actions) + { + auto expression_before_aggregation = std::make_unique(query_plan.getCurrentDataStream(), aggregation_analysis_result.before_aggregation_actions); + expression_before_aggregation->setStepDescription("Before GROUP BY"); + query_plan.addStep(std::move(expression_before_aggregation)); + } + + const Settings & settings = planner_context->getQueryContext()->getSettingsRef(); + + const auto stats_collecting_params = Aggregator::Params::StatsCollectingParams( + select_query_info.query, + settings.collect_hash_table_stats_during_aggregation, + settings.max_entries_for_hash_table_stats, + settings.max_size_to_preallocate_for_aggregation); + + bool aggregate_overflow_row = + query_node.isGroupByWithTotals() && + settings.max_rows_to_group_by && + settings.group_by_overflow_mode == OverflowMode::ANY && + settings.totals_mode != TotalsMode::AFTER_HAVING_EXCLUSIVE; + + Aggregator::Params aggregator_params = Aggregator::Params( + aggregation_analysis_result.aggregation_keys, + aggregation_analysis_result.aggregate_descriptions, + aggregate_overflow_row, + settings.max_rows_to_group_by, + settings.group_by_overflow_mode, + settings.group_by_two_level_threshold, + settings.group_by_two_level_threshold_bytes, + settings.max_bytes_before_external_group_by, + settings.empty_result_for_aggregation_by_empty_set + || (settings.empty_result_for_aggregation_by_constant_keys_on_empty_set && aggregation_analysis_result.aggregation_keys.empty() + && aggregation_analysis_result.group_by_with_constant_keys), + planner_context->getQueryContext()->getTempDataOnDisk(), + settings.max_threads, + settings.min_free_disk_space_for_temporary_data, + settings.compile_aggregate_expressions, + settings.min_count_to_compile_aggregate_expression, + 
settings.max_block_size, + settings.enable_software_prefetch_in_aggregation, + /* only_merge */ false, + stats_collecting_params + ); + + SortDescription group_by_sort_description; + + auto merge_threads = settings.max_threads; + auto temporary_data_merge_threads = settings.aggregation_memory_efficient_merge_threads + ? static_cast(settings.aggregation_memory_efficient_merge_threads) + : static_cast(settings.max_threads); + + bool storage_has_evenly_distributed_read = false; + const auto & table_expression_node_to_data = planner_context->getTableExpressionNodeToData(); + + if (table_expression_node_to_data.size() == 1) + { + auto it = table_expression_node_to_data.begin(); + const auto & table_expression_node = it->first; + if (const auto * table_node = table_expression_node->as()) + storage_has_evenly_distributed_read = table_node->getStorage()->hasEvenlyDistributedRead(); + else if (const auto * table_function_node = table_expression_node->as()) + storage_has_evenly_distributed_read = table_function_node->getStorageOrThrow()->hasEvenlyDistributedRead(); + } + + const bool should_produce_results_in_order_of_bucket_number + = select_query_options.to_stage == QueryProcessingStage::WithMergeableState && settings.distributed_aggregation_memory_efficient; + + InputOrderInfoPtr input_order_info; + bool aggregate_final = + select_query_options.to_stage > QueryProcessingStage::WithMergeableState && + !query_node.isGroupByWithTotals() && !query_node.isGroupByWithRollup() && !query_node.isGroupByWithCube(); + + auto aggregating_step = std::make_unique( + query_plan.getCurrentDataStream(), + aggregator_params, + aggregation_analysis_result.grouping_sets_parameters_list, + aggregate_final, + settings.max_block_size, + settings.aggregation_in_order_max_block_bytes, + merge_threads, + temporary_data_merge_threads, + storage_has_evenly_distributed_read, + settings.group_by_use_nulls, + std::move(input_order_info), + std::move(group_by_sort_description), + should_produce_results_in_order_of_bucket_number); + query_plan.addStep(std::move(aggregating_step)); + + if (query_node.isGroupByWithRollup()) + { + auto rollup_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(aggregator_params), true /*final*/, settings.group_by_use_nulls); + query_plan.addStep(std::move(rollup_step)); + } + else if (query_node.isGroupByWithCube()) + { + auto cube_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(aggregator_params), true /*final*/, settings.group_by_use_nulls); + query_plan.addStep(std::move(cube_step)); + } + + if (query_node.isGroupByWithTotals()) + { + const auto & having_analysis_result = expression_analysis_result.getHaving(); + bool final = !query_node.isGroupByWithRollup() && !query_node.isGroupByWithCube(); + having_executed = true; + + auto totals_having_step = std::make_unique( + query_plan.getCurrentDataStream(), + aggregation_analysis_result.aggregate_descriptions, + aggregate_overflow_row, + having_analysis_result.filter_actions, + having_analysis_result.filter_column_name, + having_analysis_result.remove_filter_column, + settings.totals_mode, + settings.totals_auto_threshold, + final); + + query_plan.addStep(std::move(totals_having_step)); + } + } + + if (!having_executed && expression_analysis_result.hasHaving()) + { + const auto & having_analysis_result = expression_analysis_result.getHaving(); + + auto having_step = std::make_unique(query_plan.getCurrentDataStream(), + having_analysis_result.filter_actions, + having_analysis_result.filter_column_name, + 
having_analysis_result.remove_filter_column); + having_step->setStepDescription("HAVING"); + query_plan.addStep(std::move(having_step)); + } + + if (expression_analysis_result.hasWindow()) + { + const auto & window_analysis_result = expression_analysis_result.getWindow(); + + if (window_analysis_result.before_window_actions) + { + auto expression_step_before_window = std::make_unique(query_plan.getCurrentDataStream(), window_analysis_result.before_window_actions); + expression_step_before_window->setStepDescription("Before WINDOW"); + query_plan.addStep(std::move(expression_step_before_window)); + } + + auto window_descriptions = window_analysis_result.window_descriptions; + sortWindowDescriptions(window_descriptions); + + size_t window_descriptions_size = window_descriptions.size(); + + const auto & settings = query_context->getSettingsRef(); + for (size_t i = 0; i < window_descriptions_size; ++i) + { + const auto & window_description = window_descriptions[i]; + + /** We don't need to sort again if the input from previous window already + * has suitable sorting. Also don't create sort steps when there are no + * columns to sort by, because the sort nodes are confused by this. It + * happens in case of `over ()`. + */ + if (!window_description.full_sort_description.empty() && + (i == 0 || !sortDescriptionIsPrefix(window_description.full_sort_description, window_descriptions[i - 1].full_sort_description))) + { + auto sorting_step = std::make_unique( + query_plan.getCurrentDataStream(), + window_description.full_sort_description, + settings.max_block_size, + 0 /*limit*/, + SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode), + settings.max_bytes_before_remerge_sort, + settings.remerge_sort_lowered_memory_bytes_ratio, + settings.max_bytes_before_external_sort, + query_context->getTempDataOnDisk(), + settings.min_free_disk_space_for_temporary_data, + settings.optimize_sorting_by_input_stream_properties); + + sorting_step->setStepDescription("Sorting for window '" + window_description.window_name + "'"); + query_plan.addStep(std::move(sorting_step)); + } + + auto window_step = std::make_unique(query_plan.getCurrentDataStream(), window_description, window_description.window_functions); + window_step->setStepDescription("Window step for window '" + window_description.window_name + "'"); + query_plan.addStep(std::move(window_step)); + } + } + + const auto & projection_analysis_result = expression_analysis_result.getProjection(); + auto expression_step_projection = std::make_unique(query_plan.getCurrentDataStream(), projection_analysis_result.projection_actions); + expression_step_projection->setStepDescription("Projection"); + query_plan.addStep(std::move(expression_step_projection)); + + UInt64 limit_offset = 0; + if (query_node.hasOffset()) + { + /// Constness of offset is validated during query analysis stage + limit_offset = query_node.getOffset()->getConstantValue().getValue().safeGet(); + } + + UInt64 limit_length = 0; + + if (query_node.hasLimit()) + { + /// Constness of limit is validated during query analysis stage + limit_length = query_node.getLimit()->getConstantValue().getValue().safeGet(); + } + + if (query_node.isDistinct()) + { + const Settings & settings = planner_context->getQueryContext()->getSettingsRef(); + UInt64 limit_hint_for_distinct = 0; + bool pre_distinct = true; + + SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode); + bool no_order_by = !query_node.hasOrderBy(); + 
+ /** If after this stage of DISTINCT ORDER BY is not executed, + * then you can get no more than limit_length + limit_offset of different rows. + */ + if (no_order_by && limit_length <= std::numeric_limits::max() - limit_offset) + limit_hint_for_distinct = limit_length + limit_offset; + + auto distinct_step = std::make_unique( + query_plan.getCurrentDataStream(), + limits, + limit_hint_for_distinct, + projection_analysis_result.projection_column_names, + pre_distinct, + settings.optimize_distinct_in_order); + + if (pre_distinct) + distinct_step->setStepDescription("Preliminary DISTINCT"); + else + distinct_step->setStepDescription("DISTINCT"); + + query_plan.addStep(std::move(distinct_step)); + } + + if (expression_analysis_result.hasSort()) + { + const auto & sort_analysis_result = expression_analysis_result.getSort(); + auto expression_step_before_order_by = std::make_unique(query_plan.getCurrentDataStream(), sort_analysis_result.before_order_by_actions); + expression_step_before_order_by->setStepDescription("Before ORDER BY"); + query_plan.addStep(std::move(expression_step_before_order_by)); + } + + QueryPlanStepPtr filling_step; + SortDescription sort_description; + + if (query_node.hasOrderBy()) + { + sort_description = extractSortDescription(query_node.getOrderByNode(), *planner_context); + + bool query_has_array_join_in_join_tree = queryHasArrayJoinInJoinTree(query_tree); + + UInt64 partial_sorting_limit = 0; + + /// Partial sort can be done if there is LIMIT, but no DISTINCT, LIMIT WITH TIES, LIMIT BY, ARRAY JOIN + if (limit_length != 0 && !query_node.isDistinct() && !query_node.hasLimitBy() && !query_node.isLimitWithTies() && + !query_has_array_join_in_join_tree && limit_length <= std::numeric_limits::max() - limit_offset) + { + partial_sorting_limit = limit_length + limit_offset; + } + + const Settings & settings = query_context->getSettingsRef(); + + /// Merge the sorted blocks + auto sorting_step = std::make_unique( + query_plan.getCurrentDataStream(), + sort_description, + settings.max_block_size, + partial_sorting_limit, + SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode), + settings.max_bytes_before_remerge_sort, + settings.remerge_sort_lowered_memory_bytes_ratio, + settings.max_bytes_before_external_sort, + query_context->getTempDataOnDisk(), + settings.min_free_disk_space_for_temporary_data, + settings.optimize_sorting_by_input_stream_properties); + + sorting_step->setStepDescription("Sorting for ORDER BY"); + query_plan.addStep(std::move(sorting_step)); + + NameSet column_names_with_fill; + SortDescription fill_description; + for (auto & description : sort_description) + { + if (description.with_fill) + { + fill_description.push_back(description); + column_names_with_fill.insert(description.column_name); + } + } + + if (!fill_description.empty()) + { + InterpolateDescriptionPtr interpolate_description; + + if (query_node.hasInterpolate()) + { + auto interpolate_actions_dag = std::make_shared(); + + auto & interpolate_list_node = query_node.getInterpolate()->as(); + auto & interpolate_list_nodes = interpolate_list_node.getNodes(); + + if (interpolate_list_nodes.empty()) + { + auto query_plan_columns = query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); + for (auto & query_plan_column : query_plan_columns) + { + if (column_names_with_fill.contains(query_plan_column.name)) + continue; + + const auto * input_action_node = &interpolate_actions_dag->addInput(query_plan_column); + 
interpolate_actions_dag->getOutputs().push_back(input_action_node); + } + } + else + { + for (auto & interpolate_node : interpolate_list_nodes) + { + auto & interpolate_node_typed = interpolate_node->as(); + + PlannerActionsVisitor planner_actions_visitor(planner_context); + auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getExpression()); + auto interpolate_expression_nodes = planner_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getInterpolateExpression()); + + if (expression_to_interpolate_expression_nodes.size() != 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression to interpolate expected to have single action node"); + + if (interpolate_expression_nodes.size() != 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Interpolate expression expected to have single action node"); + + const auto * expression_to_interpolate = expression_to_interpolate_expression_nodes[0]; + const auto & expression_to_interpolate_name = expression_to_interpolate->result_name; + + const auto * interpolate_expression = interpolate_expression_nodes[0]; + if (!interpolate_expression->result_type->equals(*expression_to_interpolate->result_type)) + { + auto cast_type_name = expression_to_interpolate->result_type->getName(); + Field cast_type_constant_value(cast_type_name); + + ColumnWithTypeAndName column; + column.name = calculateConstantActionNodeName(cast_type_name); + column.column = DataTypeString().createColumnConst(0, cast_type_constant_value); + column.type = std::make_shared(); + + const auto * cast_type_constant_node = &interpolate_actions_dag->addColumn(std::move(column)); + + FunctionCastBase::Diagnostic diagnostic = {interpolate_expression->result_name, interpolate_expression->result_name}; + FunctionOverloadResolverPtr func_builder_cast + = CastInternalOverloadResolver::createImpl(std::move(diagnostic)); + + ActionsDAG::NodeRawConstPtrs children = {interpolate_expression, cast_type_constant_node}; + interpolate_expression = &interpolate_actions_dag->addFunction(func_builder_cast, std::move(children), interpolate_expression->result_name); + } + + const auto * alias_node = &interpolate_actions_dag->addAlias(*interpolate_expression, expression_to_interpolate_name); + interpolate_actions_dag->getOutputs().push_back(alias_node); + } + + interpolate_actions_dag->removeUnusedActions(); + } + + Aliases empty_aliases; + interpolate_description = std::make_shared(std::move(interpolate_actions_dag), empty_aliases); + } + + filling_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(fill_description), interpolate_description); + } + } + + if (expression_analysis_result.hasLimitBy()) + { + const auto & limit_by_analysis_result = expression_analysis_result.getLimitBy(); + auto expression_step_before_limit_by = std::make_unique(query_plan.getCurrentDataStream(), limit_by_analysis_result.before_limit_by_actions); + expression_step_before_limit_by->setStepDescription("Before LIMIT BY"); + query_plan.addStep(std::move(expression_step_before_limit_by)); + + /// Constness of LIMIT BY limit is validated during query analysis stage + UInt64 limit_by_limit = query_node.getLimitByLimit()->getConstantValue().getValue().safeGet(); + UInt64 limit_by_offset = 0; + + if (query_node.hasLimitByOffset()) + { + /// Constness of LIMIT BY offset is validated during query analysis stage + limit_by_offset = query_node.getLimitByOffset()->getConstantValue().getValue().safeGet(); + } + + auto limit_by_step = 
std::make_unique(query_plan.getCurrentDataStream(), + limit_by_limit, + limit_by_offset, + limit_by_analysis_result.limit_by_column_names); + query_plan.addStep(std::move(limit_by_step)); + } + + if (filling_step) + query_plan.addStep(std::move(filling_step)); + + if (query_context->getSettingsRef().extremes) + { + auto extremes_step = std::make_unique(query_plan.getCurrentDataStream()); + query_plan.addStep(std::move(extremes_step)); + } + + if (query_node.hasLimit()) + { + const Settings & settings = query_context->getSettingsRef(); + bool always_read_till_end = settings.exact_rows_before_limit; + bool limit_with_ties = query_node.isLimitWithTies(); + + /** Special cases: + * + * 1. If there is WITH TOTALS and there is no ORDER BY, then read the data to the end, + * otherwise TOTALS is counted according to incomplete data. + * + * 2. If there is no WITH TOTALS and there is a subquery in FROM, and there is WITH TOTALS on one of the levels, + * then when using LIMIT, you should read the data to the end, rather than cancel the query earlier, + * because if you cancel the query, we will not get `totals` data from the remote server. + */ + if (query_node.isGroupByWithTotals() && !query_node.hasOrderBy()) + always_read_till_end = true; + + if (!query_node.isGroupByWithTotals() && queryHasWithTotalsInAnySubqueryInJoinTree(query_tree)) + always_read_till_end = true; + + SortDescription limit_with_ties_sort_description; + + if (query_node.isLimitWithTies()) + { + /// Validated during parser stage + if (!query_node.hasOrderBy()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "LIMIT WITH TIES without ORDER BY"); + + limit_with_ties_sort_description = sort_description; + } + + auto limit = std::make_unique(query_plan.getCurrentDataStream(), + limit_length, + limit_offset, + always_read_till_end, + limit_with_ties, + limit_with_ties_sort_description); + + if (limit_with_ties) + limit->setStepDescription("LIMIT WITH TIES"); + + query_plan.addStep(std::move(limit)); + } + else if (query_node.hasOffset()) + { + auto offsets_step = std::make_unique(query_plan.getCurrentDataStream(), limit_offset); + query_plan.addStep(std::move(offsets_step)); + } + + auto projection_step = std::make_unique(query_plan.getCurrentDataStream(), projection_analysis_result.project_names_actions); + projection_step->setStepDescription("Project names"); + query_plan.addStep(std::move(projection_step)); + + addBuildSubqueriesForSetsStepIfNeeded(query_plan, select_query_options, planner_context); + extendQueryContextAndStoragesLifetime(query_plan, planner_context); +} + +} diff --git a/src/Planner/Planner.h b/src/Planner/Planner.h new file mode 100644 index 00000000000..03f8e19df56 --- /dev/null +++ b/src/Planner/Planner.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include + +#include +#include +#include + +namespace DB +{ + +class GlobalPlannerContext; +using GlobalPlannerContextPtr = std::shared_ptr; + +class PlannerContext; +using PlannerContextPtr = std::shared_ptr; + +class Planner +{ +public: + /// Initialize planner with query tree after analysis phase + Planner(const QueryTreeNodePtr & query_tree_, + const SelectQueryOptions & select_query_options_, + ContextPtr context_); + + /// Initialize planner with query tree after query analysis phase and global planner context + Planner(const QueryTreeNodePtr & query_tree_, + const SelectQueryOptions & select_query_options_, + ContextPtr context_, + GlobalPlannerContextPtr global_planner_context_); + + const QueryPlan & getQueryPlan() const + { + return query_plan; + } + + 
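+    /// Non-const access to the query plan built by buildQueryPlanIfNeeded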
QueryPlan & getQueryPlan() + { + return query_plan; + } + + void buildQueryPlanIfNeeded(); + + QueryPlan && extractQueryPlan() && + { + return std::move(query_plan); + } + +private: + void initialize(); + + QueryTreeNodePtr query_tree; + QueryPlan query_plan; + SelectQueryOptions select_query_options; + PlannerContextPtr planner_context; +}; + +} diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp new file mode 100644 index 00000000000..7bc53127101 --- /dev/null +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -0,0 +1,756 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +class ActionsScopeNode +{ +public: + explicit ActionsScopeNode(ActionsDAGPtr actions_dag_, QueryTreeNodePtr scope_node_) + : actions_dag(std::move(actions_dag_)) + , scope_node(std::move(scope_node_)) + { + for (const auto & node : actions_dag->getNodes()) + node_name_to_node[node.result_name] = &node; + } + + const QueryTreeNodePtr & getScopeNode() const + { + return scope_node; + } + + [[maybe_unused]] bool containsNode(const std::string & node_name) + { + return node_name_to_node.find(node_name) != node_name_to_node.end(); + } + + [[maybe_unused]] const ActionsDAG::Node * tryGetNode(const std::string & node_name) + { + auto it = node_name_to_node.find(node_name); + if (it == node_name_to_node.end()) + return {}; + + return it->second; + } + + const ActionsDAG::Node * getNodeOrThrow(const std::string & node_name) + { + auto it = node_name_to_node.find(node_name); + if (it == node_name_to_node.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "No node with name {}. 
There are only nodes {}", + node_name, + actions_dag->dumpNames()); + + return it->second; + } + + const ActionsDAG::Node * addInputColumnIfNecessary(const std::string & node_name, const DataTypePtr & column_type) + { + auto it = node_name_to_node.find(node_name); + if (it != node_name_to_node.end()) + return it->second; + + const auto * node = &actions_dag->addInput(node_name, column_type); + node_name_to_node[node->result_name] = node; + + return node; + } + + const ActionsDAG::Node * addInputConstantColumnIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column) + { + auto it = node_name_to_node.find(node_name); + if (it != node_name_to_node.end()) + return it->second; + + const auto * node = &actions_dag->addInput(column); + node_name_to_node[node->result_name] = node; + + return node; + } + + const ActionsDAG::Node * addConstantIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column) + { + auto it = node_name_to_node.find(node_name); + if (it != node_name_to_node.end()) + return it->second; + + const auto * node = &actions_dag->addColumn(column); + node_name_to_node[node->result_name] = node; + + return node; + } + + const ActionsDAG::Node * addFunctionIfNecessary(const std::string & node_name, ActionsDAG::NodeRawConstPtrs children, FunctionOverloadResolverPtr function) + { + auto it = node_name_to_node.find(node_name); + if (it != node_name_to_node.end()) + return it->second; + + const auto * node = &actions_dag->addFunction(function, children, node_name); + node_name_to_node[node->result_name] = node; + + return node; + } + + const ActionsDAG::Node * addArrayJoinIfNecessary(const std::string & node_name, const ActionsDAG::Node * child) + { + auto it = node_name_to_node.find(node_name); + if (it != node_name_to_node.end()) + return it->second; + + const auto * node = &actions_dag->addArrayJoin(*child, node_name); + node_name_to_node[node->result_name] = node; + + return node; + } + +private: + std::unordered_map node_name_to_node; + ActionsDAGPtr actions_dag; + QueryTreeNodePtr scope_node; +}; + +class PlannerActionsVisitorImpl +{ +public: + PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag, const PlannerContextPtr & planner_context_); + + ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node); + +private: + using NodeNameAndNodeMinLevel = std::pair; + + NodeNameAndNodeMinLevel visitImpl(QueryTreeNodePtr node); + + NodeNameAndNodeMinLevel visitColumn(const QueryTreeNodePtr & node); + + NodeNameAndNodeMinLevel visitConstantValue(const Field & constant_literal, const DataTypePtr & constant_type); + + NodeNameAndNodeMinLevel visitConstant(const QueryTreeNodePtr & node); + + NodeNameAndNodeMinLevel visitLambda(const QueryTreeNodePtr & node); + + NodeNameAndNodeMinLevel makeSetForInFunction(const QueryTreeNodePtr & node); + + NodeNameAndNodeMinLevel visitFunction(const QueryTreeNodePtr & node); + + NodeNameAndNodeMinLevel visitQueryOrUnion(const QueryTreeNodePtr & node); + + std::vector actions_stack; + std::unordered_map node_to_node_name; + const PlannerContextPtr planner_context; +}; + +PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag, const PlannerContextPtr & planner_context_) + : planner_context(planner_context_) +{ + actions_stack.emplace_back(std::move(actions_dag), nullptr); +} + +ActionsDAG::NodeRawConstPtrs PlannerActionsVisitorImpl::visit(QueryTreeNodePtr expression_node) +{ + ActionsDAG::NodeRawConstPtrs result; + + if (auto * expression_list_node = expression_node->as()) + { + for 
(auto & node : expression_list_node->getNodes()) + { + auto [node_name, _] = visitImpl(node); + result.push_back(actions_stack.front().getNodeOrThrow(node_name)); + } + } + else + { + auto [node_name, _] = visitImpl(expression_node); + result.push_back(actions_stack.front().getNodeOrThrow(node_name)); + } + + return result; +} + +PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitImpl(QueryTreeNodePtr node) +{ + auto node_type = node->getNodeType(); + + if (node_type == QueryTreeNodeType::COLUMN) + return visitColumn(node); + else if (node_type == QueryTreeNodeType::CONSTANT) + return visitConstant(node); + else if (node_type == QueryTreeNodeType::FUNCTION) + return visitFunction(node); + else if (node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION) + return visitQueryOrUnion(node); + + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Expected column, constant, function, query or union node. Actual {}", + node->formatASTForErrorMessage()); +} + +PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitColumn(const QueryTreeNodePtr & node) +{ + auto column_node_name = calculateActionNodeName(node, *planner_context, node_to_node_name); + const auto & column_node = node->as(); + + Int64 actions_stack_size = static_cast(actions_stack.size() - 1); + for (Int64 i = actions_stack_size; i >= 0; --i) + { + actions_stack[i].addInputColumnIfNecessary(column_node_name, column_node.getColumnType()); + + auto column_source = column_node.getColumnSourceOrNull(); + if (column_source && + column_source->getNodeType() == QueryTreeNodeType::LAMBDA && + actions_stack[i].getScopeNode().get() == column_source.get()) + { + return {column_node_name, i}; + } + } + + return {column_node_name, 0}; +} + +PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitConstantValue(const Field & constant_literal, const DataTypePtr & constant_type) +{ + auto constant_node_name = calculateConstantActionNodeName(constant_literal, constant_type); + + ColumnWithTypeAndName column; + column.name = constant_node_name; + column.type = constant_type; + column.column = column.type->createColumnConst(1, constant_literal); + + actions_stack[0].addConstantIfNecessary(constant_node_name, column); + + size_t actions_stack_size = actions_stack.size(); + for (size_t i = 1; i < actions_stack_size; ++i) + { + auto & actions_stack_node = actions_stack[i]; + actions_stack_node.addInputConstantColumnIfNecessary(constant_node_name, column); + } + + return {constant_node_name, 0}; +} + +PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitConstant(const QueryTreeNodePtr & node) +{ + const auto & constant_node = node->as(); + return visitConstantValue(constant_node.getValue(), constant_node.getResultType()); +} + +PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitLambda(const QueryTreeNodePtr & node) +{ + auto & lambda_node = node->as(); + auto result_type = lambda_node.getResultType(); + if (!result_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Lambda {} is not resolved during query analysis", + lambda_node.formatASTForErrorMessage()); + + auto & lambda_arguments_nodes = lambda_node.getArguments().getNodes(); + size_t lambda_arguments_nodes_size = lambda_arguments_nodes.size(); + + NamesAndTypesList lambda_arguments_names_and_types; + + for (size_t i = 0; i < lambda_arguments_nodes_size; ++i) + { + const auto & lambda_argument_name = 
lambda_node.getArgumentNames().at(i); + auto lambda_argument_type = lambda_arguments_nodes[i]->getResultType(); + lambda_arguments_names_and_types.emplace_back(lambda_argument_name, std::move(lambda_argument_type)); + } + + auto lambda_actions_dag = std::make_shared(); + actions_stack.emplace_back(lambda_actions_dag, node); + + auto [lambda_expression_node_name, level] = visitImpl(lambda_node.getExpression()); + lambda_actions_dag->getOutputs().push_back(actions_stack.back().getNodeOrThrow(lambda_expression_node_name)); + lambda_actions_dag->removeUnusedActions(Names(1, lambda_expression_node_name)); + + auto expression_actions_settings = ExpressionActionsSettings::fromContext(planner_context->getQueryContext(), CompileExpressions::yes); + auto lambda_actions = std::make_shared(lambda_actions_dag, expression_actions_settings); + + Names captured_column_names; + ActionsDAG::NodeRawConstPtrs lambda_children; + Names required_column_names = lambda_actions->getRequiredColumns(); + + if (level == actions_stack.size() - 1) + --level; + + const auto & lambda_argument_names = lambda_node.getArgumentNames(); + + for (const auto & required_column_name : required_column_names) + { + auto it = std::find(lambda_argument_names.begin(), lambda_argument_names.end(), required_column_name); + + if (it == lambda_argument_names.end()) + { + lambda_children.push_back(actions_stack[level].getNodeOrThrow(required_column_name)); + captured_column_names.push_back(required_column_name); + } + } + + auto lambda_node_name = calculateActionNodeName(node, *planner_context); + auto function_capture = std::make_shared( + lambda_actions, captured_column_names, lambda_arguments_names_and_types, result_type, lambda_expression_node_name); + actions_stack.pop_back(); + + actions_stack[level].addFunctionIfNecessary(lambda_node_name, std::move(lambda_children), std::move(function_capture)); + + size_t actions_stack_size = actions_stack.size(); + for (size_t i = level + 1; i < actions_stack_size; ++i) + { + auto & actions_stack_node = actions_stack[i]; + actions_stack_node.addInputColumnIfNecessary(lambda_node_name, result_type); + } + + return {lambda_node_name, level}; +} + +PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::makeSetForInFunction(const QueryTreeNodePtr & node) +{ + const auto & function_node = node->as(); + auto in_second_argument = function_node.getArguments().getNodes().at(1); + + auto set_key = planner_context->createSetKey(in_second_argument); + const auto & planner_set = planner_context->getSetOrThrow(set_key); + + ColumnWithTypeAndName column; + column.name = set_key; + column.type = std::make_shared(); + + bool set_is_created = planner_set.getSet()->isCreated(); + auto column_set = ColumnSet::create(1, planner_set.getSet()); + + if (set_is_created) + column.column = ColumnConst::create(std::move(column_set), 1); + else + column.column = std::move(column_set); + + actions_stack[0].addConstantIfNecessary(set_key, column); + + size_t actions_stack_size = actions_stack.size(); + for (size_t i = 1; i < actions_stack_size; ++i) + { + auto & actions_stack_node = actions_stack[i]; + actions_stack_node.addInputConstantColumnIfNecessary(set_key, column); + } + + node_to_node_name.emplace(in_second_argument, set_key); + + return {set_key, 0}; +} + +PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitFunction(const QueryTreeNodePtr & node) +{ + const auto & function_node = node->as(); + if (const auto constant_value_or_null = 
function_node.getConstantValueOrNull()) + return visitConstantValue(constant_value_or_null->getValue(), constant_value_or_null->getType()); + + std::optional in_function_second_argument_node_name_with_level; + + if (isNameOfInFunction(function_node.getFunctionName())) + in_function_second_argument_node_name_with_level = makeSetForInFunction(node); + + const auto & function_arguments = function_node.getArguments().getNodes(); + size_t function_arguments_size = function_arguments.size(); + + Names function_arguments_node_names; + function_arguments_node_names.reserve(function_arguments_size); + + size_t level = 0; + for (size_t function_argument_index = 0; function_argument_index < function_arguments_size; ++function_argument_index) + { + if (in_function_second_argument_node_name_with_level && function_argument_index == 1) + { + auto & [node_name, node_min_level] = *in_function_second_argument_node_name_with_level; + function_arguments_node_names.push_back(std::move(node_name)); + level = std::max(level, node_min_level); + continue; + } + + const auto & argument = function_arguments[function_argument_index]; + + if (argument->getNodeType() == QueryTreeNodeType::LAMBDA) + { + auto [node_name, node_min_level] = visitLambda(argument); + function_arguments_node_names.push_back(std::move(node_name)); + level = std::max(level, node_min_level); + continue; + } + + auto [node_name, node_min_level] = visitImpl(argument); + function_arguments_node_names.push_back(std::move(node_name)); + level = std::max(level, node_min_level); + } + + auto function_node_name = calculateActionNodeName(node, *planner_context, node_to_node_name); + + if (function_node.isAggregateFunction() || function_node.isWindowFunction()) + { + size_t actions_stack_size = actions_stack.size(); + + for (size_t i = 0; i < actions_stack_size; ++i) + { + auto & actions_stack_node = actions_stack[i]; + actions_stack_node.addInputColumnIfNecessary(function_node_name, function_node.getResultType()); + } + + return {function_node_name, 0}; + } + + ActionsDAG::NodeRawConstPtrs children; + children.reserve(function_arguments_size); + + for (auto & function_argument_node_name : function_arguments_node_names) + children.push_back(actions_stack[level].getNodeOrThrow(function_argument_node_name)); + + if (function_node.getFunctionName() == "arrayJoin") + { + if (level != 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Expression in arrayJoin cannot depend on lambda argument: {} ", + function_arguments_node_names.at(0)); + + actions_stack[level].addArrayJoinIfNecessary(function_node_name, children.at(0)); + } + else + { + actions_stack[level].addFunctionIfNecessary(function_node_name, children, function_node.getFunction()); + } + + size_t actions_stack_size = actions_stack.size(); + for (size_t i = level + 1; i < actions_stack_size; ++i) + { + auto & actions_stack_node = actions_stack[i]; + actions_stack_node.addInputColumnIfNecessary(function_node_name, function_node.getResultType()); + } + + return {function_node_name, level}; +} + +PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitQueryOrUnion(const QueryTreeNodePtr & node) +{ + const auto constant_value = node->getConstantValueOrNull(); + if (!constant_value) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Scalar subqueries must be evaluated as constants"); + + return visitConstantValue(constant_value->getValue(), constant_value->getType()); +} + +} + +PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_context_) + : 
planner_context(planner_context_) +{} + +ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAGPtr actions_dag, QueryTreeNodePtr expression_node) +{ + PlannerActionsVisitorImpl actions_visitor_impl(actions_dag, planner_context); + return actions_visitor_impl.visit(expression_node); +} + +String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, QueryTreeNodeToName & node_to_name) +{ + auto it = node_to_name.find(node); + if (it != node_to_name.end()) + return it->second; + + String result; + auto node_type = node->getNodeType(); + + switch (node_type) + { + case QueryTreeNodeType::COLUMN: + { + const auto * column_identifier = planner_context.getColumnNodeIdentifierOrNull(node); + result = column_identifier ? *column_identifier : node->getName(); + + break; + } + case QueryTreeNodeType::CONSTANT: + { + const auto & constant_node = node->as(); + result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); + break; + } + case QueryTreeNodeType::FUNCTION: + { + if (auto node_constant_value = node->getConstantValueOrNull()) + { + result = calculateConstantActionNodeName(node_constant_value->getValue(), node_constant_value->getType()); + } + else + { + const auto & function_node = node->as(); + String in_function_second_argument_node_name; + + if (isNameOfInFunction(function_node.getFunctionName())) + { + const auto & in_second_argument_node = function_node.getArguments().getNodes().at(1); + in_function_second_argument_node_name = planner_context.createSetKey(in_second_argument_node); + } + + WriteBufferFromOwnString buffer; + buffer << function_node.getFunctionName(); + + const auto & function_parameters_nodes = function_node.getParameters().getNodes(); + + if (!function_parameters_nodes.empty()) + { + buffer << '('; + + size_t function_parameters_nodes_size = function_parameters_nodes.size(); + for (size_t i = 0; i < function_parameters_nodes_size; ++i) + { + const auto & function_parameter_node = function_parameters_nodes[i]; + buffer << calculateActionNodeName(function_parameter_node, planner_context, node_to_name); + + if (i + 1 != function_parameters_nodes_size) + buffer << ", "; + } + + buffer << ')'; + } + + const auto & function_arguments_nodes = function_node.getArguments().getNodes(); + String function_argument_name; + + buffer << '('; + + size_t function_arguments_nodes_size = function_arguments_nodes.size(); + for (size_t i = 0; i < function_arguments_nodes_size; ++i) + { + if (i == 1 && !in_function_second_argument_node_name.empty()) + { + function_argument_name = in_function_second_argument_node_name; + } + else + { + const auto & function_argument_node = function_arguments_nodes[i]; + function_argument_name = calculateActionNodeName(function_argument_node, planner_context, node_to_name); + } + + buffer << function_argument_name; + + if (i + 1 != function_arguments_nodes_size) + buffer << ", "; + } + + buffer << ')'; + + if (function_node.isWindowFunction()) + { + buffer << " OVER ("; + buffer << calculateWindowNodeActionName(function_node.getWindowNode(), planner_context, node_to_name); + buffer << ')'; + } + + result = buffer.str(); + } + break; + } + case QueryTreeNodeType::UNION: + [[fallthrough]]; + case QueryTreeNodeType::QUERY: + { + if (auto node_constant_value = node->getConstantValueOrNull()) + { + result = calculateConstantActionNodeName(node_constant_value->getValue(), node_constant_value->getType()); + } + else + { + auto query_hash = node->getTreeHash(); + result = 
"__subquery_" + std::to_string(query_hash.first) + '_' + std::to_string(query_hash.second); + } + break; + } + case QueryTreeNodeType::LAMBDA: + { + auto lambda_hash = node->getTreeHash(); + + result = "__lambda_" + toString(lambda_hash.first) + '_' + toString(lambda_hash.second); + break; + } + default: + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid action query tree node {}", node->formatASTForErrorMessage()); + } + } + + node_to_name.emplace(node, result); + + return result; +} + +String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context) +{ + QueryTreeNodeToName empty_map; + return calculateActionNodeName(node, planner_context, empty_map); +} + +String calculateConstantActionNodeName(const Field & constant_literal, const DataTypePtr & constant_type) +{ + auto constant_name = applyVisitor(FieldVisitorToString(), constant_literal); + return constant_name + "_" + constant_type->getName(); +} + +String calculateConstantActionNodeName(const Field & constant_literal) +{ + return calculateConstantActionNodeName(constant_literal, applyVisitor(FieldToDataType(), constant_literal)); +} + +String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, QueryTreeNodeToName & node_to_name) +{ + auto & window_node = node->as(); + WriteBufferFromOwnString buffer; + + if (window_node.hasPartitionBy()) + { + buffer << "PARTITION BY "; + + auto & partition_by_nodes = window_node.getPartitionBy().getNodes(); + size_t partition_by_nodes_size = partition_by_nodes.size(); + + for (size_t i = 0; i < partition_by_nodes_size; ++i) + { + auto & partition_by_node = partition_by_nodes[i]; + buffer << calculateActionNodeName(partition_by_node, planner_context, node_to_name); + if (i + 1 != partition_by_nodes_size) + buffer << ", "; + } + } + + if (window_node.hasOrderBy()) + { + if (window_node.hasPartitionBy()) + buffer << ' '; + + buffer << "ORDER BY "; + + auto & order_by_nodes = window_node.getOrderBy().getNodes(); + size_t order_by_nodes_size = order_by_nodes.size(); + + for (size_t i = 0; i < order_by_nodes_size; ++i) + { + auto & sort_node = order_by_nodes[i]->as(); + buffer << calculateActionNodeName(sort_node.getExpression(), planner_context, node_to_name); + + auto sort_direction = sort_node.getSortDirection(); + buffer << (sort_direction == SortDirection::ASCENDING ? " ASC" : " DESC"); + + auto nulls_sort_direction = sort_node.getNullsSortDirection(); + + if (nulls_sort_direction) + buffer << " NULLS " << (nulls_sort_direction == sort_direction ? 
"LAST" : "FIRST"); + + if (auto collator = sort_node.getCollator()) + buffer << " COLLATE " << collator->getLocale(); + + if (sort_node.withFill()) + { + buffer << " WITH FILL"; + + if (sort_node.hasFillFrom()) + buffer << " FROM " << calculateActionNodeName(sort_node.getFillFrom(), planner_context, node_to_name); + + if (sort_node.hasFillTo()) + buffer << " TO " << calculateActionNodeName(sort_node.getFillTo(), planner_context, node_to_name); + + if (sort_node.hasFillStep()) + buffer << " STEP " << calculateActionNodeName(sort_node.getFillStep(), planner_context, node_to_name); + } + + if (i + 1 != order_by_nodes_size) + buffer << ", "; + } + } + + auto & window_frame = window_node.getWindowFrame(); + if (!window_frame.is_default) + { + if (window_node.hasPartitionBy() || window_node.hasOrderBy()) + buffer << ' '; + + buffer << window_frame.type << " BETWEEN "; + if (window_frame.begin_type == WindowFrame::BoundaryType::Current) + { + buffer << "CURRENT ROW"; + } + else if (window_frame.begin_type == WindowFrame::BoundaryType::Unbounded) + { + buffer << "UNBOUNDED"; + buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING"); + } + else + { + buffer << calculateActionNodeName(window_node.getFrameBeginOffsetNode(), planner_context, node_to_name); + buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING"); + } + + buffer << " AND "; + + if (window_frame.end_type == WindowFrame::BoundaryType::Current) + { + buffer << "CURRENT ROW"; + } + else if (window_frame.end_type == WindowFrame::BoundaryType::Unbounded) + { + buffer << "UNBOUNDED"; + buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING"); + } + else + { + buffer << calculateActionNodeName(window_node.getFrameEndOffsetNode(), planner_context, node_to_name); + buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING"); + } + } + + return buffer.str(); +} + +String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context) +{ + QueryTreeNodeToName empty_map; + return calculateWindowNodeActionName(node, planner_context, empty_map); +} + +} diff --git a/src/Planner/PlannerActionsVisitor.h b/src/Planner/PlannerActionsVisitor.h new file mode 100644 index 00000000000..405031daa40 --- /dev/null +++ b/src/Planner/PlannerActionsVisitor.h @@ -0,0 +1,78 @@ +#pragma once + +#include + +#include +#include + +#include + +#include + +#include + +namespace DB +{ + +class PlannerContext; +using PlannerContextPtr = std::shared_ptr; + +/** Planner actions visitor is responsible for adding necessary actions to calculate query tree expression node + * into actions dag. + * + * Preconditions: + * 1. Table expression data for table expression nodes is collected in planner context. + * For column node, that has column table expression source, identifier for column name in table expression data + * is used as action dag node name. + * 2. Sets for IN functions are already collected in planner context. + * + * During actions build, there is special handling for following functions: + * 1. Aggregate functions are added in actions dag as INPUT nodes. Aggregate functions arguments are not added. + * 2. For function `in` and its variants, already collected sets from planner context are used. + */ +class PlannerActionsVisitor +{ +public: + explicit PlannerActionsVisitor(const PlannerContextPtr & planner_context_); + + /** Add actions necessary to calculate expression node into expression dag. + * Necessary actions are not added in actions dag output. 
+ * Returns query tree expression node actions dag nodes. + */ + ActionsDAG::NodeRawConstPtrs visit(ActionsDAGPtr actions_dag, QueryTreeNodePtr expression_node); + +private: + const PlannerContextPtr planner_context; +}; + +/** Calculate query tree expression node action dag name and add them into node to name map. + * If node exists in map, name from map is used. + * + * For column node column node identifier from planner context is used. + */ +using QueryTreeNodeToName = std::unordered_map; +String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, QueryTreeNodeToName & node_to_name); + +/** Calculate query tree expression node action dag name. + * + * For column node column node identifier from planner context is used. + */ +String calculateActionNodeName(const QueryTreeNodePtr & node, const PlannerContext & planner_context); + +/// Calculate action node name for constant +String calculateConstantActionNodeName(const Field & constant_literal, const DataTypePtr & constant_type); + +/// Calculate action node name for constant, data type will be derived from constant literal value +String calculateConstantActionNodeName(const Field & constant_literal); + +/** Calculate action node name for window node. + * Window node action name can only be part of window function action name. + */ +String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, QueryTreeNodeToName & node_to_name); + +/** Calculate action node name for window node. + * Window node action name can only be part of window function action name. + */ +String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context); + +} diff --git a/src/Planner/PlannerAggregation.cpp b/src/Planner/PlannerAggregation.cpp new file mode 100644 index 00000000000..3322ef9364f --- /dev/null +++ b/src/Planner/PlannerAggregation.cpp @@ -0,0 +1,225 @@ +#include + +#include + +#include +#include +#include +#include +#include + +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +enum class GroupByKind +{ + ORDINARY, + ROLLUP, + CUBE, + GROUPING_SETS +}; + +class GroupingFunctionResolveVisitor : public InDepthQueryTreeVisitor +{ +public: + GroupingFunctionResolveVisitor(GroupByKind group_by_kind_, + const Names & aggregation_keys_, + const GroupingSetsParamsList & grouping_sets_parameters_list_, + const PlannerContext & planner_context_) + : group_by_kind(group_by_kind_) + , planner_context(planner_context_) + { + size_t aggregation_keys_size = aggregation_keys_.size(); + for (size_t i = 0; i < aggregation_keys_size; ++i) + aggegation_key_to_index.emplace(aggregation_keys_[i], i); + + for (const auto & grouping_sets_parameter : grouping_sets_parameters_list_) + { + grouping_sets_keys_indices.emplace_back(); + auto & grouping_set_keys_indices = grouping_sets_keys_indices.back(); + + for (const auto & used_key : grouping_sets_parameter.used_keys) + { + auto aggregation_key_index_it = aggegation_key_to_index.find(used_key); + if (aggregation_key_index_it == aggegation_key_to_index.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Aggregation key {} in GROUPING SETS is not found in GROUP BY keys"); + + grouping_set_keys_indices.push_back(aggregation_key_index_it->second); + } + } + } + + void visitImpl(const QueryTreeNodePtr & node) + { + auto * function_node = node->as(); + if (!function_node || 
function_node->getFunctionName() != "grouping") + return; + + size_t aggregation_keys_size = aggegation_key_to_index.size(); + + ColumnNumbers arguments_indexes; + + for (const auto & argument : function_node->getArguments().getNodes()) + { + String action_node_name = calculateActionNodeName(argument, planner_context); + + auto it = aggegation_key_to_index.find(action_node_name); + if (it == aggegation_key_to_index.end()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Argument of GROUPING function {} is not a part of GROUP BY clause", + argument->formatASTForErrorMessage()); + + arguments_indexes.push_back(it->second); + } + + QueryTreeNodeWeakPtr column_source; + auto grouping_set_argument_column = std::make_shared(NameAndTypePair{"__grouping_set", std::make_shared()}, column_source); + function_node->getArguments().getNodes().clear(); + + bool force_grouping_standard_compatibility = planner_context.getQueryContext()->getSettingsRef().force_grouping_standard_compatibility; + + switch (group_by_kind) + { + case GroupByKind::ORDINARY: + { + auto grouping_ordinary_function = std::make_shared(arguments_indexes, force_grouping_standard_compatibility); + auto grouping_ordinary_function_adaptor = std::make_shared(std::move(grouping_ordinary_function)); + function_node->resolveAsFunction(std::move(grouping_ordinary_function_adaptor), std::make_shared()); + break; + } + case GroupByKind::ROLLUP: + { + auto grouping_rollup_function = std::make_shared(arguments_indexes, aggregation_keys_size, force_grouping_standard_compatibility); + auto grouping_rollup_function_adaptor = std::make_shared(std::move(grouping_rollup_function)); + function_node->resolveAsFunction(std::move(grouping_rollup_function_adaptor), std::make_shared()); + function_node->getArguments().getNodes().push_back(std::move(grouping_set_argument_column)); + break; + } + case GroupByKind::CUBE: + { + auto grouping_cube_function = std::make_shared(arguments_indexes, aggregation_keys_size, force_grouping_standard_compatibility); + auto grouping_cube_function_adaptor = std::make_shared(std::move(grouping_cube_function)); + function_node->resolveAsFunction(std::move(grouping_cube_function_adaptor), std::make_shared()); + function_node->getArguments().getNodes().push_back(std::move(grouping_set_argument_column)); + break; + } + case GroupByKind::GROUPING_SETS: + { + auto grouping_grouping_sets_function = std::make_shared(arguments_indexes, grouping_sets_keys_indices, force_grouping_standard_compatibility); + auto grouping_grouping_sets_function_adaptor = std::make_shared(std::move(grouping_grouping_sets_function)); + function_node->resolveAsFunction(std::move(grouping_grouping_sets_function_adaptor), std::make_shared()); + function_node->getArguments().getNodes().push_back(std::move(grouping_set_argument_column)); + break; + } + } + } + + static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node) + { + return !(child_node->getNodeType() == QueryTreeNodeType::QUERY || child_node->getNodeType() == QueryTreeNodeType::UNION); + } + +private: + GroupByKind group_by_kind; + std::unordered_map aggegation_key_to_index; + // Indexes of aggregation keys used in each grouping set (only for GROUP BY GROUPING SETS) + ColumnNumbersList grouping_sets_keys_indices; + const PlannerContext & planner_context; +}; + +void resolveGroupingFunctions(QueryTreeNodePtr & node, + GroupByKind group_by_kind, + const Names & aggregation_keys, + const GroupingSetsParamsList & grouping_sets_parameters_list, + const PlannerContext & 
planner_context) +{ + auto & query_node_typed = node->as(); + + GroupingFunctionResolveVisitor visitor(group_by_kind, aggregation_keys, grouping_sets_parameters_list, planner_context); + + if (query_node_typed.hasHaving()) + visitor.visit(query_node_typed.getHaving()); + + if (query_node_typed.hasOrderBy()) + visitor.visit(query_node_typed.getOrderByNode()); + + visitor.visit(query_node_typed.getProjectionNode()); +} + +} + +void resolveGroupingFunctions(QueryTreeNodePtr & query_node, + const Names & aggregation_keys, + const GroupingSetsParamsList & grouping_sets_parameters_list, + const PlannerContext & planner_context) +{ + auto & query_node_typed = query_node->as(); + + GroupByKind group_by_kind = GroupByKind::ORDINARY; + if (query_node_typed.isGroupByWithRollup()) + group_by_kind = GroupByKind::ROLLUP; + else if (query_node_typed.isGroupByWithCube()) + group_by_kind = GroupByKind::CUBE; + else if (query_node_typed.isGroupByWithGroupingSets()) + group_by_kind = GroupByKind::GROUPING_SETS; + + resolveGroupingFunctions(query_node, group_by_kind, aggregation_keys, grouping_sets_parameters_list, planner_context); +} + +AggregateDescriptions extractAggregateDescriptions(const QueryTreeNodes & aggregate_function_nodes, const PlannerContext & planner_context) +{ + QueryTreeNodeToName node_to_name; + NameSet unique_aggregate_action_node_names; + AggregateDescriptions aggregate_descriptions; + + for (const auto & aggregate_function_node : aggregate_function_nodes) + { + const auto & aggregate_function_node_typed = aggregate_function_node->as(); + String node_name = calculateActionNodeName(aggregate_function_node, planner_context, node_to_name); + auto [_, inserted] = unique_aggregate_action_node_names.emplace(node_name); + if (!inserted) + continue; + + AggregateDescription aggregate_description; + aggregate_description.function = aggregate_function_node_typed.getAggregateFunction(); + + const auto & parameters_nodes = aggregate_function_node_typed.getParameters().getNodes(); + aggregate_description.parameters.reserve(parameters_nodes.size()); + + for (const auto & parameter_node : parameters_nodes) + { + /// Function parameters constness validated during analysis stage + aggregate_description.parameters.push_back(parameter_node->getConstantValue().getValue()); + } + + const auto & arguments_nodes = aggregate_function_node_typed.getArguments().getNodes(); + aggregate_description.argument_names.reserve(arguments_nodes.size()); + + for (const auto & argument_node : arguments_nodes) + { + String argument_node_name = calculateActionNodeName(argument_node, planner_context, node_to_name); + aggregate_description.argument_names.emplace_back(std::move(argument_node_name)); + } + + aggregate_description.column_name = std::move(node_name); + aggregate_descriptions.push_back(std::move(aggregate_description)); + } + + return aggregate_descriptions; +} + +} diff --git a/src/Planner/PlannerAggregation.h b/src/Planner/PlannerAggregation.h new file mode 100644 index 00000000000..6dfd7faca22 --- /dev/null +++ b/src/Planner/PlannerAggregation.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include + +#include +#include + +#include + +namespace DB +{ + +/** Resolve GROUPING functions in query node. + * GROUPING function is replaced with specialized GROUPING function based on GROUP BY modifiers. + * For ROLLUP, CUBE, GROUPING SETS specialized GROUPING function take special __grouping_set column as argument. 
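An illustrative call sequence for the two helpers declared just below in this header, mirroring how PlannerExpressionAnalysis.cpp (later in this patch) uses them; `query_tree`, `aggregate_function_nodes`, `aggregation_keys`, `grouping_sets_parameters_list` and the `planner_context` pointer are assumed to be prepared by the caller.

    // Aggregate descriptions are extracted once per unique aggregate action name,
    // then GROUPING calls are rewritten according to the GROUP BY modifiers.
    auto aggregate_descriptions = extractAggregateDescriptions(aggregate_function_nodes, *planner_context);
    resolveGroupingFunctions(query_tree, aggregation_keys, grouping_sets_parameters_list, *planner_context);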
+ */ +void resolveGroupingFunctions(QueryTreeNodePtr & query_node, + const Names & aggregation_keys, + const GroupingSetsParamsList & grouping_sets_parameters_list, + const PlannerContext & planner_context); + +/// Extract aggregate descriptions from aggregate function nodes +AggregateDescriptions extractAggregateDescriptions(const QueryTreeNodes & aggregate_function_nodes, const PlannerContext & planner_context); + +} diff --git a/src/Planner/PlannerContext.cpp b/src/Planner/PlannerContext.cpp new file mode 100644 index 00000000000..9f4a489bf5f --- /dev/null +++ b/src/Planner/PlannerContext.cpp @@ -0,0 +1,174 @@ +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +const ColumnIdentifier & GlobalPlannerContext::createColumnIdentifier(const QueryTreeNodePtr & column_node) +{ + const auto & column_node_typed = column_node->as(); + auto column_source_node = column_node_typed.getColumnSource(); + + return createColumnIdentifier(column_node_typed.getColumn(), column_source_node); +} + +const ColumnIdentifier & GlobalPlannerContext::createColumnIdentifier(const NameAndTypePair & column, const QueryTreeNodePtr & column_source_node) +{ + std::string column_identifier; + + if (column_source_node->hasAlias()) + column_identifier += column_source_node->getAlias(); + else if (const auto * table_source_node = column_source_node->as()) + column_identifier += table_source_node->getStorageID().getFullNameNotQuoted(); + + if (!column_identifier.empty()) + column_identifier += '.'; + + column_identifier += column.name; + column_identifier += '_' + std::to_string(column_identifiers.size()); + + auto [it, inserted] = column_identifiers.emplace(column_identifier); + assert(inserted); + + return *it; +} + +bool GlobalPlannerContext::hasColumnIdentifier(const ColumnIdentifier & column_identifier) +{ + return column_identifiers.contains(column_identifier); +} + +PlannerContext::PlannerContext(ContextPtr query_context_, GlobalPlannerContextPtr global_planner_context_) + : query_context(std::move(query_context_)) + , global_planner_context(std::move(global_planner_context_)) +{} + +TableExpressionData & PlannerContext::getOrCreateTableExpressionData(const QueryTreeNodePtr & table_expression_node) +{ + auto [it, _] = table_expression_node_to_data.emplace(table_expression_node, TableExpressionData()); + return it->second; +} + +const TableExpressionData & PlannerContext::getTableExpressionDataOrThrow(const QueryTreeNodePtr & table_expression_node) const +{ + auto table_expression_data_it = table_expression_node_to_data.find(table_expression_node); + if (table_expression_data_it == table_expression_node_to_data.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Table expression {} is not registered in planner context", + table_expression_node->formatASTForErrorMessage()); + + return table_expression_data_it->second; +} + +TableExpressionData & PlannerContext::getTableExpressionDataOrThrow(const QueryTreeNodePtr & table_expression_node) +{ + auto table_expression_data_it = table_expression_node_to_data.find(table_expression_node); + if (table_expression_data_it == table_expression_node_to_data.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Table expression {} is not registered in planner context", + table_expression_node->formatASTForErrorMessage()); + + return table_expression_data_it->second; +} + +const TableExpressionData * PlannerContext::getTableExpressionDataOrNull(const QueryTreeNodePtr & table_expression_node) const +{ + auto 
table_expression_data_it = table_expression_node_to_data.find(table_expression_node); + if (table_expression_data_it == table_expression_node_to_data.end()) + return nullptr; + + return &table_expression_data_it->second; +} + +TableExpressionData * PlannerContext::getTableExpressionDataOrNull(const QueryTreeNodePtr & table_expression_node) +{ + auto table_expression_data_it = table_expression_node_to_data.find(table_expression_node); + if (table_expression_data_it == table_expression_node_to_data.end()) + return nullptr; + + return &table_expression_data_it->second; +} + +const ColumnIdentifier & PlannerContext::getColumnNodeIdentifierOrThrow(const QueryTreeNodePtr & column_node) const +{ + auto & column_node_typed = column_node->as(); + const auto & column_name = column_node_typed.getColumnName(); + auto column_source = column_node_typed.getColumnSource(); + const auto & table_expression_data = getTableExpressionDataOrThrow(column_source); + return table_expression_data.getColumnIdentifierOrThrow(column_name); +} + +const ColumnIdentifier * PlannerContext::getColumnNodeIdentifierOrNull(const QueryTreeNodePtr & column_node) const +{ + auto & column_node_typed = column_node->as(); + const auto & column_name = column_node_typed.getColumnName(); + auto column_source = column_node_typed.getColumnSourceOrNull(); + if (!column_source) + return nullptr; + + const auto * table_expression_data = getTableExpressionDataOrNull(column_source); + if (!table_expression_data) + return nullptr; + + return table_expression_data->getColumnIdentifierOrNull(column_name); +} + +PlannerContext::SetKey PlannerContext::createSetKey(const QueryTreeNodePtr & set_source_node) +{ + auto set_source_hash = set_source_node->getTreeHash(); + return "__set_" + toString(set_source_hash.first) + '_' + toString(set_source_hash.second); +} + +void PlannerContext::registerSet(const SetKey & key, PlannerSet planner_set) +{ + if (!planner_set.getSet()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Set must be initialized"); + + const auto & subquery_node = planner_set.getSubqueryNode(); + if (subquery_node) + { + auto node_type = subquery_node->getNodeType(); + + if (node_type != QueryTreeNodeType::QUERY && + node_type != QueryTreeNodeType::UNION) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Invalid node for set table expression. Expected query or union. Actual {}", + subquery_node->formatASTForErrorMessage()); + } + + set_key_to_set.emplace(key, std::move(planner_set)); +} + +bool PlannerContext::hasSet(const SetKey & key) const +{ + return set_key_to_set.contains(key); +} + +const PlannerSet & PlannerContext::getSetOrThrow(const SetKey & key) const +{ + auto it = set_key_to_set.find(key); + if (it == set_key_to_set.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "No set is registered for key {}", + key); + + return it->second; +} + +const PlannerSet * PlannerContext::getSetOrNull(const SetKey & key) const +{ + auto it = set_key_to_set.find(key); + if (it == set_key_to_set.end()) + return nullptr; + + return &it->second; +} + +} diff --git a/src/Planner/PlannerContext.h b/src/Planner/PlannerContext.h new file mode 100644 index 00000000000..63874bf7ab9 --- /dev/null +++ b/src/Planner/PlannerContext.h @@ -0,0 +1,205 @@ +#pragma once + +#include + +#include +#include + +#include +#include + +#include + +#include + +namespace DB +{ + +/** Global planner context contains common objects that are shared between each planner context. + * + * 1. Column identifiers. 
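A small sketch of the column identifier workflow described here, assuming a hypothetical resolved `column_node` (a QueryTreeNodePtr whose source is a table, table function or subquery); it uses only the createColumnIdentifier/hasColumnIdentifier API defined below.

    // Identifiers are unique per planning session: the source alias or storage name,
    // the column name and a running counter are combined (e.g. "t1.id_0").
    auto global_planner_context = std::make_shared<GlobalPlannerContext>();
    const ColumnIdentifier & column_identifier = global_planner_context->createColumnIdentifier(column_node);
    assert(global_planner_context->hasColumnIdentifier(column_identifier));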
+ */ +class GlobalPlannerContext +{ +public: + GlobalPlannerContext() = default; + + /** Create column identifier for column node. + * + * Result column identifier is added into context. + */ + const ColumnIdentifier & createColumnIdentifier(const QueryTreeNodePtr & column_node); + + /** Create column identifier for column and column source. + * + * Result column identifier is added into context. + */ + const ColumnIdentifier & createColumnIdentifier(const NameAndTypePair & column, const QueryTreeNodePtr & column_source_node); + + /// Check if context has column identifier + bool hasColumnIdentifier(const ColumnIdentifier & column_identifier); + +private: + std::unordered_set column_identifiers; +}; + +using GlobalPlannerContextPtr = std::shared_ptr; + +/** PlannerSet is wrapper around Set that is used during query planning. + * + * If subquery node is null, such set is already prepared for execution. + * + * If subquery node is not null, then set must be build from the result of the subquery. + * If subquery node is not null, it must have QUERY or UNION type. + */ +class PlannerSet +{ +public: + /// Construct planner set that is ready for execution + explicit PlannerSet(SetPtr set_) + : set(std::move(set_)) + {} + + /// Construct planner set with set and subquery node + explicit PlannerSet(SetPtr set_, QueryTreeNodePtr subquery_node_) + : set(std::move(set_)) + , subquery_node(std::move(subquery_node_)) + {} + + /// Get set + const SetPtr & getSet() const + { + return set; + } + + /// Get subquery node + const QueryTreeNodePtr & getSubqueryNode() const + { + return subquery_node; + } + +private: + SetPtr set; + + QueryTreeNodePtr subquery_node; +}; + +class PlannerContext +{ +public: + /// Create planner context with query context and global planner context + PlannerContext(ContextPtr query_context_, GlobalPlannerContextPtr global_planner_context_); + + /// Get planner context query context + const ContextPtr & getQueryContext() const + { + return query_context; + } + + /// Get planner context query context + ContextPtr & getQueryContext() + { + return query_context; + } + + /// Get global planner context + const GlobalPlannerContextPtr & getGlobalPlannerContext() const + { + return global_planner_context; + } + + /// Get global planner context + GlobalPlannerContextPtr & getGlobalPlannerContext() + { + return global_planner_context; + } + + /// Get or create table expression data for table expression node. + TableExpressionData & getOrCreateTableExpressionData(const QueryTreeNodePtr & table_expression_node); + + /** Get table expression data. + * Exception is thrown if there are no table expression data for table expression node. + */ + const TableExpressionData & getTableExpressionDataOrThrow(const QueryTreeNodePtr & table_expression_node) const; + + /** Get table expression data. + * Exception is thrown if there are no table expression data for table expression node. + */ + TableExpressionData & getTableExpressionDataOrThrow(const QueryTreeNodePtr & table_expression_node); + + /** Get table expression data. + * Null is returned if there are no table expression data for table expression node. + */ + const TableExpressionData * getTableExpressionDataOrNull(const QueryTreeNodePtr & table_expression_node) const; + + /** Get table expression data. + * Null is returned if there are no table expression data for table expression node. 
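To show how PlannerSet and the set registration interface of PlannerContext (declared further below in this header) fit together, a hedged sketch follows; `in_second_argument` stands for a hypothetical query tree node that is the right-hand side of an IN function, and `set` for an already prepared SetPtr.

    // The key is derived from the tree hash of the set source, so the same
    // subquery or tuple is registered (and reused) only once.
    auto set_key = PlannerContext::createSetKey(in_second_argument);
    if (!planner_context->hasSet(set_key))
        planner_context->registerSet(set_key, PlannerSet(set));
    const auto & prepared_set = planner_context->getSetOrThrow(set_key).getSet();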
+ */ + TableExpressionData * getTableExpressionDataOrNull(const QueryTreeNodePtr & table_expression_node); + + /// Get table expression node to data read only map + const std::unordered_map & getTableExpressionNodeToData() const + { + return table_expression_node_to_data; + } + + /** Get column node identifier. + * For column node source check if table expression data is registered. + * If table expression data is not registered exception is thrown. + * In table expression data get column node identifier using column name. + */ + const ColumnIdentifier & getColumnNodeIdentifierOrThrow(const QueryTreeNodePtr & column_node) const; + + /** Get column node identifier. + * For column node source check if table expression data is registered. + * If table expression data is not registered null is returned. + * In table expression data get column node identifier or null using column name. + */ + const ColumnIdentifier * getColumnNodeIdentifierOrNull(const QueryTreeNodePtr & column_node) const; + + using SetKey = std::string; + + using SetKeyToSet = std::unordered_map; + + /// Create set key for set source node + static SetKey createSetKey(const QueryTreeNodePtr & set_source_node); + + /// Register set for set key + void registerSet(const SetKey & key, PlannerSet planner_set); + + /// Returns true if set is registered for key, false otherwise + bool hasSet(const SetKey & key) const; + + /// Get set for key, if no set is registered logical exception is thrown + const PlannerSet & getSetOrThrow(const SetKey & key) const; + + /// Get set for key, if no set is registered null is returned + const PlannerSet * getSetOrNull(const SetKey & key) const; + + /// Get registered sets + const SetKeyToSet & getRegisteredSets() const + { + return set_key_to_set; + } + +private: + /// Query context + ContextPtr query_context; + + /// Global planner context + GlobalPlannerContextPtr global_planner_context; + + /// Column node to column identifier + std::unordered_map column_node_to_column_identifier; + + /// Table expression node to data + std::unordered_map table_expression_node_to_data; + + /// Set key to set + SetKeyToSet set_key_to_set; + +}; + +using PlannerContextPtr = std::shared_ptr; + +} diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp new file mode 100644 index 00000000000..b034edf97d8 --- /dev/null +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -0,0 +1,508 @@ +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace +{ + +/** Construct filter analysis result for filter expression node + * Actions before filter are added into into actions chain. + * It is client responsibility to update filter analysis result if filter column must be removed after chain is finalized. + */ +FilterAnalysisResult analyzeFilter(const QueryTreeNodePtr & filter_expression_node, + const ColumnsWithTypeAndName & join_tree_input_columns, + const PlannerContextPtr & planner_context, + ActionsChain & actions_chain) +{ + const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); + const auto & filter_input = chain_available_output_columns ? 
*chain_available_output_columns : join_tree_input_columns; + + FilterAnalysisResult result; + + result.filter_actions = buildActionsDAGFromExpressionNode(filter_expression_node, filter_input, planner_context); + result.filter_column_name = result.filter_actions->getOutputs().at(0)->result_name; + actions_chain.addStep(std::make_unique(result.filter_actions)); + + return result; +} + +/** Construct aggregation analysis result if query tree has GROUP BY or aggregates. + * Actions before aggregation are added into actions chain, if result is not null optional. + */ +std::optional analyzeAggregation(QueryTreeNodePtr & query_tree, + const ColumnsWithTypeAndName & join_tree_input_columns, + const PlannerContextPtr & planner_context, + ActionsChain & actions_chain) +{ + auto & query_node = query_tree->as(); + + auto aggregate_function_nodes = collectAggregateFunctionNodes(query_tree); + auto aggregates_descriptions = extractAggregateDescriptions(aggregate_function_nodes, *planner_context); + + ColumnsWithTypeAndName aggregates_columns; + aggregates_columns.reserve(aggregates_descriptions.size()); + for (auto & aggregate_description : aggregates_descriptions) + aggregates_columns.emplace_back(nullptr, aggregate_description.function->getReturnType(), aggregate_description.column_name); + + Names aggregation_keys; + + const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); + const auto & group_by_input = chain_available_output_columns ? *chain_available_output_columns : join_tree_input_columns; + + ActionsDAGPtr before_aggregation_actions = std::make_shared(group_by_input); + before_aggregation_actions->getOutputs().clear(); + + std::unordered_set before_aggregation_actions_output_node_names; + + GroupingSetsParamsList grouping_sets_parameters_list; + bool group_by_with_constant_keys = false; + bool disable_grouping_sets = false; + + PlannerActionsVisitor actions_visitor(planner_context); + + /// Add expressions from GROUP BY + + if (query_node.hasGroupBy()) + { + if (query_node.isGroupByWithGroupingSets()) + { + for (auto & grouping_set_keys_list_node : query_node.getGroupBy().getNodes()) + { + auto & grouping_set_keys_list_node_typed = grouping_set_keys_list_node->as(); + grouping_sets_parameters_list.emplace_back(); + auto & grouping_sets_parameters = grouping_sets_parameters_list.back(); + + for (auto & grouping_set_key_node : grouping_set_keys_list_node_typed.getNodes()) + { + group_by_with_constant_keys |= grouping_set_key_node->hasConstantValue(); + + auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions, grouping_set_key_node); + aggregation_keys.reserve(expression_dag_nodes.size()); + + for (auto & expression_dag_node : expression_dag_nodes) + { + grouping_sets_parameters.used_keys.push_back(expression_dag_node->result_name); + if (before_aggregation_actions_output_node_names.contains(expression_dag_node->result_name)) + continue; + + aggregation_keys.push_back(expression_dag_node->result_name); + before_aggregation_actions->getOutputs().push_back(expression_dag_node); + before_aggregation_actions_output_node_names.insert(expression_dag_node->result_name); + } + } + } + + for (auto & grouping_sets_parameter : grouping_sets_parameters_list) + { + NameSet grouping_sets_used_keys; + Names grouping_sets_keys; + + for (auto & key : grouping_sets_parameter.used_keys) + { + auto [_, inserted] = grouping_sets_used_keys.insert(key); + if (inserted) + grouping_sets_keys.push_back(key); + } + + for (auto & key : aggregation_keys) + 
{ + if (grouping_sets_used_keys.contains(key)) + continue; + + grouping_sets_parameter.missing_keys.push_back(key); + } + + grouping_sets_parameter.used_keys = std::move(grouping_sets_keys); + } + + /// It is expected by execution layer that if there are only 1 grouping sets it will be removed + if (grouping_sets_parameters_list.size() == 1) + { + disable_grouping_sets = true; + grouping_sets_parameters_list.clear(); + } + } + else + { + for (auto & group_by_key_node : query_node.getGroupBy().getNodes()) + group_by_with_constant_keys |= group_by_key_node->hasConstantValue(); + + auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions, query_node.getGroupByNode()); + aggregation_keys.reserve(expression_dag_nodes.size()); + + for (auto & expression_dag_node : expression_dag_nodes) + { + if (before_aggregation_actions_output_node_names.contains(expression_dag_node->result_name)) + continue; + + aggregation_keys.push_back(expression_dag_node->result_name); + before_aggregation_actions->getOutputs().push_back(expression_dag_node); + before_aggregation_actions_output_node_names.insert(expression_dag_node->result_name); + } + } + } + + /// Add expressions from aggregate functions arguments + + for (auto & aggregate_function_node : aggregate_function_nodes) + { + auto & aggregate_function_node_typed = aggregate_function_node->as(); + for (const auto & aggregate_function_node_argument : aggregate_function_node_typed.getArguments().getNodes()) + { + auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions, aggregate_function_node_argument); + for (auto & expression_dag_node : expression_dag_nodes) + { + if (before_aggregation_actions_output_node_names.contains(expression_dag_node->result_name)) + continue; + + before_aggregation_actions->getOutputs().push_back(expression_dag_node); + before_aggregation_actions_output_node_names.insert(expression_dag_node->result_name); + } + } + } + + if (aggregation_keys.empty() && aggregates_descriptions.empty()) + return {}; + + /** For non ordinary GROUP BY we add virtual __grouping_set column + * With set number, which is used as an additional key at the stage of merging aggregating data. + */ + if (query_node.isGroupByWithRollup() || query_node.isGroupByWithCube() || (query_node.isGroupByWithGroupingSets() && !disable_grouping_sets)) + aggregates_columns.emplace_back(nullptr, std::make_shared(), "__grouping_set"); + + resolveGroupingFunctions(query_tree, aggregation_keys, grouping_sets_parameters_list, *planner_context); + + /// Only aggregation keys and aggregates are available for next steps after GROUP BY step + auto aggregate_step = std::make_unique(before_aggregation_actions, ActionsChainStep::AvailableOutputColumnsStrategy::OUTPUT_NODES, aggregates_columns); + actions_chain.addStep(std::move(aggregate_step)); + + AggregationAnalysisResult aggregation_analysis_result; + aggregation_analysis_result.before_aggregation_actions = before_aggregation_actions; + aggregation_analysis_result.aggregation_keys = std::move(aggregation_keys); + aggregation_analysis_result.aggregate_descriptions = std::move(aggregates_descriptions); + aggregation_analysis_result.grouping_sets_parameters_list = std::move(grouping_sets_parameters_list); + aggregation_analysis_result.group_by_with_constant_keys = group_by_with_constant_keys; + + return aggregation_analysis_result; +} + +/** Construct window analysis result if query tree has window functions. 
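To make the "null optional" contract of these analyze* helpers concrete, here is a short sketch mirroring buildExpressionAnalysisResult further below; `expressions_analysis_result` stands for the PlannerExpressionsAnalysisResult being assembled there.

    // analyzeAggregation/analyzeWindow return an empty optional when the query has
    // no GROUP BY, aggregates or window functions; only non-empty results are attached.
    auto aggregation_analysis_result_optional = analyzeAggregation(query_tree, join_tree_input_columns, planner_context, actions_chain);
    if (aggregation_analysis_result_optional)
        expressions_analysis_result.addAggregation(std::move(*aggregation_analysis_result_optional));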
+ * Actions before window functions are added into actions chain, if result is not null optional. + */ +std::optional analyzeWindow(QueryTreeNodePtr & query_tree, + const ColumnsWithTypeAndName & join_tree_input_columns, + const PlannerContextPtr & planner_context, + ActionsChain & actions_chain) +{ + auto window_function_nodes = collectWindowFunctionNodes(query_tree); + if (window_function_nodes.empty()) + return {}; + + auto window_descriptions = extractWindowDescriptions(window_function_nodes, *planner_context); + + const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); + const auto & window_input = chain_available_output_columns ? *chain_available_output_columns : join_tree_input_columns; + + PlannerActionsVisitor actions_visitor(planner_context); + + ActionsDAGPtr before_window_actions = std::make_shared(window_input); + before_window_actions->getOutputs().clear(); + + std::unordered_set before_window_actions_output_node_names; + + for (auto & window_function_node : window_function_nodes) + { + auto & window_function_node_typed = window_function_node->as(); + auto & window_node = window_function_node_typed.getWindowNode()->as(); + + auto expression_dag_nodes = actions_visitor.visit(before_window_actions, window_function_node_typed.getArgumentsNode()); + + for (auto & expression_dag_node : expression_dag_nodes) + { + if (before_window_actions_output_node_names.contains(expression_dag_node->result_name)) + continue; + + before_window_actions->getOutputs().push_back(expression_dag_node); + before_window_actions_output_node_names.insert(expression_dag_node->result_name); + } + + expression_dag_nodes = actions_visitor.visit(before_window_actions, window_node.getPartitionByNode()); + + for (auto & expression_dag_node : expression_dag_nodes) + { + if (before_window_actions_output_node_names.contains(expression_dag_node->result_name)) + continue; + + before_window_actions->getOutputs().push_back(expression_dag_node); + before_window_actions_output_node_names.insert(expression_dag_node->result_name); + } + + /** We add only sort column sort expression in before WINDOW actions DAG. + * WITH fill expressions must be constant nodes. 
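Each analyze* helper in this file follows the same actions-chain pattern; a condensed, illustrative sketch of that pattern is shown here, with a hypothetical `some_expression_node`, the chain and input variables as in the surrounding code, and the step type written as ActionsChainStep (an assumption, matching the strategy enum used elsewhere in this file).

    // Take the columns produced by the previous step (or the join tree inputs for
    // the first step), build a DAG on top of them, then register it as a new step.
    const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull();
    const auto & step_input = chain_available_output_columns ? *chain_available_output_columns : join_tree_input_columns;
    auto step_actions = buildActionsDAGFromExpressionNode(some_expression_node, step_input, planner_context);
    actions_chain.addStep(std::make_unique<ActionsChainStep>(step_actions));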
+ */ + auto & order_by_node_list = window_node.getOrderBy(); + for (auto & sort_node : order_by_node_list.getNodes()) + { + auto & sort_node_typed = sort_node->as(); + expression_dag_nodes = actions_visitor.visit(before_window_actions, sort_node_typed.getExpression()); + + for (auto & expression_dag_node : expression_dag_nodes) + { + if (before_window_actions_output_node_names.contains(expression_dag_node->result_name)) + continue; + + before_window_actions->getOutputs().push_back(expression_dag_node); + before_window_actions_output_node_names.insert(expression_dag_node->result_name); + } + } + } + + ColumnsWithTypeAndName window_functions_additional_columns; + + for (auto & window_description : window_descriptions) + for (auto & window_function : window_description.window_functions) + window_functions_additional_columns.emplace_back(nullptr, window_function.aggregate_function->getReturnType(), window_function.column_name); + + auto before_window_step = std::make_unique(before_window_actions, + ActionsChainStep::AvailableOutputColumnsStrategy::ALL_NODES, + window_functions_additional_columns); + actions_chain.addStep(std::move(before_window_step)); + + WindowAnalysisResult result; + result.before_window_actions = std::move(before_window_actions); + result.window_descriptions = std::move(window_descriptions); + + return result; +} + +/** Construct projection analysis result. + * Projection actions are added into actions chain. + * It is client responsibility to update projection analysis result with project names actions after chain is finalized. + */ +ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, + const ColumnsWithTypeAndName & join_tree_input_columns, + const PlannerContextPtr & planner_context, + ActionsChain & actions_chain) +{ + const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); + const auto & projection_input = chain_available_output_columns ? *chain_available_output_columns : join_tree_input_columns; + auto projection_actions = buildActionsDAGFromExpressionNode(query_node.getProjectionNode(), projection_input, planner_context); + + auto projection_columns = query_node.getProjectionColumns(); + size_t projection_columns_size = projection_columns.size(); + + Names projection_column_names; + NamesWithAliases projection_column_names_with_display_aliases; + projection_column_names_with_display_aliases.reserve(projection_columns_size); + + auto & projection_actions_outputs = projection_actions->getOutputs(); + size_t projection_outputs_size = projection_actions_outputs.size(); + + if (projection_columns_size != projection_outputs_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "QueryTree projection nodes size mismatch. Expected {}. 
Actual {}", + projection_columns_size, + projection_outputs_size); + + for (size_t i = 0; i < projection_outputs_size; ++i) + { + auto & projection_column = projection_columns[i]; + const auto * projection_node = projection_actions_outputs[i]; + const auto & projection_node_name = projection_node->result_name; + + projection_column_names.push_back(projection_node_name); + projection_column_names_with_display_aliases.push_back({projection_node_name, projection_column.name}); + } + + auto projection_actions_step = std::make_unique(projection_actions); + actions_chain.addStep(std::move(projection_actions_step)); + + ProjectionAnalysisResult result; + result.projection_actions = std::move(projection_actions); + result.projection_column_names = std::move(projection_column_names); + result.projection_column_names_with_display_aliases = std::move(projection_column_names_with_display_aliases); + + return result; +} + +/** Construct sort analysis result. + * Actions before sort are added into actions chain. + */ +SortAnalysisResult analyzeSort(const QueryNode & query_node, + const ColumnsWithTypeAndName & join_tree_input_columns, + const PlannerContextPtr & planner_context, + ActionsChain & actions_chain) +{ + const auto *chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); + const auto & order_by_input = chain_available_output_columns ? *chain_available_output_columns : join_tree_input_columns; + + ActionsDAGPtr before_sort_actions = std::make_shared(order_by_input); + auto & before_sort_actions_outputs = before_sort_actions->getOutputs(); + before_sort_actions_outputs.clear(); + + PlannerActionsVisitor actions_visitor(planner_context); + + std::unordered_set before_sort_actions_dag_output_node_names; + + /** We add only sort node sort expression in before ORDER BY actions DAG. + * WITH fill expressions must be constant nodes. + */ + const auto & order_by_node_list = query_node.getOrderBy(); + for (const auto & sort_node : order_by_node_list.getNodes()) + { + auto & sort_node_typed = sort_node->as(); + auto expression_dag_nodes = actions_visitor.visit(before_sort_actions, sort_node_typed.getExpression()); + + for (auto & action_dag_node : expression_dag_nodes) + { + if (before_sort_actions_dag_output_node_names.contains(action_dag_node->result_name)) + continue; + + before_sort_actions_outputs.push_back(action_dag_node); + before_sort_actions_dag_output_node_names.insert(action_dag_node->result_name); + } + } + + auto actions_step_before_sort = std::make_unique(before_sort_actions); + actions_chain.addStep(std::move(actions_step_before_sort)); + + return SortAnalysisResult{std::move(before_sort_actions)}; +} + +/** Construct limit by analysis result. + * Actions before limit by are added into actions chain. + */ +LimitByAnalysisResult analyzeLimitBy(const QueryNode & query_node, + const ColumnsWithTypeAndName & join_tree_input_columns, + const PlannerContextPtr & planner_context, + ActionsChain & actions_chain) +{ + const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); + const auto & limit_by_input = chain_available_output_columns ? 
*chain_available_output_columns : join_tree_input_columns; + auto before_limit_by_actions = buildActionsDAGFromExpressionNode(query_node.getLimitByNode(), limit_by_input, planner_context); + + Names limit_by_column_names; + limit_by_column_names.reserve(before_limit_by_actions->getOutputs().size()); + for (auto & output_node : before_limit_by_actions->getOutputs()) + limit_by_column_names.push_back(output_node->result_name); + + auto actions_step_before_limit_by = std::make_unique(before_limit_by_actions); + actions_chain.addStep(std::move(actions_step_before_limit_by)); + + return LimitByAnalysisResult{std::move(before_limit_by_actions), std::move(limit_by_column_names)}; +} + +} + +PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(QueryTreeNodePtr query_tree, + const ColumnsWithTypeAndName & join_tree_input_columns, + const PlannerContextPtr & planner_context) +{ + auto & query_node = query_tree->as(); + + ActionsChain actions_chain; + + std::optional where_analysis_result_optional; + std::optional where_action_step_index_optional; + + if (query_node.hasWhere()) + { + where_analysis_result_optional = analyzeFilter(query_node.getWhere(), join_tree_input_columns, planner_context, actions_chain); + where_action_step_index_optional = actions_chain.getLastStepIndex(); + } + + auto aggregation_analysis_result_optional = analyzeAggregation(query_tree, join_tree_input_columns, planner_context, actions_chain); + + std::optional having_analysis_result_optional; + std::optional having_action_step_index_optional; + + if (query_node.hasHaving()) + { + having_analysis_result_optional = analyzeFilter(query_node.getHaving(), join_tree_input_columns, planner_context, actions_chain); + having_action_step_index_optional = actions_chain.getLastStepIndex(); + } + + auto window_analysis_result_optional = analyzeWindow(query_tree, join_tree_input_columns, planner_context, actions_chain); + auto projection_analysis_result = analyzeProjection(query_node, join_tree_input_columns, planner_context, actions_chain); + + std::optional sort_analysis_result_optional; + if (query_node.hasOrderBy()) + sort_analysis_result_optional = analyzeSort(query_node, join_tree_input_columns, planner_context, actions_chain); + + std::optional limit_by_analysis_result_optional; + + if (query_node.hasLimitBy()) + limit_by_analysis_result_optional = analyzeLimitBy(query_node, join_tree_input_columns, planner_context, actions_chain); + + const auto * chain_available_output_columns = actions_chain.getLastStepAvailableOutputColumnsOrNull(); + const auto & project_names_input = chain_available_output_columns ? 
*chain_available_output_columns : join_tree_input_columns; + auto project_names_actions = std::make_shared(project_names_input); + project_names_actions->project(projection_analysis_result.projection_column_names_with_display_aliases); + actions_chain.addStep(std::make_unique(project_names_actions)); + + // std::cout << "Chain dump before finalize" << std::endl; + // std::cout << actions_chain.dump() << std::endl; + + actions_chain.finalize(); + + // std::cout << "Chain dump after finalize" << std::endl; + // std::cout << actions_chain.dump() << std::endl; + + projection_analysis_result.project_names_actions = std::move(project_names_actions); + + PlannerExpressionsAnalysisResult expressions_analysis_result(std::move(projection_analysis_result)); + + if (where_action_step_index_optional && where_analysis_result_optional) + { + auto & where_analysis_result = *where_analysis_result_optional; + auto & where_actions_chain_node = actions_chain.at(*where_action_step_index_optional); + where_analysis_result.remove_filter_column = !where_actions_chain_node->getChildRequiredOutputColumnsNames().contains(where_analysis_result.filter_column_name); + expressions_analysis_result.addWhere(std::move(where_analysis_result)); + } + + if (aggregation_analysis_result_optional) + expressions_analysis_result.addAggregation(std::move(*aggregation_analysis_result_optional)); + + if (having_action_step_index_optional && having_analysis_result_optional) + { + auto & having_analysis_result = *having_analysis_result_optional; + auto & having_actions_chain_node = actions_chain.at(*having_action_step_index_optional); + having_analysis_result.remove_filter_column = !having_actions_chain_node->getChildRequiredOutputColumnsNames().contains(having_analysis_result.filter_column_name); + expressions_analysis_result.addHaving(std::move(having_analysis_result)); + } + + if (window_analysis_result_optional) + expressions_analysis_result.addWindow(std::move(*window_analysis_result_optional)); + + if (sort_analysis_result_optional) + expressions_analysis_result.addSort(std::move(*sort_analysis_result_optional)); + + if (limit_by_analysis_result_optional) + expressions_analysis_result.addLimitBy(std::move(*limit_by_analysis_result_optional)); + + return expressions_analysis_result; +} + +} diff --git a/src/Planner/PlannerExpressionAnalysis.h b/src/Planner/PlannerExpressionAnalysis.h new file mode 100644 index 00000000000..aefb3c369d0 --- /dev/null +++ b/src/Planner/PlannerExpressionAnalysis.h @@ -0,0 +1,175 @@ +#pragma once + +#include +#include + +#include + +#include + +#include +#include +#include + +namespace DB +{ + +struct ProjectionAnalysisResult +{ + ActionsDAGPtr projection_actions; + Names projection_column_names; + NamesWithAliases projection_column_names_with_display_aliases; + ActionsDAGPtr project_names_actions; +}; + +struct FilterAnalysisResult +{ + ActionsDAGPtr filter_actions; + std::string filter_column_name; + bool remove_filter_column = false; +}; + +struct AggregationAnalysisResult +{ + ActionsDAGPtr before_aggregation_actions; + Names aggregation_keys; + AggregateDescriptions aggregate_descriptions; + GroupingSetsParamsList grouping_sets_parameters_list; + bool group_by_with_constant_keys = false; +}; + +struct WindowAnalysisResult +{ + ActionsDAGPtr before_window_actions; + std::vector window_descriptions; +}; + +struct SortAnalysisResult +{ + ActionsDAGPtr before_order_by_actions; +}; + +struct LimitByAnalysisResult +{ + ActionsDAGPtr before_limit_by_actions; + Names limit_by_column_names; +}; + +class 
PlannerExpressionsAnalysisResult +{ +public: + explicit PlannerExpressionsAnalysisResult(ProjectionAnalysisResult projection_analysis_result_) + : projection_analysis_result(std::move(projection_analysis_result_)) + {} + + const ProjectionAnalysisResult & getProjection() const + { + return projection_analysis_result; + } + + bool hasWhere() const + { + return where_analysis_result.filter_actions != nullptr; + } + + const FilterAnalysisResult & getWhere() const + { + return where_analysis_result; + } + + void addWhere(FilterAnalysisResult where_analysis_result_) + { + where_analysis_result = std::move(where_analysis_result_); + } + + bool hasAggregation() const + { + return !aggregation_analysis_result.aggregation_keys.empty() || !aggregation_analysis_result.aggregate_descriptions.empty(); + } + + const AggregationAnalysisResult & getAggregation() const + { + return aggregation_analysis_result; + } + + void addAggregation(AggregationAnalysisResult aggregation_analysis_result_) + { + aggregation_analysis_result = std::move(aggregation_analysis_result_); + } + + bool hasHaving() const + { + return having_analysis_result.filter_actions != nullptr; + } + + const FilterAnalysisResult & getHaving() const + { + return having_analysis_result; + } + + void addHaving(FilterAnalysisResult having_analysis_result_) + { + having_analysis_result = std::move(having_analysis_result_); + } + + bool hasWindow() const + { + return !window_analysis_result.window_descriptions.empty(); + } + + const WindowAnalysisResult & getWindow() const + { + return window_analysis_result; + } + + void addWindow(WindowAnalysisResult window_analysis_result_) + { + window_analysis_result = std::move(window_analysis_result_); + } + + bool hasSort() const + { + return sort_analysis_result.before_order_by_actions != nullptr; + } + + const SortAnalysisResult & getSort() const + { + return sort_analysis_result; + } + + void addSort(SortAnalysisResult sort_analysis_result_) + { + sort_analysis_result = std::move(sort_analysis_result_); + } + + bool hasLimitBy() const + { + return limit_by_analysis_result.before_limit_by_actions != nullptr; + } + + const LimitByAnalysisResult & getLimitBy() const + { + return limit_by_analysis_result; + } + + void addLimitBy(LimitByAnalysisResult limit_by_analysis_result_) + { + limit_by_analysis_result = std::move(limit_by_analysis_result_); + } + +private: + ProjectionAnalysisResult projection_analysis_result; + FilterAnalysisResult where_analysis_result; + AggregationAnalysisResult aggregation_analysis_result; + FilterAnalysisResult having_analysis_result; + WindowAnalysisResult window_analysis_result; + SortAnalysisResult sort_analysis_result; + LimitByAnalysisResult limit_by_analysis_result; +}; + +/// Build expression analysis result for query tree, join tree input columns and planner context +PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(QueryTreeNodePtr query_tree, + const ColumnsWithTypeAndName & join_tree_input_columns, + const PlannerContextPtr & planner_context); + +} diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp new file mode 100644 index 00000000000..4cb446a65a0 --- /dev/null +++ b/src/Planner/PlannerJoinTree.cpp @@ -0,0 +1,708 @@ +#include + +#include + +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include 
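For orientation, a hedged sketch of how the expression analysis entry point declared above is typically driven and queried; `query_tree`, `join_tree_input_columns` and `planner_context` are placeholders supplied by the planner.

    // Run the whole analysis pipeline and inspect one of the optional stages.
    auto expression_analysis = buildExpressionAnalysisResult(query_tree, join_tree_input_columns, planner_context);
    if (expression_analysis.hasWhere())
    {
        const auto & where = expression_analysis.getWhere();
        // where.filter_actions, where.filter_column_name and where.remove_filter_column
        // are later used by the planner when it adds the filtering step for WHERE.
    }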
+#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INVALID_JOIN_ON_EXPRESSION; + extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; + extern const int SYNTAX_ERROR; + extern const int ACCESS_DENIED; +} + +namespace +{ + +/// Check if current user has privileges to SELECT columns from table +void checkAccessRights(const TableNode & table_node, const Names & column_names, const ContextPtr & query_context) +{ + const auto & storage_id = table_node.getStorageID(); + const auto & storage_snapshot = table_node.getStorageSnapshot(); + + if (column_names.empty()) + { + /** For a trivial queries like "SELECT count() FROM table", "SELECT 1 FROM table" access is granted if at least + * one table column is accessible. + */ + auto access = query_context->getAccess(); + + for (const auto & column : storage_snapshot->metadata->getColumns()) + { + if (access->isGranted(AccessType::SELECT, storage_id.database_name, storage_id.table_name, column.name)) + return; + } + + throw Exception(ErrorCodes::ACCESS_DENIED, + "{}: Not enough privileges. To execute this query it's necessary to have grant SELECT for at least one column on {}", + query_context->getUserName(), + storage_id.getFullTableName()); + } + + query_context->checkAccess(AccessType::SELECT, storage_id, column_names); +} + +QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression, + SelectQueryInfo & select_query_info, + const SelectQueryOptions & select_query_options, + PlannerContextPtr & planner_context) +{ + auto * table_node = table_expression->as(); + auto * table_function_node = table_expression->as(); + auto * query_node = table_expression->as(); + auto * union_node = table_expression->as(); + + QueryPlan query_plan; + + auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression); + + if (table_node || table_function_node) + { + const auto & storage = table_node ? table_node->getStorage() : table_function_node->getStorage(); + const auto & storage_snapshot = table_node ? table_node->getStorageSnapshot() : table_function_node->getStorageSnapshot(); + + auto table_expression_query_info = select_query_info; + table_expression_query_info.table_expression = table_expression; + + if (table_node) + table_expression_query_info.table_expression_modifiers = table_node->getTableExpressionModifiers(); + else + table_expression_query_info.table_expression_modifiers = table_function_node->getTableExpressionModifiers(); + + auto & query_context = planner_context->getQueryContext(); + + auto from_stage = storage->getQueryProcessingStage(query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info); + const auto & columns_names_set = table_expression_data.getColumnsNames(); + Names columns_names(columns_names_set.begin(), columns_names_set.end()); + + /** The current user must have the SELECT privilege. + * We do not check access rights for table functions because they have been already checked in ITableFunction::execute(). 
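As a concrete illustration of the "trivial query" rule above, a minimal sketch follows; `storage_id`, `storage_snapshot` and `query_context` are the same kind of objects checkAccessRights works with, and the sketch simply restates its access probe.

    // For `SELECT count() FROM table` no concrete column is required, so access is
    // granted as soon as SELECT is allowed on at least one column of the table.
    auto access = query_context->getAccess();
    bool has_access_to_any_column = false;
    for (const auto & column : storage_snapshot->metadata->getColumns())
        has_access_to_any_column |= access->isGranted(AccessType::SELECT, storage_id.database_name, storage_id.table_name, column.name);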
+ */ + if (table_node) + { + auto column_names_with_aliases = columns_names; + const auto & alias_columns_names = table_expression_data.getAliasColumnsNames(); + column_names_with_aliases.insert(column_names_with_aliases.end(), alias_columns_names.begin(), alias_columns_names.end()); + checkAccessRights(*table_node, column_names_with_aliases, planner_context->getQueryContext()); + } + + if (columns_names.empty()) + { + auto column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns()); + auto additional_column_to_read = column_names_and_types.front(); + + const auto & column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(additional_column_to_read, table_expression); + columns_names.push_back(additional_column_to_read.name); + table_expression_data.addColumn(additional_column_to_read, column_identifier); + } + + size_t max_block_size = query_context->getSettingsRef().max_block_size; + size_t max_streams = query_context->getSettingsRef().max_threads; + + bool need_rewrite_query_with_final = storage->needRewriteQueryWithFinal(columns_names); + if (need_rewrite_query_with_final) + { + if (table_expression_query_info.table_expression_modifiers) + { + const auto & table_expression_modifiers = table_expression_query_info.table_expression_modifiers; + auto sample_size_ratio = table_expression_modifiers->getSampleSizeRatio(); + auto sample_offset_ratio = table_expression_modifiers->getSampleOffsetRatio(); + + table_expression_query_info.table_expression_modifiers = TableExpressionModifiers(true /*has_final*/, + sample_size_ratio, + sample_offset_ratio); + } + else + { + table_expression_query_info.table_expression_modifiers = TableExpressionModifiers(true /*has_final*/, + {} /*sample_size_ratio*/, + {} /*sample_offset_ratio*/); + } + } + + storage->read(query_plan, columns_names, storage_snapshot, table_expression_query_info, query_context, from_stage, max_block_size, max_streams); + + /// Create step which reads from empty source if storage has no data. + if (!query_plan.isInitialized()) + { + auto source_header = storage_snapshot->getSampleBlockForColumns(columns_names); + Pipe pipe(std::make_shared(source_header)); + auto read_from_pipe = std::make_unique(std::move(pipe)); + read_from_pipe->setStepDescription("Read from NullSource"); + query_plan.addStep(std::move(read_from_pipe)); + } + } + else if (query_node || union_node) + { + auto subquery_options = select_query_options.subquery(); + auto subquery_context = buildSubqueryContext(planner_context->getQueryContext()); + Planner subquery_planner(table_expression, subquery_options, std::move(subquery_context), planner_context->getGlobalPlannerContext()); + subquery_planner.buildQueryPlanIfNeeded(); + query_plan = std::move(subquery_planner).extractQueryPlan(); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected table, table function, query or union. 
Actual {}", table_expression->formatASTForErrorMessage()); + } + + auto rename_actions_dag = std::make_shared(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + + for (auto & output_node : rename_actions_dag->getOutputs()) + { + const auto * column_identifier = table_expression_data.getColumnIdentifierOrNull(output_node->result_name); + + if (!column_identifier) + continue; + + const auto * node_to_rename = output_node; + output_node = &rename_actions_dag->addAlias(*node_to_rename, *column_identifier); + } + + auto rename_step = std::make_unique(query_plan.getCurrentDataStream(), rename_actions_dag); + rename_step->setStepDescription("Change column names to column identifiers"); + query_plan.addStep(std::move(rename_step)); + + return query_plan; +} + +QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, + SelectQueryInfo & select_query_info, + const SelectQueryOptions & select_query_options, + PlannerContextPtr & planner_context) +{ + auto & join_node = join_tree_node->as(); + + auto left_plan = buildQueryPlanForJoinTreeNode(join_node.getLeftTableExpression(), + select_query_info, + select_query_options, + planner_context); + auto left_plan_output_columns = left_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); + + auto right_plan = buildQueryPlanForJoinTreeNode(join_node.getRightTableExpression(), + select_query_info, + select_query_options, + planner_context); + auto right_plan_output_columns = right_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); + + JoinClausesAndActions join_clauses_and_actions; + JoinKind join_kind = join_node.getKind(); + + auto join_constant = tryExtractConstantFromJoinNode(join_tree_node); + if (join_constant) + { + /** If there is JOIN with always true constant, we transform it to cross. + * If there is JOIN with always false constant, we do not process JOIN keys. + * It is expected by join algorithm to handle such case. 
+ * + * Example: SELECT * FROM test_table AS t1 INNER JOIN test_table AS t2 ON 1; + */ + if (*join_constant) + join_kind = JoinKind::Cross; + } + else if (join_node.isOnJoinExpression()) + { + join_clauses_and_actions = buildJoinClausesAndActions(left_plan_output_columns, + right_plan_output_columns, + join_tree_node, + planner_context); + + join_clauses_and_actions.left_join_expressions_actions->projectInput(); + auto left_join_expressions_actions_step = std::make_unique(left_plan.getCurrentDataStream(), join_clauses_and_actions.left_join_expressions_actions); + left_join_expressions_actions_step->setStepDescription("JOIN actions"); + left_plan.addStep(std::move(left_join_expressions_actions_step)); + + join_clauses_and_actions.right_join_expressions_actions->projectInput(); + auto right_join_expressions_actions_step = std::make_unique(right_plan.getCurrentDataStream(), join_clauses_and_actions.right_join_expressions_actions); + right_join_expressions_actions_step->setStepDescription("JOIN actions"); + right_plan.addStep(std::move(right_join_expressions_actions_step)); + } + + std::unordered_map left_plan_column_name_to_cast_type; + std::unordered_map right_plan_column_name_to_cast_type; + + if (join_node.isUsingJoinExpression()) + { + auto & join_node_using_columns_list = join_node.getJoinExpression()->as(); + for (auto & join_node_using_node : join_node_using_columns_list.getNodes()) + { + auto & join_node_using_column_node = join_node_using_node->as(); + auto & inner_columns_list = join_node_using_column_node.getExpressionOrThrow()->as(); + + auto & left_inner_column_node = inner_columns_list.getNodes().at(0); + auto & left_inner_column = left_inner_column_node->as(); + + auto & right_inner_column_node = inner_columns_list.getNodes().at(1); + auto & right_inner_column = right_inner_column_node->as(); + + const auto & join_node_using_column_node_type = join_node_using_column_node.getColumnType(); + if (!left_inner_column.getColumnType()->equals(*join_node_using_column_node_type)) + { + const auto & left_inner_column_identifier = planner_context->getColumnNodeIdentifierOrThrow(left_inner_column_node); + left_plan_column_name_to_cast_type.emplace(left_inner_column_identifier, join_node_using_column_node_type); + } + + if (!right_inner_column.getColumnType()->equals(*join_node_using_column_node_type)) + { + const auto & right_inner_column_identifier = planner_context->getColumnNodeIdentifierOrThrow(right_inner_column_node); + right_plan_column_name_to_cast_type.emplace(right_inner_column_identifier, join_node_using_column_node_type); + } + } + } + + auto join_cast_plan_output_nodes = [&](QueryPlan & plan_to_add_cast, std::unordered_map & plan_column_name_to_cast_type) + { + auto cast_actions_dag = std::make_shared(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName()); + + for (auto & output_node : cast_actions_dag->getOutputs()) + { + auto it = plan_column_name_to_cast_type.find(output_node->result_name); + if (it == plan_column_name_to_cast_type.end()) + continue; + + const auto & cast_type = it->second; + auto cast_type_name = cast_type->getName(); + Field cast_type_constant_value(cast_type_name); + + ColumnWithTypeAndName column; + column.name = calculateConstantActionNodeName(cast_type_constant_value); + column.column = DataTypeString().createColumnConst(0, cast_type_constant_value); + column.type = std::make_shared(); + + const auto * cast_type_constant_node = &cast_actions_dag->addColumn(std::move(column)); + + FunctionCastBase::Diagnostic diagnostic = 
{output_node->result_name, output_node->result_name}; + FunctionOverloadResolverPtr func_builder_cast + = CastInternalOverloadResolver::createImpl(std::move(diagnostic)); + + ActionsDAG::NodeRawConstPtrs children = {output_node, cast_type_constant_node}; + output_node = &cast_actions_dag->addFunction(func_builder_cast, std::move(children), output_node->result_name); + } + + cast_actions_dag->projectInput(); + auto cast_join_columns_step + = std::make_unique(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag)); + cast_join_columns_step->setStepDescription("Cast JOIN USING columns"); + plan_to_add_cast.addStep(std::move(cast_join_columns_step)); + }; + + if (!left_plan_column_name_to_cast_type.empty()) + join_cast_plan_output_nodes(left_plan, left_plan_column_name_to_cast_type); + + if (!right_plan_column_name_to_cast_type.empty()) + join_cast_plan_output_nodes(right_plan, right_plan_column_name_to_cast_type); + + const auto & query_context = planner_context->getQueryContext(); + const auto & settings = query_context->getSettingsRef(); + + bool join_use_nulls = settings.join_use_nulls; + auto to_nullable_function = FunctionFactory::instance().get("toNullable", query_context); + + auto join_cast_plan_columns_to_nullable = [&](QueryPlan & plan_to_add_cast) + { + auto cast_actions_dag = std::make_shared(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName()); + + for (auto & output_node : cast_actions_dag->getOutputs()) + { + if (planner_context->getGlobalPlannerContext()->hasColumnIdentifier(output_node->result_name)) + output_node = &cast_actions_dag->addFunction(to_nullable_function, {output_node}, output_node->result_name); + } + + cast_actions_dag->projectInput(); + auto cast_join_columns_step = std::make_unique(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag)); + cast_join_columns_step->setStepDescription("Cast JOIN columns to Nullable"); + plan_to_add_cast.addStep(std::move(cast_join_columns_step)); + }; + + if (join_use_nulls) + { + if (isFull(join_kind)) + { + join_cast_plan_columns_to_nullable(left_plan); + join_cast_plan_columns_to_nullable(right_plan); + } + else if (isLeft(join_kind)) + { + join_cast_plan_columns_to_nullable(right_plan); + } + else if (isRight(join_kind)) + { + join_cast_plan_columns_to_nullable(left_plan); + } + } + + auto table_join = std::make_shared(settings, query_context->getTemporaryVolume()); + table_join->getTableJoin() = join_node.toASTTableJoin()->as(); + table_join->getTableJoin().kind = join_kind; + + if (join_kind == JoinKind::Comma) + { + join_kind = JoinKind::Cross; + table_join->getTableJoin().kind = JoinKind::Cross; + } + + table_join->setIsJoinWithConstant(join_constant != std::nullopt); + + if (join_node.isOnJoinExpression()) + { + const auto & join_clauses = join_clauses_and_actions.join_clauses; + bool is_asof = table_join->strictness() == JoinStrictness::Asof; + + if (join_clauses.size() > 1) + { + if (is_asof) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "ASOF join {} doesn't support multiple ORs for keys in JOIN ON section", + join_node.formatASTForErrorMessage()); + } + + auto & table_join_clauses = table_join->getClauses(); + + for (const auto & join_clause : join_clauses) + { + table_join_clauses.emplace_back(); + auto & table_join_clause = table_join_clauses.back(); + + const auto & join_clause_left_key_nodes = join_clause.getLeftKeyNodes(); + const auto & join_clause_right_key_nodes = join_clause.getRightKeyNodes(); + + size_t join_clause_key_nodes_size = 
join_clause_left_key_nodes.size(); + assert(join_clause_key_nodes_size == join_clause_right_key_nodes.size()); + + for (size_t i = 0; i < join_clause_key_nodes_size; ++i) + { + table_join_clause.key_names_left.push_back(join_clause_left_key_nodes[i]->result_name); + table_join_clause.key_names_right.push_back(join_clause_right_key_nodes[i]->result_name); + } + + const auto & join_clause_get_left_filter_condition_nodes = join_clause.getLeftFilterConditionNodes(); + if (!join_clause_get_left_filter_condition_nodes.empty()) + { + if (join_clause_get_left_filter_condition_nodes.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "JOIN {} left filter conditions size must be 1. Actual {}", + join_node.formatASTForErrorMessage(), + join_clause_get_left_filter_condition_nodes.size()); + + const auto & join_clause_left_filter_condition_name = join_clause_get_left_filter_condition_nodes[0]->result_name; + table_join_clause.analyzer_left_filter_condition_column_name = join_clause_left_filter_condition_name; + } + + const auto & join_clause_get_right_filter_condition_nodes = join_clause.getRightFilterConditionNodes(); + if (!join_clause_get_right_filter_condition_nodes.empty()) + { + if (join_clause_get_right_filter_condition_nodes.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "JOIN {} right filter conditions size must be 1. Actual {}", + join_node.formatASTForErrorMessage(), + join_clause_get_right_filter_condition_nodes.size()); + + const auto & join_clause_right_filter_condition_name = join_clause_get_right_filter_condition_nodes[0]->result_name; + table_join_clause.analyzer_right_filter_condition_column_name = join_clause_right_filter_condition_name; + } + + if (is_asof) + { + if (!join_clause.hasASOF()) + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "JOIN {} no inequality in ASOF JOIN ON section.", + join_node.formatASTForErrorMessage()); + + if (table_join_clause.key_names_left.size() <= 1) + throw Exception(ErrorCodes::SYNTAX_ERROR, + "JOIN {} ASOF join needs at least one equi-join column", + join_node.formatASTForErrorMessage()); + } + + if (join_clause.hasASOF()) + { + const auto & asof_conditions = join_clause.getASOFConditions(); + assert(asof_conditions.size() == 1); + + const auto & asof_condition = asof_conditions[0]; + table_join->setAsofInequality(asof_condition.asof_inequality); + + /// Execution layer of JOIN algorithms expects that ASOF keys are last JOIN keys + std::swap(table_join_clause.key_names_left.at(asof_condition.key_index), table_join_clause.key_names_left.back()); + std::swap(table_join_clause.key_names_right.at(asof_condition.key_index), table_join_clause.key_names_right.back()); + } + } + } + else if (join_node.isUsingJoinExpression()) + { + auto & table_join_clauses = table_join->getClauses(); + table_join_clauses.emplace_back(); + auto & table_join_clause = table_join_clauses.back(); + + auto & using_list = join_node.getJoinExpression()->as(); + + for (auto & join_using_node : using_list.getNodes()) + { + auto & join_using_column_node = join_using_node->as(); + auto & using_join_columns_list = join_using_column_node.getExpressionOrThrow()->as(); + auto & using_join_left_join_column_node = using_join_columns_list.getNodes().at(0); + auto & using_join_right_join_column_node = using_join_columns_list.getNodes().at(1); + + const auto & left_column_identifier = planner_context->getColumnNodeIdentifierOrThrow(using_join_left_join_column_node); + const auto & right_column_identifier = 
planner_context->getColumnNodeIdentifierOrThrow(using_join_right_join_column_node); + + table_join_clause.key_names_left.push_back(left_column_identifier); + table_join_clause.key_names_right.push_back(right_column_identifier); + } + } + + auto left_table_names = left_plan.getCurrentDataStream().header.getNames(); + NameSet left_table_names_set(left_table_names.begin(), left_table_names.end()); + + auto columns_from_joined_table = right_plan.getCurrentDataStream().header.getNamesAndTypesList(); + table_join->setColumnsFromJoinedTable(columns_from_joined_table, left_table_names_set, ""); + + for (auto & column_from_joined_table : columns_from_joined_table) + { + if (planner_context->getGlobalPlannerContext()->hasColumnIdentifier(column_from_joined_table.name)) + table_join->addJoinedColumn(column_from_joined_table); + } + + auto join_algorithm = chooseJoinAlgorithm(table_join, join_node.getRightTableExpression(), right_plan.getCurrentDataStream().header, planner_context); + + auto result_plan = QueryPlan(); + + if (join_algorithm->isFilled()) + { + size_t max_block_size = query_context->getSettingsRef().max_block_size; + + auto filled_join_step = std::make_unique( + left_plan.getCurrentDataStream(), + join_algorithm, + max_block_size); + + filled_join_step->setStepDescription("Filled JOIN"); + left_plan.addStep(std::move(filled_join_step)); + + result_plan = std::move(left_plan); + } + else + { + auto add_sorting = [&] (QueryPlan & plan, const Names & key_names, JoinTableSide join_table_side) + { + SortDescription sort_description; + sort_description.reserve(key_names.size()); + for (const auto & key_name : key_names) + sort_description.emplace_back(key_name); + + auto sorting_step = std::make_unique( + plan.getCurrentDataStream(), + std::move(sort_description), + settings.max_block_size, + 0 /*limit*/, + SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode), + settings.max_bytes_before_remerge_sort, + settings.remerge_sort_lowered_memory_bytes_ratio, + settings.max_bytes_before_external_sort, + query_context->getTempDataOnDisk(), + settings.min_free_disk_space_for_temporary_data, + settings.optimize_sorting_by_input_stream_properties); + sorting_step->setStepDescription(fmt::format("Sort {} before JOIN", join_table_side)); + plan.addStep(std::move(sorting_step)); + }; + + auto crosswise_connection = CreateSetAndFilterOnTheFlyStep::createCrossConnection(); + auto add_create_set = [&settings, crosswise_connection](QueryPlan & plan, const Names & key_names, JoinTableSide join_table_side) + { + auto creating_set_step = std::make_unique( + plan.getCurrentDataStream(), + key_names, + settings.max_rows_in_set_to_optimize_join, + crosswise_connection, + join_table_side); + creating_set_step->setStepDescription(fmt::format("Create set and filter {} joined stream", join_table_side)); + + auto * step_raw_ptr = creating_set_step.get(); + plan.addStep(std::move(creating_set_step)); + return step_raw_ptr; + }; + + if (join_algorithm->pipelineType() == JoinPipelineType::YShaped) + { + const auto & join_clause = table_join->getOnlyClause(); + + bool kind_allows_filtering = isInner(join_kind) || isLeft(join_kind) || isRight(join_kind); + if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering) + { + auto * left_set = add_create_set(left_plan, join_clause.key_names_left, JoinTableSide::Left); + auto * right_set = add_create_set(right_plan, join_clause.key_names_right, JoinTableSide::Right); + + if (isInnerOrLeft(join_kind)) + 
right_set->setFiltering(left_set->getSet()); + + if (isInnerOrRight(join_kind)) + left_set->setFiltering(right_set->getSet()); + } + + add_sorting(left_plan, join_clause.key_names_left, JoinTableSide::Left); + add_sorting(right_plan, join_clause.key_names_right, JoinTableSide::Right); + } + + size_t max_block_size = query_context->getSettingsRef().max_block_size; + size_t max_streams = query_context->getSettingsRef().max_threads; + + auto join_step = std::make_unique( + left_plan.getCurrentDataStream(), + right_plan.getCurrentDataStream(), + std::move(join_algorithm), + max_block_size, + max_streams, + false /*optimize_read_in_order*/); + + join_step->setStepDescription(fmt::format("JOIN {}", JoinPipelineType::FillRightFirst)); + + std::vector plans; + plans.emplace_back(std::make_unique(std::move(left_plan))); + plans.emplace_back(std::make_unique(std::move(right_plan))); + + result_plan.unitePlans(std::move(join_step), {std::move(plans)}); + } + + auto drop_unused_columns_after_join_actions_dag = std::make_shared(result_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + ActionsDAG::NodeRawConstPtrs updated_outputs; + std::unordered_set updated_outputs_names; + + for (auto & output : drop_unused_columns_after_join_actions_dag->getOutputs()) + { + if (updated_outputs_names.contains(output->result_name) || !planner_context->getGlobalPlannerContext()->hasColumnIdentifier(output->result_name)) + continue; + + updated_outputs.push_back(output); + updated_outputs_names.insert(output->result_name); + } + + drop_unused_columns_after_join_actions_dag->getOutputs() = std::move(updated_outputs); + + auto drop_unused_columns_after_join_transform_step = std::make_unique(result_plan.getCurrentDataStream(), std::move(drop_unused_columns_after_join_actions_dag)); + drop_unused_columns_after_join_transform_step->setStepDescription("DROP unused columns after JOIN"); + result_plan.addStep(std::move(drop_unused_columns_after_join_transform_step)); + + return result_plan; +} + +QueryPlan buildQueryPlanForArrayJoinNode(QueryTreeNodePtr table_expression, + SelectQueryInfo & select_query_info, + const SelectQueryOptions & select_query_options, + PlannerContextPtr & planner_context) +{ + auto & array_join_node = table_expression->as(); + + auto plan = buildQueryPlanForJoinTreeNode(array_join_node.getTableExpression(), + select_query_info, + select_query_options, + planner_context); + auto plan_output_columns = plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); + + ActionsDAGPtr array_join_action_dag = std::make_shared(plan_output_columns); + PlannerActionsVisitor actions_visitor(planner_context); + + NameSet array_join_columns; + for (auto & array_join_expression : array_join_node.getJoinExpressions().getNodes()) + { + auto & array_join_expression_column = array_join_expression->as(); + const auto & array_join_column_name = array_join_expression_column.getColumnName(); + array_join_columns.insert(array_join_column_name); + + auto expression_dag_index_nodes = actions_visitor.visit(array_join_action_dag, array_join_expression_column.getExpressionOrThrow()); + for (auto & expression_dag_index_node : expression_dag_index_nodes) + { + const auto * array_join_column_node = &array_join_action_dag->addAlias(*expression_dag_index_node, array_join_column_name); + array_join_action_dag->getOutputs().push_back(array_join_column_node); + } + } + + array_join_action_dag->projectInput(); + auto array_join_actions = std::make_unique(plan.getCurrentDataStream(), array_join_action_dag); + 
array_join_actions->setStepDescription("ARRAY JOIN actions"); + plan.addStep(std::move(array_join_actions)); + + auto array_join_action = std::make_shared(array_join_columns, array_join_node.isLeft(), planner_context->getQueryContext()); + auto array_join_step = std::make_unique(plan.getCurrentDataStream(), std::move(array_join_action)); + array_join_step->setStepDescription("ARRAY JOIN"); + plan.addStep(std::move(array_join_step)); + + return plan; +} + +} + +QueryPlan buildQueryPlanForJoinTreeNode(QueryTreeNodePtr join_tree_node, + SelectQueryInfo & select_query_info, + const SelectQueryOptions & select_query_options, + PlannerContextPtr & planner_context) +{ + auto join_tree_node_type = join_tree_node->getNodeType(); + + switch (join_tree_node_type) + { + case QueryTreeNodeType::TABLE: + [[fallthrough]]; + case QueryTreeNodeType::TABLE_FUNCTION: + [[fallthrough]]; + case QueryTreeNodeType::QUERY: + [[fallthrough]]; + case QueryTreeNodeType::UNION: + { + return buildQueryPlanForTableExpression(join_tree_node, select_query_info, select_query_options, planner_context); + } + case QueryTreeNodeType::JOIN: + { + return buildQueryPlanForJoinNode(join_tree_node, select_query_info, select_query_options, planner_context); + } + case QueryTreeNodeType::ARRAY_JOIN: + { + return buildQueryPlanForArrayJoinNode(join_tree_node, select_query_info, select_query_options, planner_context); + } + default: + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected table, table function, query, union, join or array join query node. Actual {}", + join_tree_node->formatASTForErrorMessage()); + } + } +} + +} diff --git a/src/Planner/PlannerJoinTree.h b/src/Planner/PlannerJoinTree.h new file mode 100644 index 00000000000..c93b71e0df1 --- /dev/null +++ b/src/Planner/PlannerJoinTree.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +#include + +#include + +#include + +namespace DB +{ + +/// Build query plan for query JOIN TREE node +QueryPlan buildQueryPlanForJoinTreeNode(QueryTreeNodePtr join_tree_node, + SelectQueryInfo & select_query_info, + const SelectQueryOptions & select_query_options, + PlannerContextPtr & planner_context); + +} diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp new file mode 100644 index 00000000000..f62517eaaad --- /dev/null +++ b/src/Planner/PlannerJoins.cpp @@ -0,0 +1,695 @@ +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int INVALID_JOIN_ON_EXPRESSION; + extern const int NOT_IMPLEMENTED; +} + +void JoinClause::dump(WriteBuffer & buffer) const +{ + auto dump_dag_nodes = [&](const ActionsDAG::NodeRawConstPtrs & dag_nodes) + { + String dag_nodes_dump; + + if (!dag_nodes.empty()) + { + for (const auto & dag_node : dag_nodes) + { + dag_nodes_dump += dag_node->result_name; + dag_nodes_dump += ", "; + } + + dag_nodes_dump.pop_back(); + dag_nodes_dump.pop_back(); + } + + return dag_nodes_dump; + }; + + buffer << "left_key_nodes: " << dump_dag_nodes(left_key_nodes); + buffer << " right_key_nodes: " << dump_dag_nodes(right_key_nodes); + + if (!left_filter_condition_nodes.empty()) + buffer << " left_condition_nodes: " + dump_dag_nodes(left_filter_condition_nodes); + + if (!right_filter_condition_nodes.empty()) + buffer 
<< " right_condition_nodes: " + dump_dag_nodes(right_filter_condition_nodes); +} + +String JoinClause::dump() const +{ + WriteBufferFromOwnString buffer; + dump(buffer); + + return buffer.str(); +} + +namespace +{ + +std::optional extractJoinTableSideFromExpression(const ActionsDAG::Node * expression_root_node, + const std::unordered_set & join_expression_dag_input_nodes, + const NameSet & left_table_expression_columns_names, + const NameSet & right_table_expression_columns_names, + const JoinNode & join_node) +{ + std::optional table_side; + std::vector nodes_to_process; + nodes_to_process.push_back(expression_root_node); + + while (!nodes_to_process.empty()) + { + const auto * node_to_process = nodes_to_process.back(); + nodes_to_process.pop_back(); + + for (const auto & child : node_to_process->children) + nodes_to_process.push_back(child); + + if (!join_expression_dag_input_nodes.contains(node_to_process)) + continue; + + const auto & input_name = node_to_process->result_name; + + bool left_table_expression_contains_input = left_table_expression_columns_names.contains(input_name); + bool right_table_expression_contains_input = right_table_expression_columns_names.contains(input_name); + + if (!left_table_expression_contains_input && !right_table_expression_contains_input) + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "JOIN {} actions has column {} that do not exist in left {} or right {} table expression columns", + join_node.formatASTForErrorMessage(), + input_name, + boost::join(left_table_expression_columns_names, ", "), + boost::join(right_table_expression_columns_names, ", ")); + + auto input_table_side = left_table_expression_contains_input ? JoinTableSide::Left : JoinTableSide::Right; + if (table_side && (*table_side) != input_table_side) + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "JOIN {} join expression contains column from left and right table", + join_node.formatASTForErrorMessage()); + + table_side = input_table_side; + } + + return table_side; +} + +void buildJoinClause(ActionsDAGPtr join_expression_dag, + const std::unordered_set & join_expression_dag_input_nodes, + const ActionsDAG::Node * join_expressions_actions_node, + const NameSet & left_table_expression_columns_names, + const NameSet & right_table_expression_columns_names, + const JoinNode & join_node, + JoinClause & join_clause) +{ + std::string function_name; + + if (join_expressions_actions_node->function) + function_name = join_expressions_actions_node->function->getName(); + + /// For 'and' function go into children + if (function_name == "and") + { + for (const auto & child : join_expressions_actions_node->children) + { + buildJoinClause(join_expression_dag, + join_expression_dag_input_nodes, + child, + left_table_expression_columns_names, + right_table_expression_columns_names, + join_node, + join_clause); + } + + return; + } + + auto asof_inequality = getASOFJoinInequality(function_name); + bool is_asof_join_inequality = join_node.getStrictness() == JoinStrictness::Asof && asof_inequality != ASOFJoinInequality::None; + + if (function_name == "equals" || is_asof_join_inequality) + { + const auto * left_child = join_expressions_actions_node->children.at(0); + const auto * right_child = join_expressions_actions_node->children.at(1); + + auto left_expression_side_optional = extractJoinTableSideFromExpression(left_child, + join_expression_dag_input_nodes, + left_table_expression_columns_names, + right_table_expression_columns_names, + join_node); + + auto 
right_expression_side_optional = extractJoinTableSideFromExpression(right_child, + join_expression_dag_input_nodes, + left_table_expression_columns_names, + right_table_expression_columns_names, + join_node); + + if (!left_expression_side_optional && !right_expression_side_optional) + { + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "JOIN {} ON expression {} with constants is not supported", + join_node.formatASTForErrorMessage(), + join_expressions_actions_node->result_name); + } + else if (left_expression_side_optional && !right_expression_side_optional) + { + join_clause.addCondition(*left_expression_side_optional, join_expressions_actions_node); + } + else if (!left_expression_side_optional && right_expression_side_optional) + { + join_clause.addCondition(*right_expression_side_optional, join_expressions_actions_node); + } + else + { + auto left_expression_side = *left_expression_side_optional; + auto right_expression_side = *right_expression_side_optional; + + if (left_expression_side != right_expression_side) + { + const ActionsDAG::Node * left_key = left_child; + const ActionsDAG::Node * right_key = right_child; + + if (left_expression_side == JoinTableSide::Right) + { + left_key = right_child; + right_key = left_child; + asof_inequality = reverseASOFJoinInequality(asof_inequality); + } + + if (is_asof_join_inequality) + { + if (join_clause.hasASOF()) + { + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "JOIN {} ASOF JOIN expects exactly one inequality in ON section", + join_node.formatASTForErrorMessage()); + } + + join_clause.addASOFKey(left_key, right_key, asof_inequality); + } + else + { + join_clause.addKey(left_key, right_key); + } + } + else + { + join_clause.addCondition(left_expression_side, join_expressions_actions_node); + } + } + + return; + } + + auto expression_side_optional = extractJoinTableSideFromExpression(join_expressions_actions_node, + join_expression_dag_input_nodes, + left_table_expression_columns_names, + right_table_expression_columns_names, + join_node); + + if (!expression_side_optional) + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "JOIN {} with constants is not supported", + join_node.formatASTForErrorMessage()); + + auto expression_side = *expression_side_optional; + join_clause.addCondition(expression_side, join_expressions_actions_node); +} + +JoinClausesAndActions buildJoinClausesAndActions(const ColumnsWithTypeAndName & join_expression_input_columns, + const ColumnsWithTypeAndName & left_table_expression_columns, + const ColumnsWithTypeAndName & right_table_expression_columns, + const JoinNode & join_node, + const PlannerContextPtr & planner_context) +{ + ActionsDAGPtr join_expression_actions = std::make_shared(join_expression_input_columns); + + /** In ActionsDAG if input node has constant representation additional constant column is added. + * That way we cannot simply check that node has INPUT type during resolution of expression join table side. + * Put all nodes after actions dag initialization in set. + * To check if actions dag node is input column, we check if set contains it. 
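+ * Only nodes that exist right after the DAG is created from the input columns are treated as inputs;
+ * nodes added later by the planner actions visitor are not.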
+ */ + const auto & join_expression_actions_nodes = join_expression_actions->getNodes(); + + std::unordered_set join_expression_dag_input_nodes; + join_expression_dag_input_nodes.reserve(join_expression_actions_nodes.size()); + for (const auto & node : join_expression_actions_nodes) + join_expression_dag_input_nodes.insert(&node); + + PlannerActionsVisitor join_expression_visitor(planner_context); + auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(join_expression_actions, join_node.getJoinExpression()); + if (join_expression_dag_node_raw_pointers.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "JOIN {} ON clause contains multiple expressions", + join_node.formatASTForErrorMessage()); + + const auto * join_expressions_actions_root_node = join_expression_dag_node_raw_pointers[0]; + if (!join_expressions_actions_root_node->function) + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "JOIN {} join expression expected function", + join_node.formatASTForErrorMessage()); + + size_t left_table_expression_columns_size = left_table_expression_columns.size(); + + Names join_left_actions_names; + join_left_actions_names.reserve(left_table_expression_columns_size); + + NameSet join_left_actions_names_set; + join_left_actions_names_set.reserve(left_table_expression_columns_size); + + for (const auto & left_table_expression_column : left_table_expression_columns) + { + join_left_actions_names.push_back(left_table_expression_column.name); + join_left_actions_names_set.insert(left_table_expression_column.name); + } + + size_t right_table_expression_columns_size = right_table_expression_columns.size(); + + Names join_right_actions_names; + join_right_actions_names.reserve(right_table_expression_columns_size); + + NameSet join_right_actions_names_set; + join_right_actions_names_set.reserve(right_table_expression_columns_size); + + for (const auto & right_table_expression_column : right_table_expression_columns) + { + join_right_actions_names.push_back(right_table_expression_column.name); + join_right_actions_names_set.insert(right_table_expression_column.name); + } + + JoinClausesAndActions result; + result.join_expression_actions = join_expression_actions; + + const auto & function_name = join_expressions_actions_root_node->function->getName(); + if (function_name == "or") + { + for (const auto & child : join_expressions_actions_root_node->children) + { + result.join_clauses.emplace_back(); + + buildJoinClause(join_expression_actions, + join_expression_dag_input_nodes, + child, + join_left_actions_names_set, + join_right_actions_names_set, + join_node, + result.join_clauses.back()); + } + } + else + { + result.join_clauses.emplace_back(); + + buildJoinClause(join_expression_actions, + join_expression_dag_input_nodes, + join_expressions_actions_root_node, + join_left_actions_names_set, + join_right_actions_names_set, + join_node, + result.join_clauses.back()); + } + + auto and_function = FunctionFactory::instance().get("and", planner_context->getQueryContext()); + + auto add_necessary_name_if_needed = [&](JoinTableSide join_table_side, const String & name) + { + auto & necessary_names = join_table_side == JoinTableSide::Left ? join_left_actions_names : join_right_actions_names; + auto & necessary_names_set = join_table_side == JoinTableSide::Left ? 
join_left_actions_names_set : join_right_actions_names_set; + + auto [_, inserted] = necessary_names_set.emplace(name); + if (inserted) + necessary_names.push_back(name); + }; + + for (auto & join_clause : result.join_clauses) + { + const auto & left_filter_condition_nodes = join_clause.getLeftFilterConditionNodes(); + if (!left_filter_condition_nodes.empty()) + { + const ActionsDAG::Node * dag_filter_condition_node = nullptr; + + if (left_filter_condition_nodes.size() > 1) + dag_filter_condition_node = &join_expression_actions->addFunction(and_function, left_filter_condition_nodes, {}); + else + dag_filter_condition_node = left_filter_condition_nodes[0]; + + join_clause.getLeftFilterConditionNodes() = {dag_filter_condition_node}; + join_expression_actions->addOrReplaceInOutputs(*dag_filter_condition_node); + + add_necessary_name_if_needed(JoinTableSide::Left, dag_filter_condition_node->result_name); + } + + const auto & right_filter_condition_nodes = join_clause.getRightFilterConditionNodes(); + if (!right_filter_condition_nodes.empty()) + { + const ActionsDAG::Node * dag_filter_condition_node = nullptr; + + if (right_filter_condition_nodes.size() > 1) + dag_filter_condition_node = &join_expression_actions->addFunction(and_function, right_filter_condition_nodes, {}); + else + dag_filter_condition_node = right_filter_condition_nodes[0]; + + join_clause.getRightFilterConditionNodes() = {dag_filter_condition_node}; + join_expression_actions->addOrReplaceInOutputs(*dag_filter_condition_node); + + add_necessary_name_if_needed(JoinTableSide::Right, dag_filter_condition_node->result_name); + } + + assert(join_clause.getLeftKeyNodes().size() == join_clause.getRightKeyNodes().size()); + size_t join_clause_key_nodes_size = join_clause.getLeftKeyNodes().size(); + + if (join_clause_key_nodes_size == 0) + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "JOIN {} cannot get JOIN keys", + join_node.formatASTForErrorMessage()); + + for (size_t i = 0; i < join_clause_key_nodes_size; ++i) + { + auto & left_key_node = join_clause.getLeftKeyNodes()[i]; + auto & right_key_node = join_clause.getRightKeyNodes()[i]; + + if (!left_key_node->result_type->equals(*right_key_node->result_type)) + { + DataTypePtr common_type; + + try + { + common_type = getLeastSupertype(DataTypes{left_key_node->result_type, right_key_node->result_type}); + } + catch (Exception & ex) + { + ex.addMessage("JOIN {} cannot infer common type in ON section for keys. Left key {} type {}. 
Right key {} type {}", + join_node.formatASTForErrorMessage(), + left_key_node->result_name, + left_key_node->result_type->getName(), + right_key_node->result_name, + right_key_node->result_type->getName()); + throw; + } + + auto cast_type_name = common_type->getName(); + Field cast_type_constant_value(cast_type_name); + + ColumnWithTypeAndName cast_column; + cast_column.name = calculateConstantActionNodeName(cast_type_constant_value); + cast_column.column = DataTypeString().createColumnConst(0, cast_type_constant_value); + cast_column.type = std::make_shared(); + + const ActionsDAG::Node * cast_type_constant_node = nullptr; + + if (!left_key_node->result_type->equals(*common_type)) + { + cast_type_constant_node = &join_expression_actions->addColumn(cast_column); + + FunctionCastBase::Diagnostic diagnostic = {left_key_node->result_name, left_key_node->result_name}; + FunctionOverloadResolverPtr func_builder_cast + = CastInternalOverloadResolver::createImpl(diagnostic); + + ActionsDAG::NodeRawConstPtrs children = {left_key_node, cast_type_constant_node}; + left_key_node = &join_expression_actions->addFunction(func_builder_cast, std::move(children), {}); + } + + if (!right_key_node->result_type->equals(*common_type)) + { + if (!cast_type_constant_node) + cast_type_constant_node = &join_expression_actions->addColumn(cast_column); + + FunctionCastBase::Diagnostic diagnostic = {right_key_node->result_name, right_key_node->result_name}; + FunctionOverloadResolverPtr func_builder_cast + = CastInternalOverloadResolver::createImpl(std::move(diagnostic)); + + ActionsDAG::NodeRawConstPtrs children = {right_key_node, cast_type_constant_node}; + right_key_node = &join_expression_actions->addFunction(func_builder_cast, std::move(children), {}); + } + } + + join_expression_actions->addOrReplaceInOutputs(*left_key_node); + join_expression_actions->addOrReplaceInOutputs(*right_key_node); + + add_necessary_name_if_needed(JoinTableSide::Left, left_key_node->result_name); + add_necessary_name_if_needed(JoinTableSide::Right, right_key_node->result_name); + } + } + + result.left_join_expressions_actions = join_expression_actions->clone(); + result.left_join_expressions_actions->removeUnusedActions(join_left_actions_names); + + result.right_join_expressions_actions = join_expression_actions->clone(); + result.right_join_expressions_actions->removeUnusedActions(join_right_actions_names); + + return result; +} + +} + +JoinClausesAndActions buildJoinClausesAndActions( + const ColumnsWithTypeAndName & left_table_expression_columns, + const ColumnsWithTypeAndName & right_table_expression_columns, + const QueryTreeNodePtr & join_node, + const PlannerContextPtr & planner_context) +{ + auto & join_node_typed = join_node->as(); + if (!join_node_typed.isOnJoinExpression()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "JOIN {} join does not have ON section", + join_node_typed.formatASTForErrorMessage()); + + auto join_expression_input_columns = left_table_expression_columns; + join_expression_input_columns.insert(join_expression_input_columns.end(), right_table_expression_columns.begin(), right_table_expression_columns.end()); + + return buildJoinClausesAndActions(join_expression_input_columns, left_table_expression_columns, right_table_expression_columns, join_node_typed, planner_context); +} + +std::optional tryExtractConstantFromJoinNode(const QueryTreeNodePtr & join_node) +{ + auto & join_node_typed = join_node->as(); + if (!join_node_typed.getJoinExpression()) + return {}; + + auto constant_value = 
join_node_typed.getJoinExpression()->getConstantValueOrNull(); + if (!constant_value) + return {}; + + const auto & value = constant_value->getValue(); + auto constant_type = constant_value->getType(); + constant_type = removeNullable(removeLowCardinality(constant_type)); + + auto which_constant_type = WhichDataType(constant_type); + if (!which_constant_type.isUInt8() && !which_constant_type.isNothing()) + return {}; + + if (value.isNull()) + return false; + + UInt8 predicate_value = value.safeGet(); + return predicate_value > 0; +} + +namespace +{ + +void trySetStorageInTableJoin(const QueryTreeNodePtr & table_expression, std::shared_ptr & table_join) +{ + StoragePtr storage; + + if (auto * table_node = table_expression->as()) + storage = table_node->getStorage(); + else if (auto * table_function = table_expression->as()) + storage = table_function->getStorage(); + + auto storage_join = std::dynamic_pointer_cast(storage); + if (storage_join) + { + table_join->setStorageJoin(storage_join); + return; + } + + if (!table_join->isEnabledAlgorithm(JoinAlgorithm::DIRECT)) + return; + + if (auto storage_dictionary = std::dynamic_pointer_cast(storage); storage_dictionary) + table_join->setStorageJoin(std::dynamic_pointer_cast(storage_dictionary->getDictionary())); + else if (auto storage_key_value = std::dynamic_pointer_cast(storage); storage_key_value) + table_join->setStorageJoin(storage_key_value); +} + +std::shared_ptr tryDirectJoin(const std::shared_ptr & table_join, + const QueryTreeNodePtr & right_table_expression, + const Block & right_table_expression_header, + const PlannerContextPtr & planner_context) +{ + if (!table_join->isEnabledAlgorithm(JoinAlgorithm::DIRECT)) + return {}; + + auto storage = table_join->getStorageKeyValue(); + if (!storage) + return {}; + + bool allowed_inner = isInner(table_join->kind()) && table_join->strictness() == JoinStrictness::All; + bool allowed_left = isLeft(table_join->kind()) && (table_join->strictness() == JoinStrictness::Any || + table_join->strictness() == JoinStrictness::All || + table_join->strictness() == JoinStrictness::Semi || + table_join->strictness() == JoinStrictness::Anti); + if (!allowed_inner && !allowed_left) + return {}; + + const auto & clauses = table_join->getClauses(); + bool only_one_key = clauses.size() == 1 && + clauses[0].key_names_left.size() == 1 && + clauses[0].key_names_right.size() == 1 && + !clauses[0].on_filter_condition_left && + !clauses[0].on_filter_condition_right && + clauses[0].analyzer_left_filter_condition_column_name.empty() && + clauses[0].analyzer_right_filter_condition_column_name.empty(); + + if (!only_one_key) + return {}; + + const String & key_name = clauses[0].key_names_right[0]; + + auto & right_table_expression_data = planner_context->getTableExpressionDataOrThrow(right_table_expression); + const auto * table_column_name = right_table_expression_data.getColumnNameOrNull(key_name); + if (!table_column_name) + return {}; + + const auto & storage_primary_key = storage->getPrimaryKey(); + if (storage_primary_key.size() != 1 || storage_primary_key[0] != *table_column_name) + return {}; + + /** For right table expression during execution columns have unique name. + * Direct key value join implementation during storage querying must use storage column names. 
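+ * If some required column cannot be mapped back to a storage column name, direct join is not used (see getColumnNameOrNull below).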
+ * + * Example: + * CREATE DICTIONARY test_dictionary (id UInt64, value String) PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'test_dictionary_table')) LIFETIME(0); + * SELECT t1.id FROM test_table AS t1 INNER JOIN test_dictionary AS t2 ON t1.id = t2.id; + * + * Unique execution name for `id` column from right table expression `test_dictionary AS t2` for example can be `t2.id_0`. + * Storage column name is `id`. + * + * Here we create header for right table expression with original storage column names. + */ + Block right_table_expression_header_with_storage_column_names; + + for (const auto & right_table_expression_column : right_table_expression_header) + { + const auto * table_column_name = right_table_expression_data.getColumnNameOrNull(right_table_expression_column.name); + if (!table_column_name) + return {}; + + auto right_table_expression_column_with_storage_column_name = right_table_expression_column; + right_table_expression_column_with_storage_column_name.name = *table_column_name; + right_table_expression_header_with_storage_column_names.insert(right_table_expression_column_with_storage_column_name); + } + + return std::make_shared(table_join, right_table_expression_header, storage, right_table_expression_header_with_storage_column_names); +} + +} + +std::shared_ptr chooseJoinAlgorithm(std::shared_ptr & table_join, + const QueryTreeNodePtr & right_table_expression, + const Block & right_table_expression_header, + const PlannerContextPtr & planner_context) +{ + trySetStorageInTableJoin(right_table_expression, table_join); + + /// JOIN with JOIN engine. + if (auto storage = table_join->getStorageJoin()) + return storage->getJoinLocked(table_join, planner_context->getQueryContext()); + + /** JOIN with constant. + * Example: SELECT * FROM test_table AS t1 INNER JOIN test_table AS t2 ON 1; + */ + if (table_join->isJoinWithConstant()) + { + if (!table_join->isEnabledAlgorithm(JoinAlgorithm::HASH)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "JOIN with constant supported only with join algorithm 'hash'"); + + return std::make_shared(table_join, right_table_expression_header); + } + + if (!table_join->oneDisjunct() && !table_join->isEnabledAlgorithm(JoinAlgorithm::HASH)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Only `hash` join supports multiple ORs for keys in JOIN ON section"); + + /// Direct JOIN with special storages that support key value access. 
For example JOIN with Dictionary + if (table_join->isEnabledAlgorithm(JoinAlgorithm::DIRECT)) + { + JoinPtr direct_join = tryDirectJoin(table_join, right_table_expression, right_table_expression_header, planner_context); + if (direct_join) + return direct_join; + } + + if (table_join->isEnabledAlgorithm(JoinAlgorithm::PARTIAL_MERGE) || + table_join->isEnabledAlgorithm(JoinAlgorithm::PREFER_PARTIAL_MERGE)) + { + if (MergeJoin::isSupported(table_join)) + return std::make_shared(table_join, right_table_expression_header); + } + + if (table_join->isEnabledAlgorithm(JoinAlgorithm::HASH) || + /// partial_merge is preferred, but can't be used for specified kind of join, fallback to hash + table_join->isEnabledAlgorithm(JoinAlgorithm::PREFER_PARTIAL_MERGE) || + table_join->isEnabledAlgorithm(JoinAlgorithm::PARALLEL_HASH)) + { + if (table_join->allowParallelHashJoin()) + { + auto query_context = planner_context->getQueryContext(); + return std::make_shared(query_context, table_join, query_context->getSettings().max_threads, right_table_expression_header); + } + + return std::make_shared(table_join, right_table_expression_header); + } + + if (table_join->isEnabledAlgorithm(JoinAlgorithm::FULL_SORTING_MERGE)) + { + if (FullSortingMergeJoin::isSupported(table_join)) + return std::make_shared(table_join, right_table_expression_header); + } + + if (table_join->isEnabledAlgorithm(JoinAlgorithm::AUTO)) + return std::make_shared(table_join, right_table_expression_header); + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Can't execute any of specified algorithms for specified strictness/kind and right storage type"); +} + +} diff --git a/src/Planner/PlannerJoins.h b/src/Planner/PlannerJoins.h new file mode 100644 index 00000000000..d305249e789 --- /dev/null +++ b/src/Planner/PlannerJoins.h @@ -0,0 +1,196 @@ +#pragma once + +#include +#include + +#include +#include +#include + +#include + +namespace DB +{ + +/** Join clause represent single JOIN ON section clause. + * Join clause consists of JOIN keys and conditions. + * + * JOIN can contain multiple clauses in JOIN ON section. + * Example: SELECT * FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 ON t1.id = t2.id OR t1.value = t2.value; + * t1.id = t2.id is first clause. + * t1.value = t2.value is second clause. + * + * JOIN ON section can also contain condition inside clause. + * Example: SELECT * FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 ON t1.id = t2.id AND t1.id > 0 AND t2.id > 0; + * t1.id = t2.id AND t1.id > 0 AND t2.id > 0 is first clause. + * t1.id = t2.id is JOIN keys section. + * t1.id > 0 is left table condition. + * t2.id > 0 is right table condition. + * + * Additionally not only conditions, but JOIN keys can be represented as expressions. + * Example: SELECT * FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 ON toString(t1.id) = toString(t2.id). + * toString(t1.id) = toString(t2.id) is JOIN keys section. Where toString(t1.id) is left key, and toString(t2.id) is right key. + * + * During query planning JOIN ON section represented using join clause structure. It is important to split + * keys and conditions. And for each action detect from which stream it can be performed. + * + * We have 2 streams, left stream and right stream. + * We split JOIN ON section expressions actions in two parts left join expression actions and right join expression actions. + * Left join expression actions must be used to calculate necessary actions for left stream. 
+ * Right join expression actions must be used to calculate necessary actions for right stream. + */ +class PlannerContext; +using PlannerContextPtr = std::shared_ptr; + +struct ASOFCondition +{ + size_t key_index; + ASOFJoinInequality asof_inequality; +}; + +/// Single JOIN ON section clause representation +class JoinClause +{ +public: + /// Add keys + void addKey(const ActionsDAG::Node * left_key_node, const ActionsDAG::Node * right_key_node) + { + left_key_nodes.emplace_back(left_key_node); + right_key_nodes.emplace_back(right_key_node); + } + + void addASOFKey(const ActionsDAG::Node * left_key_node, const ActionsDAG::Node * right_key_node, ASOFJoinInequality asof_inequality) + { + left_key_nodes.emplace_back(left_key_node); + right_key_nodes.emplace_back(right_key_node); + asof_conditions.push_back(ASOFCondition{left_key_nodes.size() - 1, asof_inequality}); + } + + /// Add condition for table side + void addCondition(JoinTableSide table_side, const ActionsDAG::Node * condition_node) + { + auto & filter_condition_nodes = table_side == JoinTableSide::Left ? left_filter_condition_nodes : right_filter_condition_nodes; + filter_condition_nodes.push_back(condition_node); + } + + /// Get left key nodes + const ActionsDAG::NodeRawConstPtrs & getLeftKeyNodes() const + { + return left_key_nodes; + } + + /// Get left key nodes + ActionsDAG::NodeRawConstPtrs & getLeftKeyNodes() + { + return left_key_nodes; + } + + /// Get right key nodes + const ActionsDAG::NodeRawConstPtrs & getRightKeyNodes() const + { + return right_key_nodes; + } + + /// Get right key nodes + ActionsDAG::NodeRawConstPtrs & getRightKeyNodes() + { + return right_key_nodes; + } + + /// Returns true if JOIN clause has ASOF conditions, false otherwise + bool hasASOF() const + { + return !asof_conditions.empty(); + } + + /// Get ASOF conditions + const std::vector & getASOFConditions() const + { + return asof_conditions; + } + + /// Get left filter condition nodes + const ActionsDAG::NodeRawConstPtrs & getLeftFilterConditionNodes() const + { + return left_filter_condition_nodes; + } + + /// Get left filter condition nodes + ActionsDAG::NodeRawConstPtrs & getLeftFilterConditionNodes() + { + return left_filter_condition_nodes; + } + + /// Get right filter condition nodes + const ActionsDAG::NodeRawConstPtrs & getRightFilterConditionNodes() const + { + return right_filter_condition_nodes; + } + + /// Get right filter condition nodes + ActionsDAG::NodeRawConstPtrs & getRightFilterConditionNodes() + { + return right_filter_condition_nodes; + } + + /// Dump clause into buffer + void dump(WriteBuffer & buffer) const; + + /// Dump clause + String dump() const; + +private: + ActionsDAG::NodeRawConstPtrs left_key_nodes; + ActionsDAG::NodeRawConstPtrs right_key_nodes; + + std::vector asof_conditions; + + ActionsDAG::NodeRawConstPtrs left_filter_condition_nodes; + ActionsDAG::NodeRawConstPtrs right_filter_condition_nodes; +}; + +using JoinClauses = std::vector; + +struct JoinClausesAndActions +{ + /// Join clauses. Actions dag nodes point into join_expression_actions. + JoinClauses join_clauses; + /// Whole JOIN ON section expressions + ActionsDAGPtr join_expression_actions; + /// Left join expressions actions + ActionsDAGPtr left_join_expressions_actions; + /// Right join expressions actions + ActionsDAGPtr right_join_expressions_actions; +}; + +/** Calculate join clauses and actions for JOIN ON section. + * + * left_table_expression_columns - columns from left join stream. + * right_table_expression_columns - columns from right join stream. 
+ * join_node - join query tree node. + * planner_context - planner context. + */ +JoinClausesAndActions buildJoinClausesAndActions( + const ColumnsWithTypeAndName & left_table_expression_columns, + const ColumnsWithTypeAndName & right_table_expression_columns, + const QueryTreeNodePtr & join_node, + const PlannerContextPtr & planner_context); + +/** Try extract boolean constant from JOIN expression. + * Example: SELECT * FROM test_table AS t1 INNER JOIN test_table AS t2 ON 1; + * Example: SELECT * FROM test_table AS t1 INNER JOIN test_table AS t2 ON 1 != 1; + * + * join_node - join query tree node. + */ +std::optional tryExtractConstantFromJoinNode(const QueryTreeNodePtr & join_node); + +/** Choose JOIN algorithm for table join, right table expression, right table expression header and planner context. + * Table join structure can be modified during JOIN algorithm choosing for special JOIN algorithms. + * For example JOIN with Dictionary engine, or JOIN with JOIN engine. + */ +std::shared_ptr chooseJoinAlgorithm(std::shared_ptr & table_join, + const QueryTreeNodePtr & right_table_expression, + const Block & right_table_expression_header, + const PlannerContextPtr & planner_context); + +} diff --git a/src/Planner/PlannerSorting.cpp b/src/Planner/PlannerSorting.cpp new file mode 100644 index 00000000000..5ae8bd1e21b --- /dev/null +++ b/src/Planner/PlannerSorting.cpp @@ -0,0 +1,157 @@ +#include + +#include + +#include + +#include + +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INVALID_WITH_FILL_EXPRESSION; +} + +namespace +{ + +std::pair extractWithFillValue(const QueryTreeNodePtr & node) +{ + const auto & constant_value = node->getConstantValue(); + + std::pair result; + result.first = constant_value.getValue(); + result.second = constant_value.getType(); + + if (!isColumnedAsNumber(result.second)) + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "WITH FILL expression must be constant with numeric type"); + + return result; +} + +std::pair> extractWithFillStepValue(const QueryTreeNodePtr & node) +{ + const auto & constant_value = node->getConstantValue(); + + const auto & constant_node_result_type = constant_value.getType(); + if (const auto * type_interval = typeid_cast(constant_node_result_type.get())) + return std::make_pair(constant_value.getValue(), type_interval->getKind()); + + if (!isColumnedAsNumber(constant_node_result_type)) + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "WITH FILL expression must be constant with numeric type"); + + return {constant_value.getValue(), {}}; +} + +FillColumnDescription extractWithFillDescription(const SortNode & sort_node) +{ + FillColumnDescription fill_column_description; + + if (sort_node.hasFillFrom()) + { + auto extract_result = extractWithFillValue(sort_node.getFillFrom()); + fill_column_description.fill_from = std::move(extract_result.first); + fill_column_description.fill_from_type = std::move(extract_result.second); + } + + if (sort_node.hasFillTo()) + { + auto extract_result = extractWithFillValue(sort_node.getFillTo()); + fill_column_description.fill_to = std::move(extract_result.first); + fill_column_description.fill_to_type = std::move(extract_result.second); + } + + if (sort_node.hasFillStep()) + { + auto extract_result = extractWithFillStepValue(sort_node.getFillStep()); + fill_column_description.fill_step = std::move(extract_result.first); + fill_column_description.step_kind = std::move(extract_result.second); + } + else + { + auto direction_value = 
sort_node.getSortDirection() == SortDirection::ASCENDING ? static_cast(1) : static_cast(-1); + fill_column_description.fill_step = Field(direction_value); + } + + if (applyVisitor(FieldVisitorAccurateEquals(), fill_column_description.fill_step, Field{0})) + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "WITH FILL STEP value cannot be zero"); + + if (sort_node.getSortDirection() == SortDirection::ASCENDING) + { + if (applyVisitor(FieldVisitorAccurateLess(), fill_column_description.fill_step, Field{0})) + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "WITH FILL STEP value cannot be negative for sorting in ascending direction"); + + if (!fill_column_description.fill_from.isNull() && !fill_column_description.fill_to.isNull() && + applyVisitor(FieldVisitorAccurateLess(), fill_column_description.fill_to, fill_column_description.fill_from)) + { + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "WITH FILL TO value cannot be less than FROM value for sorting in ascending direction"); + } + } + else + { + if (applyVisitor(FieldVisitorAccurateLess(), Field{0}, fill_column_description.fill_step)) + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "WITH FILL STEP value cannot be positive for sorting in descending direction"); + + if (!fill_column_description.fill_from.isNull() && !fill_column_description.fill_to.isNull() && + applyVisitor(FieldVisitorAccurateLess(), fill_column_description.fill_from, fill_column_description.fill_to)) + { + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "WITH FILL FROM value cannot be less than TO value for sorting in descending direction"); + } + } + + return fill_column_description; +} + +} + +SortDescription extractSortDescription(const QueryTreeNodePtr & order_by_node, const PlannerContext & planner_context) +{ + auto & order_by_list_node = order_by_node->as(); + + SortDescription sort_column_description; + sort_column_description.reserve(order_by_list_node.getNodes().size()); + + for (const auto & sort_node : order_by_list_node.getNodes()) + { + auto & sort_node_typed = sort_node->as(); + + auto column_name = calculateActionNodeName(sort_node_typed.getExpression(), planner_context); + std::shared_ptr collator = sort_node_typed.getCollator(); + int direction = sort_node_typed.getSortDirection() == SortDirection::ASCENDING ? 1 : -1; + int nulls_direction = direction; + + auto nulls_sort_direction = sort_node_typed.getNullsSortDirection(); + if (nulls_sort_direction) + nulls_direction = *nulls_sort_direction == SortDirection::ASCENDING ? 
1 : -1; + + if (sort_node_typed.withFill()) + { + FillColumnDescription fill_description = extractWithFillDescription(sort_node_typed); + sort_column_description.emplace_back(column_name, direction, nulls_direction, collator, true /*with_fill*/, fill_description); + } + else + { + sort_column_description.emplace_back(column_name, direction, nulls_direction, collator); + } + } + + const auto & settings = planner_context.getQueryContext()->getSettingsRef(); + sort_column_description.compile_sort_description = settings.compile_sort_description; + sort_column_description.min_count_to_compile_sort_description = settings.min_count_to_compile_sort_description; + + return sort_column_description; +} + +} diff --git a/src/Planner/PlannerSorting.h b/src/Planner/PlannerSorting.h new file mode 100644 index 00000000000..c4e4e634973 --- /dev/null +++ b/src/Planner/PlannerSorting.h @@ -0,0 +1,14 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +/// Extract sort description from order by node +SortDescription extractSortDescription(const QueryTreeNodePtr & order_by_node, const PlannerContext & planner_context); + +} + diff --git a/src/Planner/PlannerWindowFunctions.cpp b/src/Planner/PlannerWindowFunctions.cpp new file mode 100644 index 00000000000..4fe60a18099 --- /dev/null +++ b/src/Planner/PlannerWindowFunctions.cpp @@ -0,0 +1,146 @@ +#include + +#include +#include + +#include + +#include +#include + +namespace DB +{ + +namespace +{ + +WindowDescription extractWindowDescriptionFromWindowNode(const QueryTreeNodePtr & node, const PlannerContext & planner_context) +{ + auto & window_node = node->as(); + + WindowDescription window_description; + window_description.window_name = calculateWindowNodeActionName(node, planner_context); + + for (const auto & partition_by_node : window_node.getPartitionBy().getNodes()) + { + auto partition_by_node_action_name = calculateActionNodeName(partition_by_node, planner_context); + auto partition_by_sort_column_description = SortColumnDescription(partition_by_node_action_name, 1 /* direction */, 1 /* nulls_direction */); + window_description.partition_by.push_back(std::move(partition_by_sort_column_description)); + } + + window_description.order_by = extractSortDescription(window_node.getOrderByNode(), planner_context); + + window_description.full_sort_description = window_description.partition_by; + window_description.full_sort_description.insert(window_description.full_sort_description.end(), window_description.order_by.begin(), window_description.order_by.end()); + + /// WINDOW frame is validated during query analysis stage + window_description.frame = window_node.getWindowFrame(); + + const auto & query_context = planner_context.getQueryContext(); + const auto & query_context_settings = query_context->getSettingsRef(); + + bool compile_sort_description = query_context_settings.compile_sort_description; + size_t min_count_to_compile_sort_description = query_context_settings.min_count_to_compile_sort_description; + + window_description.partition_by.compile_sort_description = compile_sort_description; + window_description.partition_by.min_count_to_compile_sort_description = min_count_to_compile_sort_description; + + window_description.order_by.compile_sort_description = compile_sort_description; + window_description.order_by.min_count_to_compile_sort_description = min_count_to_compile_sort_description; + + window_description.full_sort_description.compile_sort_description = compile_sort_description; + 
window_description.full_sort_description.min_count_to_compile_sort_description = min_count_to_compile_sort_description; + + return window_description; +} + +} + +std::vector extractWindowDescriptions(const QueryTreeNodes & window_function_nodes, const PlannerContext & planner_context) +{ + std::unordered_map window_name_to_description; + + for (const auto & window_function_node : window_function_nodes) + { + auto & window_function_node_typed = window_function_node->as(); + + auto function_window_description = extractWindowDescriptionFromWindowNode(window_function_node_typed.getWindowNode(), planner_context); + auto window_name = function_window_description.window_name; + + auto [it, _] = window_name_to_description.emplace(window_name, std::move(function_window_description)); + auto & window_description = it->second; + + WindowFunctionDescription window_function; + window_function.function_node = nullptr; + window_function.column_name = calculateActionNodeName(window_function_node, planner_context); + window_function.aggregate_function = window_function_node_typed.getAggregateFunction(); + + const auto & parameters_nodes = window_function_node_typed.getParameters().getNodes(); + window_function.function_parameters.reserve(parameters_nodes.size()); + + for (const auto & parameter_node : parameters_nodes) + { + /// Function parameters constness validated during analysis stage + window_function.function_parameters.push_back(parameter_node->getConstantValue().getValue()); + } + + const auto & arguments_nodes = window_function_node_typed.getArguments().getNodes(); + size_t arguments_nodes_size = arguments_nodes.size(); + + window_function.argument_names.reserve(arguments_nodes_size); + window_function.argument_types.reserve(arguments_nodes_size); + + for (const auto & argument_node : arguments_nodes) + { + String argument_node_name = calculateActionNodeName(argument_node, planner_context); + window_function.argument_names.emplace_back(std::move(argument_node_name)); + window_function.argument_types.emplace_back(argument_node->getResultType()); + } + + window_description.window_functions.push_back(window_function); + } + + std::vector result; + result.reserve(window_name_to_description.size()); + + for (auto && [_, window_description] : window_name_to_description) + result.push_back(std::move(window_description)); + + return result; +} + +void sortWindowDescriptions(std::vector & window_descriptions) +{ + auto window_description_comparator = [](const WindowDescription & lhs, const WindowDescription & rhs) + { + const auto & left = lhs.full_sort_description; + const auto & right = rhs.full_sort_description; + + for (size_t i = 0; i < std::min(left.size(), right.size()); ++i) + { + if (left[i].column_name < right[i].column_name) + return true; + else if (left[i].column_name > right[i].column_name) + return false; + else if (left[i].direction < right[i].direction) + return true; + else if (left[i].direction > right[i].direction) + return false; + else if (left[i].nulls_direction < right[i].nulls_direction) + return true; + else if (left[i].nulls_direction > right[i].nulls_direction) + return false; + + assert(left[i] == right[i]); + } + + /** Note that we check the length last, because we want to put together the + * sort orders that have common prefix but different length. 
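+ * For example, a window ordered by (a, b) is placed before a window ordered only by (a).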
+ */ + return left.size() > right.size(); + }; + + ::sort(window_descriptions.begin(), window_descriptions.end(), window_description_comparator); +} + +} diff --git a/src/Planner/PlannerWindowFunctions.h b/src/Planner/PlannerWindowFunctions.h new file mode 100644 index 00000000000..1552ef5a71f --- /dev/null +++ b/src/Planner/PlannerWindowFunctions.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +#include + +#include + +namespace DB +{ + +/// Extract window descriptions from window function nodes +std::vector extractWindowDescriptions(const QueryTreeNodes & window_function_nodes, const PlannerContext & planner_context); + +/** Try to sort window descriptions in such an order that the window with the longest + * sort description goes first, and all windows that use its prefixes follow. + */ +void sortWindowDescriptions(std::vector & window_descriptions); + +} diff --git a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h new file mode 100644 index 00000000000..0918c35a8ef --- /dev/null +++ b/src/Planner/TableExpressionData.h @@ -0,0 +1,186 @@ +#pragma once + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +using ColumnIdentifier = std::string; + +/** Table expression data is created for each table expression that takes part in a query. + * Table expression data has information about columns that participate in the query, their name to identifier mapping, + * and additional table expression properties. + */ +class TableExpressionData +{ +public: + using ColumnNameToColumnIdentifier = std::unordered_map; + + using ColumnIdentifierToColumnName = std::unordered_map; + + /// Return true if column with name exists, false otherwise + bool hasColumn(const std::string & column_name) const + { + return alias_columns_names.contains(column_name) || columns_names.contains(column_name); + } + + /** Add column to table expression data. + * Column identifier must be created using global planner context. + * + * A logical error exception is thrown if the column already exists. + */ + void addColumn(const NameAndTypePair & column, const ColumnIdentifier & column_identifier) + { + if (hasColumn(column.name)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column with name {} already exists", column.name); + + columns_names.insert(column.name); + columns.push_back(column); + column_name_to_column_identifier.emplace(column.name, column_identifier); + column_identifier_to_column_name.emplace(column_identifier, column.name); + } + + /** Add column if it does not exist in table expression data. + * Column identifier must be created using global planner context.
+ */ + void addColumnIfNotExists(const NameAndTypePair & column, const ColumnIdentifier & column_identifier) + { + if (hasColumn(column.name)) + return; + + columns_names.insert(column.name); + columns.push_back(column); + column_name_to_column_identifier.emplace(column.name, column_identifier); + column_identifier_to_column_name.emplace(column_identifier, column.name); + } + + /// Add alias column name + void addAliasColumnName(const std::string & column_name) + { + alias_columns_names.insert(column_name); + } + + /// Get alias columns names + const NameSet & getAliasColumnsNames() const + { + return alias_columns_names; + } + + /// Get columns names + const NameSet & getColumnsNames() const + { + return columns_names; + } + + /// Get columns + const NamesAndTypesList & getColumns() const + { + return columns; + } + + /// Get column name to column identifier map + const ColumnNameToColumnIdentifier & getColumnNameToIdentifier() const + { + return column_name_to_column_identifier; + } + + /// Get column identifier to column name map + const ColumnNameToColumnIdentifier & getColumnIdentifierToColumnName() const + { + return column_identifier_to_column_name; + } + + /** Get column identifier for column name. + * Exception is thrown if there are no column identifier for column name. + */ + const ColumnIdentifier & getColumnIdentifierOrThrow(const std::string & column_name) const + { + auto it = column_name_to_column_identifier.find(column_name); + if (it == column_name_to_column_identifier.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Column identifier for name {} does not exists", + column_name); + + return it->second; + } + + /** Get column identifier for column name. + * Null is returned if there are no column identifier for column name. + */ + const ColumnIdentifier * getColumnIdentifierOrNull(const std::string & column_name) const + { + auto it = column_name_to_column_identifier.find(column_name); + if (it == column_name_to_column_identifier.end()) + return nullptr; + + return &it->second; + } + + /** Get column name for column identifier. + * Exception is thrown if there are no column name for column identifier. + */ + const std::string & getColumnNameOrThrow(const ColumnIdentifier & column_identifier) const + { + auto it = column_identifier_to_column_name.find(column_identifier); + if (it == column_identifier_to_column_name.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Column name for identifier {} does not exists", + column_identifier); + + return it->second; + } + + /** Get column name for column identifier. + * Null is returned if there are no column name for column identifier. + */ + const std::string * getColumnNameOrNull(const ColumnIdentifier & column_identifier) const + { + auto it = column_identifier_to_column_name.find(column_identifier); + if (it == column_identifier_to_column_name.end()) + return nullptr; + + return &it->second; + } + + /** Returns true if storage is remote, false otherwise. + * + * Valid only for table and table function node. 
+ */ + bool isRemote() const + { + return is_remote; + } + + /// Set is storage remote value + void setIsRemote(bool is_remote_value) + { + is_remote = is_remote_value; + } + +private: + /// Valid for table, table function, query, union table expression nodes + NamesAndTypesList columns; + + /// Valid for table, table function, query, union table expression nodes + NameSet columns_names; + + /// Valid only for table table expression node + NameSet alias_columns_names; + + /// Valid for table, table function, query, union table expression nodes + ColumnNameToColumnIdentifier column_name_to_column_identifier; + + /// Valid for table, table function, query, union table expression nodes + ColumnIdentifierToColumnName column_identifier_to_column_name; + + /// Is storage remote + bool is_remote = false; +}; + +} diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp new file mode 100644 index 00000000000..74918285453 --- /dev/null +++ b/src/Planner/Utils.cpp @@ -0,0 +1,314 @@ +#include + +#include +#include +#include + +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TYPE_MISMATCH; + extern const int LOGICAL_ERROR; +} + +String dumpQueryPlan(QueryPlan & query_plan) +{ + WriteBufferFromOwnString query_plan_buffer; + query_plan.explainPlan(query_plan_buffer, QueryPlan::ExplainPlanOptions{true, true, true, true}); + + return query_plan_buffer.str(); +} + +String dumpQueryPipeline(QueryPlan & query_plan) +{ + QueryPlan::ExplainPipelineOptions explain_pipeline; + WriteBufferFromOwnString query_pipeline_buffer; + query_plan.explainPipeline(query_pipeline_buffer, explain_pipeline); + + return query_pipeline_buffer.str(); +} + +Block buildCommonHeaderForUnion(const Blocks & queries_headers) +{ + size_t num_selects = queries_headers.size(); + Block common_header = queries_headers.front(); + size_t columns_size = common_header.columns(); + + for (size_t query_number = 1; query_number < num_selects; ++query_number) + { + if (queries_headers.at(query_number).columns() != columns_size) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Different number of columns in UNION elements: {} and {}", + common_header.dumpNames(), + queries_headers[query_number].dumpNames()); + } + + std::vector columns(num_selects); + + for (size_t column_number = 0; column_number < columns_size; ++column_number) + { + for (size_t i = 0; i < num_selects; ++i) + columns[i] = &queries_headers[i].getByPosition(column_number); + + ColumnWithTypeAndName & result_element = common_header.getByPosition(column_number); + result_element = getLeastSuperColumn(columns); + } + + return common_header; +} + +ASTPtr queryNodeToSelectQuery(const QueryTreeNodePtr & query_node) +{ + auto & query_node_typed = query_node->as(); + auto result_ast = query_node_typed.toAST(); + + while (true) + { + if (auto * select_query = result_ast->as()) + break; + else if (auto * select_with_union = result_ast->as()) + result_ast = select_with_union->list_of_selects->children.at(0); + else if (auto * subquery = result_ast->as()) + result_ast = subquery->children.at(0); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Query node invalid conversion to select query"); + } + + if (result_ast == nullptr) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Query node invalid conversion to select query"); + + return result_ast; +} + +/** There are no limits on the maximum size of the result for the subquery. 
+ * Since the result of the query is not the result of the entire query. + */ +ContextPtr buildSubqueryContext(const ContextPtr & context) +{ + /** The subquery in the IN / JOIN section does not have any restrictions on the maximum size of the result. + * Because the result of this query is not the result of the entire query. + * Constraints work instead + * max_rows_in_set, max_bytes_in_set, set_overflow_mode, + * max_rows_in_join, max_bytes_in_join, join_overflow_mode, + * which are checked separately (in the Set, Join objects). + */ + auto subquery_context = Context::createCopy(context); + Settings subquery_settings = context->getSettings(); + subquery_settings.max_result_rows = 0; + subquery_settings.max_result_bytes = 0; + /// The calculation of extremes does not make sense and is not necessary (if you do it, then the extremes of the subquery can be taken for whole query). + subquery_settings.extremes = false; + subquery_context->setSettings(subquery_settings); + + return subquery_context; +} + +namespace +{ + +StreamLocalLimits getLimitsForStorage(const Settings & settings, const SelectQueryOptions & options) +{ + StreamLocalLimits limits; + limits.mode = LimitsMode::LIMITS_TOTAL; + limits.size_limits = SizeLimits(settings.max_rows_to_read, settings.max_bytes_to_read, settings.read_overflow_mode); + limits.speed_limits.max_execution_time = settings.max_execution_time; + limits.timeout_overflow_mode = settings.timeout_overflow_mode; + + /** Quota and minimal speed restrictions are checked on the initiating server of the request, and not on remote servers, + * because the initiating server has a summary of the execution of the request on all servers. + * + * But limits on data size to read and maximum execution time are reasonable to check both on initiator and + * additionally on each remote server, because these limits are checked per block of data processed, + * and remote servers may process way more blocks of data than are received by initiator. + * + * The limits to throttle maximum execution speed is also checked on all servers. + */ + if (options.to_stage == QueryProcessingStage::Complete) + { + limits.speed_limits.min_execution_rps = settings.min_execution_speed; + limits.speed_limits.min_execution_bps = settings.min_execution_speed_bytes; + } + + limits.speed_limits.max_execution_rps = settings.max_execution_speed; + limits.speed_limits.max_execution_bps = settings.max_execution_speed_bytes; + limits.speed_limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; + + return limits; +} + +} + +StorageLimits buildStorageLimits(const Context & context, const SelectQueryOptions & options) +{ + const auto & settings = context.getSettingsRef(); + + StreamLocalLimits limits; + SizeLimits leaf_limits; + + /// Set the limits and quota for reading data, the speed and time of the query. 
+ if (!options.ignore_limits) + { + limits = getLimitsForStorage(settings, options); + leaf_limits = SizeLimits(settings.max_rows_to_read_leaf, settings.max_bytes_to_read_leaf, settings.read_overflow_mode_leaf); + } + + return {limits, leaf_limits}; +} + +ActionsDAGPtr buildActionsDAGFromExpressionNode(const QueryTreeNodePtr & expression_node, const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context) +{ + ActionsDAGPtr action_dag = std::make_shared(input_columns); + PlannerActionsVisitor actions_visitor(planner_context); + auto expression_dag_index_nodes = actions_visitor.visit(action_dag, expression_node); + action_dag->getOutputs() = std::move(expression_dag_index_nodes); + + return action_dag; +} + +bool sortDescriptionIsPrefix(const SortDescription & prefix, const SortDescription & full) +{ + size_t prefix_size = prefix.size(); + if (prefix_size > full.size()) + return false; + + for (size_t i = 0; i < prefix_size; ++i) + { + if (full[i] != prefix[i]) + return false; + } + + return true; +} + +bool queryHasArrayJoinInJoinTree(const QueryTreeNodePtr & query_node) +{ + const auto & query_node_typed = query_node->as(); + + std::vector join_tree_nodes_to_process; + join_tree_nodes_to_process.push_back(query_node_typed.getJoinTree()); + + while (!join_tree_nodes_to_process.empty()) + { + auto join_tree_node_to_process = join_tree_nodes_to_process.back(); + join_tree_nodes_to_process.pop_back(); + + auto join_tree_node_type = join_tree_node_to_process->getNodeType(); + + switch (join_tree_node_type) + { + case QueryTreeNodeType::TABLE: + [[fallthrough]]; + case QueryTreeNodeType::QUERY: + [[fallthrough]]; + case QueryTreeNodeType::UNION: + [[fallthrough]]; + case QueryTreeNodeType::TABLE_FUNCTION: + { + break; + } + case QueryTreeNodeType::ARRAY_JOIN: + { + return true; + } + case QueryTreeNodeType::JOIN: + { + auto & join_node = join_tree_node_to_process->as(); + join_tree_nodes_to_process.push_back(join_node.getLeftTableExpression()); + join_tree_nodes_to_process.push_back(join_node.getRightTableExpression()); + break; + } + default: + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected node type for table expression. Expected table, table function, query, union, join or array join. 
Actual {}", + join_tree_node_to_process->getNodeTypeName()); + } + } + } + + return false; +} + +bool queryHasWithTotalsInAnySubqueryInJoinTree(const QueryTreeNodePtr & query_node) +{ + const auto & query_node_typed = query_node->as(); + + std::vector join_tree_nodes_to_process; + join_tree_nodes_to_process.push_back(query_node_typed.getJoinTree()); + + while (!join_tree_nodes_to_process.empty()) + { + auto join_tree_node_to_process = join_tree_nodes_to_process.back(); + join_tree_nodes_to_process.pop_back(); + + auto join_tree_node_type = join_tree_node_to_process->getNodeType(); + + switch (join_tree_node_type) + { + case QueryTreeNodeType::TABLE: + [[fallthrough]]; + case QueryTreeNodeType::TABLE_FUNCTION: + { + break; + } + case QueryTreeNodeType::QUERY: + { + auto & query_node_to_process = join_tree_node_to_process->as(); + if (query_node_to_process.isGroupByWithTotals()) + return true; + + join_tree_nodes_to_process.push_back(query_node_to_process.getJoinTree()); + break; + } + case QueryTreeNodeType::UNION: + { + auto & union_node = join_tree_node_to_process->as(); + auto & union_queries = union_node.getQueries().getNodes(); + + for (auto & union_query : union_queries) + join_tree_nodes_to_process.push_back(union_query); + break; + } + case QueryTreeNodeType::ARRAY_JOIN: + { + auto & array_join_node = join_tree_node_to_process->as(); + join_tree_nodes_to_process.push_back(array_join_node.getTableExpression()); + break; + } + case QueryTreeNodeType::JOIN: + { + auto & join_node = join_tree_node_to_process->as(); + join_tree_nodes_to_process.push_back(join_node.getLeftTableExpression()); + join_tree_nodes_to_process.push_back(join_node.getRightTableExpression()); + break; + } + default: + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected node type for table expression. Expected table, table function, query, union, join or array join. Actual {}", + join_tree_node_to_process->getNodeTypeName()); + } + } + } + + return false; +} + +} diff --git a/src/Planner/Utils.h b/src/Planner/Utils.h new file mode 100644 index 00000000000..909cea3bf8f --- /dev/null +++ b/src/Planner/Utils.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include + +#include + +#include +#include + +#include + +#include + +#include + +#include + +namespace DB +{ + +/// Dump query plan +String dumpQueryPlan(QueryPlan & query_plan); + +/// Dump query plan result pipeline +String dumpQueryPipeline(QueryPlan & query_plan); + +/// Build common header for UNION query +Block buildCommonHeaderForUnion(const Blocks & queries_headers); + +/// Convert query node to ASTSelectQuery +ASTPtr queryNodeToSelectQuery(const QueryTreeNodePtr & query_node); + +/// Build context for subquery execution +ContextPtr buildSubqueryContext(const ContextPtr & context); + +/// Build limits for storage +StorageLimits buildStorageLimits(const Context & context, const SelectQueryOptions & options); + +/** Convert query tree expression node into actions dag. + * Inputs are not used for actions dag outputs. + * Only root query tree expression node is used as actions dag output. 
+ */ +ActionsDAGPtr buildActionsDAGFromExpressionNode(const QueryTreeNodePtr & expression_node, + const ColumnsWithTypeAndName & input_columns, + const PlannerContextPtr & planner_context); + +/// Returns true if prefix sort description is prefix of full sort descriptor, false otherwise +bool sortDescriptionIsPrefix(const SortDescription & prefix, const SortDescription & full); + +/// Returns true if query node JOIN TREE contains ARRAY JOIN node, false otherwise +bool queryHasArrayJoinInJoinTree(const QueryTreeNodePtr & query_node); + +/** Returns true if query node JOIN TREE contains QUERY node with WITH TOTALS, false otherwise. + * Function is applied recursively to subqueries in JOIN TREE. + */ +bool queryHasWithTotalsInAnySubqueryInJoinTree(const QueryTreeNodePtr & query_node); + +} diff --git a/src/Planner/examples/CMakeLists.txt b/src/Planner/examples/CMakeLists.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/Planner/tests/CMakeLists.txt b/src/Planner/tests/CMakeLists.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp index bd079c0b8a9..b52d86aa725 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.cpp +++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp @@ -122,11 +122,8 @@ void CreatingSetsStep::describePipeline(FormatSettings & settings) const IQueryPlanStep::describePipeline(processors, settings); } -void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, ContextPtr context) +void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::SubqueriesForSets subqueries_for_sets, ContextPtr context) { - if (!prepared_sets || prepared_sets->empty()) - return; - DataStreams input_streams; input_streams.emplace_back(query_plan.getCurrentDataStream()); @@ -134,7 +131,7 @@ void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, plans.emplace_back(std::make_unique(std::move(query_plan))); query_plan = QueryPlan(); - for (auto & [description, subquery_for_set] : prepared_sets->detachSubqueries()) + for (auto & [description, subquery_for_set] : subqueries_for_sets) { if (!subquery_for_set.hasSource()) continue; @@ -166,4 +163,12 @@ void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, query_plan.unitePlans(std::move(creating_sets), std::move(plans)); } +void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, ContextPtr context) +{ + if (!prepared_sets || prepared_sets->empty()) + return; + + addCreatingSetsStep(query_plan, prepared_sets->detachSubqueries(), context); +} + } diff --git a/src/Processors/QueryPlan/CreatingSetsStep.h b/src/Processors/QueryPlan/CreatingSetsStep.h index 9c61eb2012c..9995af7bca7 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.h +++ b/src/Processors/QueryPlan/CreatingSetsStep.h @@ -49,6 +49,8 @@ private: Processors processors; }; +void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::SubqueriesForSets subqueries_for_sets, ContextPtr context); + void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, ContextPtr context); } diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.h b/src/Processors/QueryPlan/IntersectOrExceptStep.h index b2738cb297f..d7eab574431 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.h +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.h @@ -8,9 +8,9 @@ namespace DB class IntersectOrExceptStep : public IQueryPlanStep { -using Operator = 
ASTSelectIntersectExceptQuery::Operator; - public: + using Operator = ASTSelectIntersectExceptQuery::Operator; + /// max_threads is used to limit the number of threads for result pipeline. IntersectOrExceptStep(DataStreams input_streams_, Operator operator_, size_t max_threads_ = 0); diff --git a/src/Processors/QueryPlan/Optimizations/reuseStorageOrderingForWindowFunctions.cpp b/src/Processors/QueryPlan/Optimizations/reuseStorageOrderingForWindowFunctions.cpp index a8431d38a78..f5e7a438877 100644 --- a/src/Processors/QueryPlan/Optimizations/reuseStorageOrderingForWindowFunctions.cpp +++ b/src/Processors/QueryPlan/Optimizations/reuseStorageOrderingForWindowFunctions.cpp @@ -62,7 +62,7 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, } auto context = read_from_merge_tree->getContext(); - if (!context->getSettings().optimize_read_in_window_order) + if (!context->getSettings().optimize_read_in_window_order || context->getSettingsRef().use_analyzer) { return 0; } @@ -70,6 +70,10 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, const auto & query_info = read_from_merge_tree->getQueryInfo(); const auto * select_query = query_info.query->as(); + /// TODO: Analyzer syntax analyzer result + if (!query_info.syntax_analyzer_result) + return 0; + ManyExpressionActions order_by_elements_actions; const auto & window_desc = window->getWindowDescription(); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 59f9668d3a8..62b6eddf6ce 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -945,7 +945,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( result.index_stats); result.sampling = MergeTreeDataSelectExecutor::getSampling( - select, + query_info, metadata_snapshot->getColumns().getAllPhysical(), parts, *key_condition, @@ -965,7 +965,13 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( auto reader_settings = getMergeTreeReaderSettings(context, query_info); bool use_skip_indexes = settings.use_skip_indexes; - if (select.final() && !settings.use_skip_indexes_if_final) + bool final = false; + if (query_info.table_expression_modifiers) + final = query_info.table_expression_modifiers->hasFinal(); + else + final = select.final(); + + if (final && !settings.use_skip_indexes_if_final) use_skip_indexes = false; result.parts_with_ranges = MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipIndexes( @@ -1097,7 +1103,13 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons Names column_names_to_read = std::move(result.column_names_to_read); const auto & select = query_info.query->as(); - if (!select.final() && result.sampling.use_sampling) + bool final = false; + if (query_info.table_expression_modifiers) + final = query_info.table_expression_modifiers->hasFinal(); + else + final = select.final(); + + if (!final && result.sampling.use_sampling) { /// Add columns needed for `sample_by_ast` to `column_names_to_read`. /// Skip this if final was used, because such columns were already added from PK. @@ -1112,7 +1124,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons const auto & input_order_info = query_info.getInputOrderInfo(); - if (select.final()) + if (final) { /// Add columns needed to calculate the sorting expression and the sign. 
std::vector add_columns = metadata_for_reading->getColumnsRequiredForSortingKey(); diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index d7c33c8663b..b282fb79514 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -848,9 +848,11 @@ Block KeyCondition::getBlockWithConstants( { DataTypeUInt8().createColumnConstWithDefaultValue(1), std::make_shared(), "_dummy" } }; - const auto expr_for_constant_folding = ExpressionAnalyzer(query, syntax_analyzer_result, context).getConstActions(); - - expr_for_constant_folding->execute(result); + if (syntax_analyzer_result) + { + const auto expr_for_constant_folding = ExpressionAnalyzer(query, syntax_analyzer_result, context).getConstActions(); + expr_for_constant_folding->execute(result); + } return result; } @@ -887,13 +889,22 @@ KeyCondition::KeyCondition( key_columns[name] = i; } + if (!syntax_analyzer_result) + { + rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN); + return; + } + /** Evaluation of expressions that depend only on constants. * For the index to be used, if it is written, for example `WHERE Date = toDate(now())`. */ Block block_with_constants = getBlockWithConstants(query, syntax_analyzer_result, context); - for (const auto & [name, _] : syntax_analyzer_result->array_join_result_to_source) - array_joined_columns.insert(name); + if (syntax_analyzer_result) + { + for (const auto & [name, _] : syntax_analyzer_result->array_join_result_to_source) + array_joined_columns.insert(name); + } const ASTSelectQuery & select = query->as(); @@ -964,6 +975,12 @@ KeyCondition::KeyCondition( key_columns[name] = i; } + if (!syntax_analyzer_result) + { + rpn.emplace_back(RPNElement::FUNCTION_UNKNOWN); + return; + } + for (const auto & [name, _] : syntax_analyzer_result->array_join_result_to_source) array_joined_columns.insert(name); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 66950734d5f..510e8d0da84 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5702,6 +5702,11 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg { const auto & metadata_snapshot = storage_snapshot->metadata; const auto & settings = query_context->getSettingsRef(); + + /// TODO: Analyzer syntax analyzer result + if (!query_info.syntax_analyzer_result) + return std::nullopt; + if (!settings.allow_experimental_projection_optimization || query_info.ignore_projections || query_info.is_projection_query || settings.aggregate_functions_null_for_empty /* projections don't work correctly with this setting */) return std::nullopt; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 1a5c94a2e26..b0ef1522685 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -333,6 +333,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( SimpleMergeSelector::Settings merge_settings; /// Override value from table settings merge_settings.max_parts_to_merge_at_once = data_settings->max_parts_to_merge_at_once; + merge_settings.min_age_to_force_merge = data_settings->min_age_to_force_merge_seconds; if (aggressive) merge_settings.base = 1; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 5b3497bf926..b81fafbc578 100644 --- 
a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -107,14 +107,12 @@ static std::string toString(const RelativeSize & x) } /// Converts sample size to an approximate number of rows (ex. `SAMPLE 1000000`) to relative value (ex. `SAMPLE 0.1`). -static RelativeSize convertAbsoluteSampleSizeToRelative(const ASTPtr & node, size_t approx_total_rows) +static RelativeSize convertAbsoluteSampleSizeToRelative(const ASTSampleRatio::Rational & ratio, size_t approx_total_rows) { if (approx_total_rows == 0) return 1; - const auto & node_sample = node->as(); - - auto absolute_sample_size = node_sample.ratio.numerator / node_sample.ratio.denominator; + auto absolute_sample_size = ratio.numerator / ratio.denominator; return std::min(RelativeSize(1), RelativeSize(absolute_sample_size) / RelativeSize(approx_total_rows)); } @@ -467,7 +465,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( } MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( - const ASTSelectQuery & select, + const SelectQueryInfo & select_query_info, NamesAndTypesList available_real_columns, const MergeTreeData::DataPartsVector & parts, KeyCondition & key_condition, @@ -484,23 +482,42 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( RelativeSize relative_sample_size = 0; RelativeSize relative_sample_offset = 0; - auto select_sample_size = select.sampleSize(); - auto select_sample_offset = select.sampleOffset(); + bool final = false; + std::optional sample_size_ratio; + std::optional sample_offset_ratio; - if (select_sample_size) + if (select_query_info.table_expression_modifiers) { - relative_sample_size.assign( - select_sample_size->as().ratio.numerator, - select_sample_size->as().ratio.denominator); + const auto & table_expression_modifiers = *select_query_info.table_expression_modifiers; + final = table_expression_modifiers.hasFinal(); + sample_size_ratio = table_expression_modifiers.getSampleSizeRatio(); + sample_offset_ratio = table_expression_modifiers.getSampleOffsetRatio(); + } + else + { + auto & select = select_query_info.query->as(); + + final = select.final(); + auto select_sample_size = select.sampleSize(); + auto select_sample_offset = select.sampleOffset(); + + if (select_sample_size) + sample_size_ratio = select_sample_size->as().ratio; + + if (select_sample_offset) + sample_offset_ratio = select_sample_offset->as().ratio; + } + + if (sample_size_ratio) + { + relative_sample_size.assign(sample_size_ratio->numerator, sample_size_ratio->denominator); if (relative_sample_size < 0) throw Exception("Negative sample size", ErrorCodes::ARGUMENT_OUT_OF_BOUND); relative_sample_offset = 0; - if (select_sample_offset) - relative_sample_offset.assign( - select_sample_offset->as().ratio.numerator, - select_sample_offset->as().ratio.denominator); + if (sample_offset_ratio) + relative_sample_offset.assign(sample_offset_ratio->numerator, sample_offset_ratio->denominator); if (relative_sample_offset < 0) throw Exception("Negative sample offset", ErrorCodes::ARGUMENT_OUT_OF_BOUND); @@ -513,7 +530,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( if (relative_sample_size > 1) { - relative_sample_size = convertAbsoluteSampleSizeToRelative(select_sample_size, approx_total_rows); + relative_sample_size = convertAbsoluteSampleSizeToRelative(*sample_size_ratio, approx_total_rows); LOG_DEBUG(log, "Selected relative sample size: {}", toString(relative_sample_size)); } @@ -526,7 +543,7 @@ 
MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( if (relative_sample_offset > 1) { - relative_sample_offset = convertAbsoluteSampleSizeToRelative(select_sample_offset, approx_total_rows); + relative_sample_offset = convertAbsoluteSampleSizeToRelative(*sample_offset_ratio, approx_total_rows); LOG_DEBUG(log, "Selected relative sample offset: {}", toString(relative_sample_offset)); } } @@ -660,7 +677,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( /// So, assume that we already have calculated column. ASTPtr sampling_key_ast = metadata_snapshot->getSamplingKeyAST(); - if (select.final()) + if (final) { sampling_key_ast = std::make_shared(sampling_key.column_names[0]); /// We do spoil available_real_columns here, but it is not used later. diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 66dd7f7e5db..541f6446674 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -201,7 +201,7 @@ public: /// Also, calculate _sample_factor if needed. /// Also, update key condition with selected sampling range. static MergeTreeDataSelectSamplingData getSampling( - const ASTSelectQuery & select, + const SelectQueryInfo & select_query_info, NamesAndTypesList available_real_columns, const MergeTreeData::DataPartsVector & parts, KeyCondition & key_condition, diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index a0db39a97f1..3fecb85f484 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -62,6 +62,7 @@ struct Settings; M(UInt64, merge_tree_clear_old_temporary_directories_interval_seconds, 60, "The period of executing the clear old temporary directories operation in background.", 0) \ M(UInt64, merge_tree_clear_old_parts_interval_seconds, 1, "The period of executing the clear old parts operation in background.", 0) \ M(UInt64, merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds, 1ULL * 3600 * 24 * 30, "Remove old broken detached parts in the background if they remained intouched for a specified by this setting period of time.", 0) \ + M(UInt64, min_age_to_force_merge_seconds, 0, "If all parts in a certain range are older than this value, range will be always eligible for merging. Set to 0 to disable.", 0) \ M(UInt64, merge_tree_enable_clear_old_broken_detached, false, "Enable clearing old broken detached parts operation in background.", 0) \ M(Bool, remove_rolled_back_parts_immediately, 1, "Setting for an incomplete experimental feature.", 0) \ \ diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index e2b23d75746..10ec4702b53 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -26,19 +27,12 @@ namespace DB namespace ErrorCodes { extern const int REPLICA_IS_ALREADY_ACTIVE; - extern const int REPLICA_STATUS_CHANGED; - -} - -namespace -{ - constexpr auto retry_period_ms = 1000; } /// Used to check whether it's us who set node `is_active`, or not. 
static String generateActiveNodeIdentifier() { - return "pid: " + toString(getpid()) + ", random: " + toString(randomSeed()); + return Field(ServerUUID::get()).dump(); } ReplicatedMergeTreeRestartingThread::ReplicatedMergeTreeRestartingThread(StorageReplicatedMergeTree & storage_) @@ -58,27 +52,34 @@ void ReplicatedMergeTreeRestartingThread::run() if (need_stop) return; - size_t reschedule_period_ms = check_period_ms; + /// In case of any exceptions we want to rerun this task as fast as possible but we also don't want to keep retrying immediately + /// in a tight loop (as fast as tasks can be processed), so we'll retry in between 100 and 10000 ms + const size_t backoff_ms = 100 * ((consecutive_check_failures + 1) * (consecutive_check_failures + 2)) / 2; + const size_t next_failure_retry_ms = std::min(size_t{10000}, backoff_ms); try { bool replica_is_active = runImpl(); - if (!replica_is_active) - reschedule_period_ms = retry_period_ms; - } - catch (const Exception & e) - { - /// We couldn't activate table let's set it into readonly mode - partialShutdown(); - tryLogCurrentException(log, __PRETTY_FUNCTION__); - - if (e.code() == ErrorCodes::REPLICA_STATUS_CHANGED) - reschedule_period_ms = 0; + if (replica_is_active) + { + consecutive_check_failures = 0; + task->scheduleAfter(check_period_ms); + } + else + { + consecutive_check_failures++; + task->scheduleAfter(next_failure_retry_ms); + } } catch (...) { + consecutive_check_failures++; + task->scheduleAfter(next_failure_retry_ms); + + /// We couldn't activate the table, so let's set it into readonly mode if necessary + /// We do this after scheduling the task in case it throws partialShutdown(); - tryLogCurrentException(log, __PRETTY_FUNCTION__); + tryLogCurrentException(log, "Failed to restart the table. Will try again"); } if (first_time) @@ -92,14 +93,6 @@ void ReplicatedMergeTreeRestartingThread::run() storage.startup_event.set(); first_time = false; } - - if (need_stop) - return; - - if (reschedule_period_ms) - task->scheduleAfter(reschedule_period_ms); - else - task->schedule(); } bool ReplicatedMergeTreeRestartingThread::runImpl() @@ -132,8 +125,8 @@ bool ReplicatedMergeTreeRestartingThread::runImpl() } catch (const Coordination::Exception &) { - /// The exception when you try to zookeeper_init usually happens if DNS does not work. We will try to do it again. - tryLogCurrentException(log, __PRETTY_FUNCTION__); + /// The exception when you try to zookeeper_init usually happens if DNS does not work or the connection with ZK fails + tryLogCurrentException(log, "Failed to establish a new ZK connection.
Will try again"); assert(storage.is_readonly); return false; } @@ -158,12 +151,15 @@ bool ReplicatedMergeTreeRestartingThread::runImpl() storage.cleanup_thread.start(); storage.part_check_thread.start(); + LOG_DEBUG(log, "Table started successfully"); + return true; } bool ReplicatedMergeTreeRestartingThread::tryStartup() { + LOG_DEBUG(log, "Trying to start replica up"); try { removeFailedQuorumParts(); @@ -177,9 +173,7 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() try { storage.queue.initialize(zookeeper); - storage.queue.load(zookeeper); - storage.queue.createLogEntriesToFetchBrokenParts(); /// pullLogsToQueue() after we mark replica 'is_active' (and after we repair if it was lost); @@ -302,7 +296,7 @@ void ReplicatedMergeTreeRestartingThread::activateReplica() ReplicatedMergeTreeAddress address = storage.getReplicatedMergeTreeAddress(); String is_active_path = fs::path(storage.replica_path) / "is_active"; - zookeeper->waitForEphemeralToDisappearIfAny(is_active_path); + zookeeper->handleEphemeralNodeExistence(is_active_path, active_node_identifier); /// Simultaneously declare that this replica is active, and update the host. Coordination::Requests ops; @@ -348,7 +342,6 @@ void ReplicatedMergeTreeRestartingThread::partialShutdown(bool part_of_full_shut storage.replica_is_active_node = nullptr; LOG_TRACE(log, "Waiting for threads to finish"); - storage.merge_selecting_task->deactivate(); storage.queue_updating_task->deactivate(); storage.mutations_updating_task->deactivate(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index 3d443a236ed..bb4b0c0fdd2 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -41,6 +41,7 @@ private: BackgroundSchedulePool::TaskHolder task; Int64 check_period_ms; /// The frequency of checking expiration of session in ZK. + UInt32 consecutive_check_failures = 0; /// How many consecutive checks have failed bool first_time = true; /// Activate replica for the first time. void run(); diff --git a/src/Storages/MergeTree/SimpleMergeSelector.cpp b/src/Storages/MergeTree/SimpleMergeSelector.cpp index 3b71e2720c8..f9ed6aedc60 100644 --- a/src/Storages/MergeTree/SimpleMergeSelector.cpp +++ b/src/Storages/MergeTree/SimpleMergeSelector.cpp @@ -102,6 +102,9 @@ bool allow( double max_size_to_lower_base_log, const SimpleMergeSelector::Settings & settings) { + if (settings.min_age_to_force_merge && min_age >= settings.min_age_to_force_merge) + return true; + // std::cerr << "sum_size: " << sum_size << "\n"; /// Map size to 0..1 using logarithmic scale diff --git a/src/Storages/MergeTree/SimpleMergeSelector.h b/src/Storages/MergeTree/SimpleMergeSelector.h index 11ffe8b672a..c20eaa6e8de 100644 --- a/src/Storages/MergeTree/SimpleMergeSelector.h +++ b/src/Storages/MergeTree/SimpleMergeSelector.h @@ -141,6 +141,11 @@ public: double heuristic_to_align_parts_max_absolute_difference_in_powers_of_two = 0.5; double heuristic_to_align_parts_max_score_adjustment = 0.75; + /** If it's not 0, all part ranges that have min_age larger than min_age_to_force_merge + * will be considered for merging + */ + size_t min_age_to_force_merge = 0; + /** Heuristic: * From right side of range, remove all parts, that size is less than specified ratio of sum_size. 
*/ diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index f2835ab4dbf..58fb37b97e8 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -7,6 +7,9 @@ #include #include #include +#include +#include +#include #include @@ -177,6 +180,15 @@ struct SelectQueryInfo ASTPtr view_query; /// Optimized VIEW query ASTPtr original_query; /// Unmodified query for projection analysis + /// Planner context + PlannerContextPtr planner_context; + + /// Storage table expression + QueryTreeNodePtr table_expression; + + /// Table expression modifiers for storage + std::optional table_expression_modifiers; + std::shared_ptr storage_limits; /// Cluster for the query. diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index f9995cceda3..a76c4dffb5b 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -176,6 +176,12 @@ Pipe StorageDictionary::read( return dictionary->read(column_names, max_block_size, threads); } +std::shared_ptr StorageDictionary::getDictionary() const +{ + auto registered_dictionary_name = location == Location::SameDatabaseAndNameAsDictionary ? getStorageID().getInternalDictionaryName() : dictionary_name; + return getContext()->getExternalDictionariesLoader().getDictionary(registered_dictionary_name, getContext()); +} + void StorageDictionary::shutdown() { removeDictionaryConfigurationFromRepository(); diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index 6eadd1b2c21..b3442ec2f99 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -8,8 +8,10 @@ namespace DB { + struct DictionaryStructure; class TableFunctionDictionary; +class IDictionary; class StorageDictionary final : public IStorage, public WithContext { @@ -71,6 +73,8 @@ public: size_t max_block_size, size_t threads) override; + std::shared_ptr getDictionary() const; + static NamesAndTypesList getNamesAndTypes(const DictionaryStructure & dictionary_structure); static String generateNamesAndTypesDescription(const NamesAndTypesList & list); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index bced552915b..8e4715db483 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -564,6 +564,10 @@ std::optional StorageDistributed::getOptimizedQueryP return {}; } + /// TODO: Analyzer syntax analyzer result + if (!query_info.syntax_analyzer_result) + return {}; + // GROUP BY const ASTPtr group_by = select.groupBy(); if (!query_info.syntax_analyzer_result->aggregates.empty() || group_by) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index c9067148739..7fb21b7e053 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -225,11 +225,15 @@ SelectQueryInfo getModifiedQueryInfo( SelectQueryInfo modified_query_info = query_info; modified_query_info.query = query_info.query->clone(); - /// Original query could contain JOIN but we need only the first joined table and its columns. 
- auto & modified_select = modified_query_info.query->as(); - TreeRewriterResult new_analyzer_res = *modified_query_info.syntax_analyzer_result; - removeJoin(modified_select, new_analyzer_res, modified_context); - modified_query_info.syntax_analyzer_result = std::make_shared(std::move(new_analyzer_res)); + /// TODO: Analyzer syntax analyzer result + if (modified_query_info.syntax_analyzer_result) + { + /// Original query could contain JOIN but we need only the first joined table and its columns. + auto & modified_select = modified_query_info.query->as(); + TreeRewriterResult new_analyzer_res = *modified_query_info.syntax_analyzer_result; + removeJoin(modified_select, new_analyzer_res, modified_context); + modified_query_info.syntax_analyzer_result = std::make_shared(std::move(new_analyzer_res)); + } if (!is_merge_engine) { @@ -513,7 +517,13 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( SelectQueryOptions(processed_stage).analyze()).buildQueryPipeline()); } - if (!modified_select.final() && storage->needRewriteQueryWithFinal(real_column_names)) + bool final = false; + if (modified_query_info.table_expression_modifiers) + final = modified_query_info.table_expression_modifiers->hasFinal(); + else + final = modified_select.final(); + + if (!final && storage->needRewriteQueryWithFinal(real_column_names)) { /// NOTE: It may not work correctly in some cases, because query was analyzed without final. /// However, it's needed for MaterializedMySQL and it's unlikely that someone will use it with Merge tables. diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index c42fb7fa965..51b11680f82 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -22,6 +22,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int INCORRECT_QUERY; + extern const int UNSUPPORTED_METHOD; } namespace @@ -251,6 +252,11 @@ String transformQueryForExternalDatabase( ContextPtr context) { auto clone_query = query_info.query->clone(); + + /// TODO: Analyzer syntax analyzer result + if (!query_info.syntax_analyzer_result) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "transform query for external database is unsupported"); + const Names used_columns = query_info.syntax_analyzer_result->requiredSourceColumns(); bool strict = context->getSettingsRef().external_table_strict_query; diff --git a/tests/.rgignore b/tests/.rgignore new file mode 100644 index 00000000000..26cb6f9025d --- /dev/null +++ b/tests/.rgignore @@ -0,0 +1 @@ +data_json diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index 52c583973d0..a3f2650eac7 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -387,7 +387,7 @@ progress { , stats { rows: 8 blocks: 4 - allocated_bytes: 324 + allocated_bytes: 1092 applied_limit: true rows_before_limit: 8 } diff --git a/tests/integration/test_merge_tree_optimize_old_parts/__init__.py b/tests/integration/test_merge_tree_optimize_old_parts/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_merge_tree_optimize_old_parts/configs/zookeeper_config.xml b/tests/integration/test_merge_tree_optimize_old_parts/configs/zookeeper_config.xml new file mode 100644 index 00000000000..18412349228 --- /dev/null +++ b/tests/integration/test_merge_tree_optimize_old_parts/configs/zookeeper_config.xml @@ -0,0 +1,8 @@ + + + + 
zoo1 + 2181 + + + diff --git a/tests/integration/test_merge_tree_optimize_old_parts/test.py b/tests/integration/test_merge_tree_optimize_old_parts/test.py new file mode 100644 index 00000000000..7b386eba2c4 --- /dev/null +++ b/tests/integration/test_merge_tree_optimize_old_parts/test.py @@ -0,0 +1,88 @@ +import pytest +import time +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/zookeeper_config.xml"], + with_zookeeper=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def get_part_number(table_name): + return TSV( + node.query( + f"SELECT count(*) FROM system.parts where table='{table_name}' and active=1" + ) + ) + + +def check_expected_part_number(seconds, table_name, expected): + ok = False + for i in range(int(seconds) * 2): + result = get_part_number(table_name) + if result == expected: + ok = True + break + else: + time.sleep(1) + assert ok + + +def test_without_force_merge_old_parts(start_cluster): + node.query( + "CREATE TABLE test_without_merge (i Int64) ENGINE = MergeTree ORDER BY i;" + ) + node.query("INSERT INTO test_without_merge SELECT 1") + node.query("INSERT INTO test_without_merge SELECT 2") + node.query("INSERT INTO test_without_merge SELECT 3") + + expected = TSV("""3\n""") + # verify that the parts don't get merged + for i in range(10): + if get_part_number("test_without_merge") != expected: + assert False + time.sleep(1) + + node.query("DROP TABLE test_without_merge;") + + +def test_force_merge_old_parts(start_cluster): + node.query( + "CREATE TABLE test_with_merge (i Int64) ENGINE = MergeTree ORDER BY i SETTINGS min_age_to_force_merge_seconds=5;" + ) + node.query("INSERT INTO test_with_merge SELECT 1") + node.query("INSERT INTO test_with_merge SELECT 2") + node.query("INSERT INTO test_with_merge SELECT 3") + + expected = TSV("""1\n""") + check_expected_part_number(10, "test_with_merge", expected) + + node.query("DROP TABLE test_with_merge;") + + +def test_force_merge_old_parts_replicated_merge_tree(start_cluster): + node.query( + "CREATE TABLE test_replicated (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/testing/test', 'node') ORDER BY i SETTINGS min_age_to_force_merge_seconds=5;" + ) + node.query("INSERT INTO test_replicated SELECT 1") + node.query("INSERT INTO test_replicated SELECT 2") + node.query("INSERT INTO test_replicated SELECT 3") + + expected = TSV("""1\n""") + check_expected_part_number(10, "test_replicated", expected) + + node.query("DROP TABLE test_replicated;") diff --git a/tests/integration/test_read_only_table/__init__.py b/tests/integration/test_read_only_table/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_read_only_table/test.py b/tests/integration/test_read_only_table/test.py new file mode 100644 index 00000000000..28abbf6601e --- /dev/null +++ b/tests/integration/test_read_only_table/test.py @@ -0,0 +1,89 @@ +import time +import re +import logging + +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + +NUM_TABLES = 10 + + +def fill_nodes(nodes): + for table_id in range(NUM_TABLES): + for node in nodes: + node.query( + f""" + CREATE TABLE test_table_{table_id}(a UInt64) + ENGINE = 
ReplicatedMergeTree('/clickhouse/tables/test/replicated/{table_id}', '{node.name}') ORDER BY tuple(); + """ + ) + + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance("node1", with_zookeeper=True) +node2 = cluster.add_instance("node2", with_zookeeper=True) +node3 = cluster.add_instance("node3", with_zookeeper=True) +nodes = [node1, node2, node3] + + +def sync_replicas(table): + for node in nodes: + node.query(f"SYSTEM SYNC REPLICA {table}") + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + + fill_nodes(nodes) + + yield cluster + + except Exception as ex: + print(ex) + + finally: + cluster.shutdown() + + +def test_restart_zookeeper(start_cluster): + + for table_id in range(NUM_TABLES): + node1.query( + f"INSERT INTO test_table_{table_id} VALUES (1), (2), (3), (4), (5);" + ) + + logging.info("Inserted test data and initialized all tables") + + def get_zookeeper_which_node_connected_to(node): + line = str( + node.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 2181 | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ).strip() + + pattern = re.compile(r"zoo[0-9]+", re.IGNORECASE) + result = pattern.findall(line) + assert ( + len(result) == 1 + ), "ClickHouse must be connected only to one Zookeeper at a time" + return result[0] + + node1_zk = get_zookeeper_which_node_connected_to(node1) + + # ClickHouse should +- immediately reconnect to another zookeeper node + cluster.stop_zookeeper_nodes([node1_zk]) + time.sleep(5) + + for table_id in range(NUM_TABLES): + node1.query( + f"INSERT INTO test_table_{table_id} VALUES (6), (7), (8), (9), (10);" + ) diff --git a/tests/integration/test_row_policy/test.py b/tests/integration/test_row_policy/test.py index 2e696be4988..1933823f5d2 100644 --- a/tests/integration/test_row_policy/test.py +++ b/tests/integration/test_row_policy/test.py @@ -867,3 +867,30 @@ def test_policy_on_distributed_table_via_role(): assert node.query( "SELECT * FROM dist_tbl SETTINGS prefer_localhost_replica=0", user="user1" ) == TSV([[0], [2], [4], [6], [8], [0], [2], [4], [6], [8]]) + + +def test_row_policy_filter_with_subquery(): + copy_policy_xml("no_filters.xml") + assert node.query("SHOW POLICIES") == "" + + node.query("DROP ROW POLICY IF EXISTS filter_1 ON table1") + node.query("DROP TABLE IF EXISTS table_1") + node.query("DROP TABLE IF EXISTS table_2") + + node.query( + "CREATE TABLE table_1 (x int, y int) ENGINE = MergeTree ORDER BY tuple()" + ) + node.query("INSERT INTO table_1 SELECT number, number * number FROM numbers(10)") + + node.query("CREATE TABLE table_2 (a int) ENGINE=MergeTree ORDER BY tuple()") + node.query("INSERT INTO table_2 VALUES (3), (5)") + + node.query( + "CREATE ROW POLICY filter_1 ON table_1 USING x IN (SELECT a FROM table_2) TO ALL" + ) + + assert node.query("SELECT * FROM table_1") == TSV([[3, 9], [5, 25]]) + + node.query("DROP ROW POLICY filter_1 ON table_1") + node.query("DROP TABLE table_1") + node.query("DROP TABLE table_2") diff --git a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py new file mode 100644 index 00000000000..a1e10cde031 --- /dev/null +++ b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +import time + +import pytest +from helpers.cluster import ClickHouseCluster + + +single_node_cluster = ClickHouseCluster(__file__) +small_node = single_node_cluster.add_instance( + "small_node", 
main_configs=["configs/s3.xml"], with_minio=True +) + + +@pytest.fixture(scope="module") +def started_single_node_cluster(): + try: + single_node_cluster.start() + + yield single_node_cluster + finally: + single_node_cluster.shutdown() + + +def test_move_and_s3_memory_usage(started_single_node_cluster): + if small_node.is_built_with_sanitizer() or small_node.is_debug_build(): + pytest.skip("Disabled for debug and sanitizers. Too slow.") + + small_node.query( + "CREATE TABLE s3_test_with_ttl (x UInt32, a String codec(NONE), b String codec(NONE), c String codec(NONE), d String codec(NONE), e String codec(NONE)) engine = MergeTree order by x partition by x SETTINGS storage_policy='s3_and_default'" + ) + + for _ in range(10): + small_node.query( + "insert into s3_test_with_ttl select 0, repeat('a', 100), repeat('b', 100), repeat('c', 100), repeat('d', 100), repeat('e', 100) from zeros(400000) settings max_block_size = 8192, max_insert_block_size=10000000, min_insert_block_size_rows=10000000" + ) + + # After this, we should have 5 columns per 10 * 100 * 400000 ~ 400 MB; total ~2G data in partition + small_node.query("optimize table s3_test_with_ttl final") + + small_node.query("system flush logs") + # Will take memory usage from metric_log. + # It is easier then specifying total memory limit (insert queries can hit this limit). + small_node.query("truncate table system.metric_log") + + small_node.query( + "alter table s3_test_with_ttl move partition 0 to volume 'external'", + settings={"send_logs_level": "error"}, + ) + small_node.query("system flush logs") + max_usage = small_node.query( + "select max(CurrentMetric_MemoryTracking) from system.metric_log" + ) + # 3G limit is a big one. However, we can hit it anyway with parallel s3 writes enabled. + # Also actual value can be bigger because of memory drift. + # Increase it a little bit if test fails. 
+ assert int(max_usage) < 3e9 + res = small_node.query( + "select * from system.errors where last_error_message like '%Memory limit%' limit 1" + ) + assert res == "" diff --git a/tests/queries/0_stateless/00396_uuid.reference b/tests/queries/0_stateless/00396_uuid.reference index d70322ec4c1..588f11cb466 100644 --- a/tests/queries/0_stateless/00396_uuid.reference +++ b/tests/queries/0_stateless/00396_uuid.reference @@ -6,3 +6,8 @@ 01234567-89ab-cdef-0123-456789abcdef 01234567-89ab-cdef-0123-456789abcdef 01234567-89ab-cdef-0123-456789abcdef 3f1ed72e-f7fe-4459-9cbe-95fe9298f845 1 +-- UUID variants -- +00112233445566778899AABBCCDDEEFF +33221100554477668899AABBCCDDEEFF +00112233-4455-6677-8899-aabbccddeeff +00112233-4455-6677-8899-aabbccddeeff diff --git a/tests/queries/0_stateless/00396_uuid.sql b/tests/queries/0_stateless/00396_uuid.sql index 9d8b48bddb0..4ad659e2464 100644 --- a/tests/queries/0_stateless/00396_uuid.sql +++ b/tests/queries/0_stateless/00396_uuid.sql @@ -11,3 +11,9 @@ with generateUUIDv4() as uuid, identity(lower(hex(reverse(reinterpretAsString(uuid))))) as str, reinterpretAsUUID(reverse(unhex(str))) as uuid2 select uuid = uuid2; + +select '-- UUID variants --'; +select hex(UUIDStringToNum('00112233-4455-6677-8899-aabbccddeeff', 1)); +select hex(UUIDStringToNum('00112233-4455-6677-8899-aabbccddeeff', 2)); +select UUIDNumToString(UUIDStringToNum('00112233-4455-6677-8899-aabbccddeeff', 1), 1); +select UUIDNumToString(UUIDStringToNum('00112233-4455-6677-8899-aabbccddeeff', 2), 2); diff --git a/tests/queries/0_stateless/00398_url_functions.reference b/tests/queries/0_stateless/00398_url_functions.reference index feba95fb1b3..2e5a97b380e 100644 --- a/tests/queries/0_stateless/00398_url_functions.reference +++ b/tests/queries/0_stateless/00398_url_functions.reference @@ -8,6 +8,32 @@ http ====HOST==== www.example.com + + + + + + + + +www.example.com +127.0.0.1 +www.example.com +www.example.com +www.example.com +example.com +example.com +example.com +www.example.com +example.com +example.com +example.com +example.com +example.com +example.com + + + www.example.com 127.0.0.1 www.example.com diff --git a/tests/queries/0_stateless/00398_url_functions.sql b/tests/queries/0_stateless/00398_url_functions.sql index 66fe591bb58..cbefde7515a 100644 --- a/tests/queries/0_stateless/00398_url_functions.sql +++ b/tests/queries/0_stateless/00398_url_functions.sql @@ -8,6 +8,14 @@ SELECT protocol('//127.0.0.1:443/') AS Scheme; SELECT '====HOST===='; SELECT domain('http://paul@www.example.com:80/') AS Host; +SELECT domain('user:password@example.com:8080') AS Host; +SELECT domain('http://user:password@example.com:8080') AS Host; +SELECT domain('http://user:password@example.com:8080/path?query=value#fragment') AS Host; +SELECT domain('newuser:@example.com') AS Host; +SELECT domain('http://:pass@example.com') AS Host; +SELECT domain(':newpass@example.com') AS Host; +SELECT domain('http://user:pass@example@.com') AS Host; +SELECT domain('http://user:pass:example.com') AS Host; SELECT domain('http:/paul/example/com') AS Host; SELECT domain('http://www.example.com?q=4') AS Host; SELECT domain('http://127.0.0.1:443/') AS Host; @@ -17,6 +25,24 @@ SELECT domain('www.example.com') as Host; SELECT domain('example.com') as Host; SELECT domainWithoutWWW('//paul@www.example.com') AS Host; SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host; +SELECT domainRFC('http://paul@www.example.com:80/') AS Host; +SELECT domainRFC('user:password@example.com:8080') AS Host; +SELECT 
domainRFC('http://user:password@example.com:8080') AS Host; +SELECT domainRFC('http://user:password@example.com:8080/path?query=value#fragment') AS Host; +SELECT domainRFC('newuser:@example.com') AS Host; +SELECT domainRFC('http://:pass@example.com') AS Host; +SELECT domainRFC(':newpass@example.com') AS Host; +SELECT domainRFC('http://user:pass@example@.com') AS Host; +SELECT domainRFC('http://user:pass:example.com') AS Host; +SELECT domainRFC('http:/paul/example/com') AS Host; +SELECT domainRFC('http://www.example.com?q=4') AS Host; +SELECT domainRFC('http://127.0.0.1:443/') AS Host; +SELECT domainRFC('//www.example.com') AS Host; +SELECT domainRFC('//paul@www.example.com') AS Host; +SELECT domainRFC('www.example.com') as Host; +SELECT domainRFC('example.com') as Host; +SELECT domainWithoutWWWRFC('//paul@www.example.com') AS Host; +SELECT domainWithoutWWWRFC('http://paul@www.example.com:80/') AS Host; SELECT '====NETLOC===='; SELECT netloc('http://paul@www.example.com:80/') AS Netloc; diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference index dba46e48e43..58f8b7abfb3 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.reference @@ -35,7 +35,7 @@ Check total_bytes/total_rows for StripeLog 113 1 Check total_bytes/total_rows for Memory 0 0 -64 1 +256 1 Check total_bytes/total_rows for Buffer 0 0 256 50 diff --git a/tests/queries/0_stateless/02136_scalar_progress.reference b/tests/queries/0_stateless/02136_scalar_progress.reference index e9204f2d02e..5bb4f5640a8 100644 --- a/tests/queries/0_stateless/02136_scalar_progress.reference +++ b/tests/queries/0_stateless/02136_scalar_progress.reference @@ -2,5 +2,5 @@ < X-ClickHouse-Progress: {"read_rows":"65505","read_bytes":"524040","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} < X-ClickHouse-Progress: {"read_rows":"131010","read_bytes":"1048080","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} < X-ClickHouse-Progress: {"read_rows":"131011","read_bytes":"1048081","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} -< X-ClickHouse-Progress: {"read_rows":"131011","read_bytes":"1048081","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"1","result_bytes":"80"} -< X-ClickHouse-Summary: {"read_rows":"131011","read_bytes":"1048081","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"1","result_bytes":"80"} +< X-ClickHouse-Progress: {"read_rows":"131011","read_bytes":"1048081","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"1","result_bytes":"272"} +< X-ClickHouse-Summary: {"read_rows":"131011","read_bytes":"1048081","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"1","result_bytes":"272"} diff --git a/tests/queries/0_stateless/02337_analyzer_columns_basic.reference b/tests/queries/0_stateless/02337_analyzer_columns_basic.reference new file mode 100644 index 00000000000..1482c79b602 --- /dev/null +++ b/tests/queries/0_stateless/02337_analyzer_columns_basic.reference @@ -0,0 +1,46 @@ +Empty from section +dummy UInt8 +0 +-- +dummy UInt8 +0 +-- +dummy UInt8 +0 +Table access without table name qualification +id UInt64 +0 
+-- +value String +Value +-- +id UInt64 +value String +0 Value +Table access with table name qualification +id UInt64 +0 +-- +value String +Value +-- +id UInt64 +value String +0 Value +-- +id UInt64 +value String +0 Value +Table access with database and table name qualification +-- +id UInt64 +value String +0 Value +-- +id UInt64 +value String +0 Value +-- +id UInt64 +value String +0 Value diff --git a/tests/queries/0_stateless/02337_analyzer_columns_basic.sql b/tests/queries/0_stateless/02337_analyzer_columns_basic.sql new file mode 100644 index 00000000000..00dc6c1351e --- /dev/null +++ b/tests/queries/0_stateless/02337_analyzer_columns_basic.sql @@ -0,0 +1,101 @@ +-- Tags: no-parallel + +SET use_analyzer = 1; + +-- Empty from section + +SELECT 'Empty from section'; + +DESCRIBE (SELECT dummy); +SELECT dummy; + +SELECT '--'; + +DESCRIBE (SELECT one.dummy); +SELECT one.dummy; + +SELECT '--'; + +DESCRIBE (SELECT system.one.dummy); +SELECT system.one.dummy; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT 'Table access without table name qualification'; + +SELECT test_id FROM test_table; -- { serverError 47 } +SELECT test_id FROM test_unknown_table; -- { serverError 60 } + +DESCRIBE (SELECT id FROM test_table); +SELECT id FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value FROM test_table); +SELECT value FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT id, value FROM test_table); +SELECT id, value FROM test_table; + +SELECT 'Table access with table name qualification'; + +DESCRIBE (SELECT test_table.id FROM test_table); +SELECT test_table.id FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT test_table.value FROM test_table); +SELECT test_table.value FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT test_table.id, test_table.value FROM test_table); +SELECT test_table.id, test_table.value FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT test.id, test.value FROM test_table AS test); +SELECT test.id, test.value FROM test_table AS test; + +DROP TABLE test_table; + +SELECT 'Table access with database and table name qualification'; + +DROP DATABASE IF EXISTS 02337_db; +CREATE DATABASE 02337_db; + +DROP TABLE IF EXISTS 02337_db.test_table; +CREATE TABLE 02337_db.test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO 02337_db.test_table VALUES (0, 'Value'); + +SELECT '--'; + +DESCRIBE (SELECT test_table.id, test_table.value FROM 02337_db.test_table); +SELECT test_table.id, test_table.value FROM 02337_db.test_table; + +SELECT '--'; + +DESCRIBE (SELECT 02337_db.test_table.id, 02337_db.test_table.value FROM 02337_db.test_table); +SELECT 02337_db.test_table.id, 02337_db.test_table.value FROM 02337_db.test_table; + +SELECT '--'; + +DESCRIBE (SELECT test_table.id, test_table.value FROM 02337_db.test_table AS test_table); +SELECT test_table.id, test_table.value FROM 02337_db.test_table AS test_table; + +DROP TABLE 02337_db.test_table; +DROP DATABASE 02337_db; diff --git a/tests/queries/0_stateless/02338_analyzer_constants_basic.reference b/tests/queries/0_stateless/02338_analyzer_constants_basic.reference new file mode 100644 index 00000000000..f3a69e4d835 --- /dev/null +++ b/tests/queries/0_stateless/02338_analyzer_constants_basic.reference @@ -0,0 +1,35 @@ +1 UInt8 +1 +-- +\'test\' String +test +-- +1 UInt8 +\'test\' String +1 test +-- +1 UInt8 +\'test\' String +[1, 2, 3] Array(UInt8) +1 test [1,2,3] +-- +1 UInt8 +\'test\' String +[1, 2, 3] 
Array(UInt8) +[\'1\', \'2\', \'3\'] Array(String) +1 test [1,2,3] ['1','2','3'] +-- +NULL Nullable(Nothing) +\N +-- +(1, 1) Tuple(UInt8, UInt8) +(1,1) +-- +array((1, 1)) Array(Tuple(UInt8, UInt8)) +[(1,1)] +NULL Nullable(Nothing) +1 UInt8 +\'test\' String +[1, 2, 3] Array(UInt8) +array((1, 1), (1, 1)) Array(Tuple(UInt8, UInt8)) +\N 1 test [1,2,3] [(1,1),(1,1)] diff --git a/tests/queries/0_stateless/02338_analyzer_constants_basic.sql b/tests/queries/0_stateless/02338_analyzer_constants_basic.sql new file mode 100644 index 00000000000..56f82763f1b --- /dev/null +++ b/tests/queries/0_stateless/02338_analyzer_constants_basic.sql @@ -0,0 +1,42 @@ +SET use_analyzer = 1; + +DESCRIBE (SELECT 1); +SELECT 1; + +SELECT '--'; + +DESCRIBE (SELECT 'test'); +SELECT 'test'; + +SELECT '--'; + +DESCRIBE (SELECT 1, 'test'); +SELECT 1, 'test'; + +SELECT '--'; + +DESCRIBE (SELECT 1, 'test', [1, 2, 3]); +SELECT 1, 'test', [1, 2, 3]; + +SELECT '--'; + +DESCRIBE (SELECT 1, 'test', [1, 2, 3], ['1', '2', '3']); +SELECT 1, 'test', [1, 2, 3], ['1', '2', '3']; + +SELECT '--'; + +DESCRIBE (SELECT NULL); +SELECT NULL; + +SELECT '--'; + +DESCRIBE (SELECT (1, 1)); +SELECT (1, 1); + +SELECT '--'; + +DESCRIBE (SELECT [(1, 1)]); +SELECT [(1, 1)]; + +DESCRIBE (SELECT NULL, 1, 'test', [1, 2, 3], [(1, 1), (1, 1)]); +SELECT NULL, 1, 'test', [1, 2, 3], [(1, 1), (1, 1)]; diff --git a/tests/queries/0_stateless/02339_analyzer_matcher_basic.reference b/tests/queries/0_stateless/02339_analyzer_matcher_basic.reference new file mode 100644 index 00000000000..3ca0d303793 --- /dev/null +++ b/tests/queries/0_stateless/02339_analyzer_matcher_basic.reference @@ -0,0 +1,98 @@ +Matchers without FROM section +dummy UInt8 +0 +-- +dummy UInt8 +0 +-- +dummy UInt8 +0 +Unqualified matchers +id UInt64 +value String +0 Value +-- +id UInt64 +0 +-- +id UInt64 +value String +0 Value +-- +id UInt64 +value String +0 Value +Table qualified matchers +id UInt64 +value String +0 Value +-- +id UInt64 +0 +-- +id UInt64 +value String +0 Value +-- +id UInt64 +value String +0 Value +Database and table qualified matchers +APPLY transformer +-- +toString(id) String +toString(value) String +0 Value +-- +toString(id) String +toString(value) String +0 Value +-- +length(toString(id)) UInt64 +length(toString(value)) UInt64 +1 5 +-- +length(toString(id)) UInt64 +length(toString(value)) UInt64 +1 5 +-- +id UInt64 +value String +0 Value +EXCEPT transformer +-- +value String +Value +-- +value String +Value +-- +toString(value) String +Value +-- +toString(value) String +Value +REPLACE transformer +-- +5 UInt8 +value String +5 Value +-- +5 UInt8 +value String +5 Value +-- +5 UInt8 +6 UInt8 +5 6 +-- +5 UInt8 +6 UInt8 +5 6 +Combine EXCEPT, REPLACE, APPLY transformers +-- +toString(6) String +6 +-- +toString(6) String +6 diff --git a/tests/queries/0_stateless/02339_analyzer_matcher_basic.sql b/tests/queries/0_stateless/02339_analyzer_matcher_basic.sql new file mode 100644 index 00000000000..862cb2f88ff --- /dev/null +++ b/tests/queries/0_stateless/02339_analyzer_matcher_basic.sql @@ -0,0 +1,188 @@ +-- Tags: no-parallel + +SET use_analyzer = 1; + +SELECT 'Matchers without FROM section'; + +DESCRIBE (SELECT *); +SELECT *; + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS(dummy)); +SELECT COLUMNS(dummy); + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS('d')); +SELECT COLUMNS('d'); + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT 'Unqualified matchers'; + +DESCRIBE (SELECT * 
FROM test_table); +SELECT * FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS(id) FROM test_table); +SELECT COLUMNS(id) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS(id), COLUMNS(value) FROM test_table); +SELECT COLUMNS(id), COLUMNS(value) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS('i'), COLUMNS('v') FROM test_table); +SELECT COLUMNS('i'), COLUMNS('v') FROM test_table; + +SELECT 'Table qualified matchers'; + +DESCRIBE (SELECT test_table.* FROM test_table); +SELECT test_table.* FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT test_table.COLUMNS(id) FROM test_table); +SELECT test_table.COLUMNS(id) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT test_table.COLUMNS(id), test_table.COLUMNS(value) FROM test_table); +SELECT test_table.COLUMNS(id), test_table.COLUMNS(value) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT test_table.COLUMNS('i'), test_table.COLUMNS('v') FROM test_table); +SELECT test_table.COLUMNS('i'), test_table.COLUMNS('v') FROM test_table; + +SELECT 'Database and table qualified matchers'; + +DROP DATABASE IF EXISTS 02339_db; +CREATE DATABASE 02339_db; + +DROP TABLE IF EXISTS 02339_db.test_table; +CREATE TABLE 02339_db.test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO 02339_db.test_table VALUES (0, 'Value'); + +-- TODO: Qualified COLUMNS where identifier has more than 2 parts are not supported on parser level + +-- SELECT '--'; + +-- DESCRIBE (SELECT 02339_db.test_table.* FROM 02339_db.test_table); +-- SELECT 02339_db.test_table.* FROM 02339_db.test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT 02339_db.test_table.COLUMNS(id) FROM 02339_db.test_table); +-- SELECT 02339_db.test_table.COLUMNS(id) FROM 02339_db.test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT 02339_db.test_table.COLUMNS(id), 02339_db.test_table.COLUMNS(value) FROM 02339_db.test_table); +-- SELECT 02339_db.test_table.COLUMNS(id), 02339_db.test_table.COLUMNS(value) FROM 02339_db.test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT 02339_db.test_table.COLUMNS('i'), 02339_db.test_table.COLUMNS('v') FROM 02339_db.test_table); +-- SELECT 02339_db.test_table.COLUMNS('i'), 02339_db.test_table.COLUMNS('v') FROM 02339_db.test_table; + +DROP TABLE 02339_db.test_table; +DROP DATABASE 02339_db; + +SELECT 'APPLY transformer'; + +SELECT '--'; + +DESCRIBE (SELECT * APPLY toString FROM test_table); +SELECT * APPLY toString FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT * APPLY (x -> toString(x)) FROM test_table); +SELECT * APPLY (x -> toString(x)) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT * APPLY (x -> toString(x)) APPLY (x -> length(x)) FROM test_table); +SELECT * APPLY (x -> toString(x)) APPLY (x -> length(x)) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT * APPLY (x -> toString(x)) APPLY length FROM test_table); +SELECT * APPLY (x -> toString(x)) APPLY length FROM test_table; + +SELECT '--'; +DESCRIBE (SELECT * FROM test_table); +SELECT * FROM test_table; + +SELECT 'EXCEPT transformer'; + +SELECT '--'; + +DESCRIBE (SELECT * EXCEPT (id) FROM test_table); +SELECT * EXCEPT (id) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS(id, value) EXCEPT (id) FROM test_table); +SELECT COLUMNS(id, value) EXCEPT (id) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT * EXCEPT (id) APPLY toString FROM test_table); +SELECT * EXCEPT (id) APPLY toString FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS(id, value) EXCEPT (id) APPLY toString FROM test_table); +SELECT COLUMNS(id, value) EXCEPT (id) APPLY 
toString FROM test_table; + +SELECT 'REPLACE transformer'; + +SELECT '--'; + +DESCRIBE (SELECT * REPLACE (5 AS id) FROM test_table); +SELECT * REPLACE (5 AS id) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS(id, value) REPLACE (5 AS id) FROM test_table); +SELECT COLUMNS(id, value) REPLACE (5 AS id) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT * REPLACE (5 AS id, 6 as value) FROM test_table); +SELECT * REPLACE (5 AS id, 6 as value) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS(id, value) REPLACE (5 AS id, 6 as value) FROM test_table); +SELECT COLUMNS(id, value) REPLACE (5 AS id, 6 as value) FROM test_table; + +SELECT 'Combine EXCEPT, REPLACE, APPLY transformers'; + +SELECT '--'; + +DESCRIBE (SELECT * EXCEPT id REPLACE (5 AS id, 6 as value) APPLY toString FROM test_table); +SELECT * EXCEPT id REPLACE (5 AS id, 6 as value) APPLY toString FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT COLUMNS(id, value) EXCEPT id REPLACE (5 AS id, 6 as value) APPLY toString FROM test_table); +SELECT COLUMNS(id, value) EXCEPT id REPLACE (5 AS id, 6 as value) APPLY toString FROM test_table; diff --git a/tests/queries/0_stateless/02340_analyzer_functions.reference b/tests/queries/0_stateless/02340_analyzer_functions.reference new file mode 100644 index 00000000000..fe086c69e91 --- /dev/null +++ b/tests/queries/0_stateless/02340_analyzer_functions.reference @@ -0,0 +1,11 @@ +plus(1, 1) UInt16 +2 +-- +plus(dummy, dummy) UInt16 +0 +-- +plus(id, length(value)) UInt64 +5 +-- +concat(concat(toString(id), \'_\'), value) String +0_Value diff --git a/tests/queries/0_stateless/02340_analyzer_functions.sql b/tests/queries/0_stateless/02340_analyzer_functions.sql new file mode 100644 index 00000000000..b1bfeabd836 --- /dev/null +++ b/tests/queries/0_stateless/02340_analyzer_functions.sql @@ -0,0 +1,28 @@ +SET use_analyzer = 1; + +DESCRIBE (SELECT 1 + 1); +SELECT 1 + 1; + +SELECT '--'; + +DESCRIBE (SELECT dummy + dummy); +SELECT dummy + dummy; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT '--'; + +DESCRIBE (SELECT id + length(value) FROM test_table); +SELECT id + length(value) FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT concat(concat(toString(id), '_'), (value)) FROM test_table); +SELECT concat(concat(toString(id), '_'), (value)) FROM test_table; diff --git a/tests/queries/0_stateless/02341_analyzer_aliases_basics.reference b/tests/queries/0_stateless/02341_analyzer_aliases_basics.reference new file mode 100644 index 00000000000..3733d6b6084 --- /dev/null +++ b/tests/queries/0_stateless/02341_analyzer_aliases_basics.reference @@ -0,0 +1,19 @@ +Aliases to constants +1 1 +4 2 1 3 4 +1 +1 1 +1 1 2 +1 2 1 +3 6 +Aliases to columns +0 0 0 +0 Value 0 Value +0 Value +Alias conflict with identifier inside expression +0 +1 +3 +Alias setting prefer_column_name_to_alias +0 +Value diff --git a/tests/queries/0_stateless/02341_analyzer_aliases_basics.sql b/tests/queries/0_stateless/02341_analyzer_aliases_basics.sql new file mode 100644 index 00000000000..636936d875a --- /dev/null +++ b/tests/queries/0_stateless/02341_analyzer_aliases_basics.sql @@ -0,0 +1,50 @@ +SET use_analyzer = 1; + +SELECT 'Aliases to constants'; + +SELECT 1 as a, a; +SELECT (c + 1) as d, (a + 1) as b, 1 AS a, (b + 1) as c, d; + +WITH 1 as a SELECT a; +WITH a as b SELECT 1 as a, b; + +SELECT 1 AS x, x, x + 1; +SELECT x, x + 1, 1 AS x; +SELECT x, 1 + (2 + (3 AS x)); + +SELECT a AS b, b AS a; -- { 
serverError 174 } + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT 'Aliases to columns'; + +SELECT id_alias_2, id AS id_alias, id_alias as id_alias_2 FROM test_table; +SELECT id_1, value_1, id as id_1, value as value_1 FROM test_table; + +WITH value_1 as value_2, id_1 as id_2, id AS id_1, value AS value_1 SELECT id_2, value_2 FROM test_table; + +SELECT (id + b) AS id, id as b FROM test_table; -- { serverError 174 } +SELECT (1 + b + 1 + id) AS id, b as c, id as b FROM test_table; -- { serverError 174 } + +SELECT 'Alias conflict with identifier inside expression'; + +SELECT id AS id FROM test_table; +SELECT (id + 1) AS id FROM test_table; +SELECT (id + 1 + 1 + 1 + id) AS id FROM test_table; + +SELECT 'Alias setting prefer_column_name_to_alias'; + +WITH id AS value SELECT value FROM test_table; + +SET prefer_column_name_to_alias = 1; +WITH id AS value SELECT value FROM test_table; +SET prefer_column_name_to_alias = 0; + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02342_analyzer_compound_types.reference b/tests/queries/0_stateless/02342_analyzer_compound_types.reference new file mode 100644 index 00000000000..51e0bbe6e92 --- /dev/null +++ b/tests/queries/0_stateless/02342_analyzer_compound_types.reference @@ -0,0 +1,106 @@ +Constant tuple +(1,'Value') 1 Value +(1,'Value') 1 Value +(1,'Value') 1 +(1,'Value') Value +(1,'Value') 1 +(1,'Value') Value +Tuple +-- +id UInt64 +value Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String) +0 (('value_0_level_1','value_1_level_1'),'value_1_level_0') +-- +id UInt64 +value Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String) +0 (('value_0_level_1','value_1_level_1'),'value_1_level_0') +-- +value.value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String) +value.value_1_level_0 String +('value_0_level_1','value_1_level_1') value_1_level_0 +-- +alias_value Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String) +alias_value.value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String) +alias_value.value_1_level_0 String +(('value_0_level_1','value_1_level_1'),'value_1_level_0') ('value_0_level_1','value_1_level_1') value_1_level_0 +-- +alias_value Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String) +alias_value.value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String) +alias_value.value_1_level_0 String +(('value_0_level_1','value_1_level_1'),'value_1_level_0') ('value_0_level_1','value_1_level_1') value_1_level_0 +-- +alias_value Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String) +toString(alias_value.value_0_level_0) String +toString(alias_value.value_1_level_0) String +(('value_0_level_1','value_1_level_1'),'value_1_level_0') (\'value_0_level_1\',\'value_1_level_1\') value_1_level_0 +-- +value.value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String) +value.value_1_level_0 String +('value_0_level_1','value_1_level_1') value_1_level_0 +-- +toString(value.value_0_level_0) String +toString(value.value_1_level_0) String +(\'value_0_level_1\',\'value_1_level_1\') value_1_level_0 +-- +value.value_0_level_0.value_0_level_1 String +value.value_0_level_0.value_1_level_1 String +value_0_level_1 value_1_level_1 +-- +alias_value Tuple(value_0_level_1 
String, value_1_level_1 String) +alias_value.value_0_level_1 String +alias_value.value_1_level_1 String +('value_0_level_1','value_1_level_1') value_0_level_1 value_1_level_1 +-- +alias_value Tuple(value_0_level_1 String, value_1_level_1 String) +alias_value.value_0_level_1 String +alias_value.value_1_level_1 String +('value_0_level_1','value_1_level_1') value_0_level_1 value_1_level_1 +-- +alias_value Tuple(value_0_level_1 String, value_1_level_1 String) +toString(alias_value.value_0_level_1) String +toString(alias_value.value_1_level_1) String +('value_0_level_1','value_1_level_1') value_0_level_1 value_1_level_1 +-- +value.value_0_level_0.value_0_level_1 String +value.value_0_level_0.value_1_level_1 String +value_0_level_1 value_1_level_1 +-- +toString(value.value_0_level_0.value_0_level_1) String +toString(value.value_0_level_0.value_1_level_1) String +value_0_level_1 value_1_level_1 +Nested +id UInt64 +value.value_0_level_0 Array(Nested(value_0_level_1 String, value_1_level_1 String)) +value.value_1_level_0 Array(String) +0 [[('value_0_level_1','value_1_level_1')]] ['value_1_level_0'] +-- +value.value_0_level_0 Array(Nested(value_0_level_1 String, value_1_level_1 String)) +value.value_1_level_0 Array(String) +[[('value_0_level_1','value_1_level_1')]] ['value_1_level_0'] +-- +value.value_0_level_0.value_0_level_1 Array(Array(String)) +value.value_0_level_0.value_1_level_1 Array(Array(String)) +[['value_0_level_1']] [['value_1_level_1']] +-- +value_alias Array(Nested(value_0_level_1 String, value_1_level_1 String)) +value_alias.value_0_level_1 Array(Array(String)) +value_alias.value_1_level_1 Array(Array(String)) +[[('value_0_level_1','value_1_level_1')]] [['value_0_level_1']] [['value_1_level_1']] +-- +value_alias Array(Nested(value_0_level_1 String, value_1_level_1 String)) +value_alias.value_0_level_1 Array(Array(String)) +value_alias.value_1_level_1 Array(Array(String)) +[[('value_0_level_1','value_1_level_1')]] [['value_0_level_1']] [['value_1_level_1']] +-- +value_alias Array(Nested(value_0_level_1 String, value_1_level_1 String)) +toString(value_alias.value_0_level_1) String +toString(value_alias.value_1_level_1) String +[[('value_0_level_1','value_1_level_1')]] [[\'value_0_level_1\']] [[\'value_1_level_1\']] +-- +value.value_0_level_0.value_0_level_1 Array(Array(String)) +value.value_0_level_0.value_1_level_1 Array(Array(String)) +[['value_0_level_1']] [['value_1_level_1']] +-- +toString(value.value_0_level_0.value_0_level_1) String +toString(value.value_0_level_0.value_1_level_1) String +[[\'value_0_level_1\']] [[\'value_1_level_1\']] diff --git a/tests/queries/0_stateless/02342_analyzer_compound_types.sql b/tests/queries/0_stateless/02342_analyzer_compound_types.sql new file mode 100644 index 00000000000..6b4f323fd66 --- /dev/null +++ b/tests/queries/0_stateless/02342_analyzer_compound_types.sql @@ -0,0 +1,195 @@ +SET use_analyzer = 1; + +SELECT 'Constant tuple'; + +SELECT cast((1, 'Value'), 'Tuple (id UInt64, value String)') AS value, value.id, value.value; +SELECT cast((1, 'Value'), 'Tuple (id UInt64, value String)') AS value, value.* APPLY toString; +SELECT cast((1, 'Value'), 'Tuple (id UInt64, value String)') AS value, value.COLUMNS(id) APPLY toString; +SELECT cast((1, 'Value'), 'Tuple (id UInt64, value String)') AS value, value.COLUMNS(value) APPLY toString; +SELECT cast((1, 'Value'), 'Tuple (id UInt64, value String)') AS value, value.COLUMNS('i') APPLY toString; +SELECT cast((1, 'Value'), 'Tuple (id UInt64, value String)') AS value, value.COLUMNS('v') APPLY toString; + 
+SELECT 'Tuple'; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String) +) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (0, (('value_0_level_1', 'value_1_level_1'), 'value_1_level_0')); + +SELECT '--'; + +DESCRIBE (SELECT * FROM test_table); +SELECT * FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT id, value FROM test_table); +SELECT id, value FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0, value.value_1_level_0 FROM test_table); +SELECT value.value_0_level_0, value.value_1_level_0 FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value AS alias_value, alias_value.value_0_level_0, alias_value.value_1_level_0 FROM test_table); +SELECT value AS alias_value, alias_value.value_0_level_0, alias_value.value_1_level_0 FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value AS alias_value, alias_value.* FROM test_table); +SELECT value AS alias_value, alias_value.* FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value AS alias_value, alias_value.* APPLY toString FROM test_table); +SELECT value AS alias_value, alias_value.* APPLY toString FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.* FROM test_table); +SELECT value.* FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.* APPLY toString FROM test_table); +SELECT value.* APPLY toString FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0.value_0_level_1, value.value_0_level_0.value_1_level_1 FROM test_table); +SELECT value.value_0_level_0.value_0_level_1, value.value_0_level_0.value_1_level_1 FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0 AS alias_value, alias_value.value_0_level_1, alias_value.value_1_level_1 FROM test_table); +SELECT value.value_0_level_0 AS alias_value, alias_value.value_0_level_1, alias_value.value_1_level_1 FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0 AS alias_value, alias_value.* FROM test_table); +SELECT value.value_0_level_0 AS alias_value, alias_value.* FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0 AS alias_value, alias_value.* APPLY toString FROM test_table); +SELECT value.value_0_level_0 AS alias_value, alias_value.* APPLY toString FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0.* FROM test_table); +SELECT value.value_0_level_0.* FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0.* APPLY toString FROM test_table); +SELECT value.value_0_level_0.* APPLY toString FROM test_table; + +DROP TABLE test_table; + +-- SELECT 'Array of tuples'; + +-- DROP TABLE IF EXISTS test_table; +-- CREATE TABLE test_table +-- ( +-- id UInt64, +-- value Array(Tuple(value_0_level_0 Tuple(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String)) +-- ) ENGINE=MergeTree ORDER BY id; + +-- INSERT INTO test_table VALUES (0, [('value_0_level_1', 'value_1_level_1')], ['value_1_level_0']); + +-- DESCRIBE (SELECT * FROM test_table); +-- SELECT * FROM test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT value.value_0_level_0, value.value_1_level_0 FROM test_table); +-- SELECT value.value_0_level_0, value.value_1_level_0 FROM test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT value.value_0_level_0.value_0_level_1, value.value_0_level_0.value_1_level_1 FROM test_table); +-- SELECT value.value_0_level_0.value_0_level_1, value.value_0_level_0.value_1_level_1 FROM test_table; 
+ +-- SELECT '--'; + +-- DESCRIBE (SELECT value.value_0_level_0 AS alias_value, alias_value.value_0_level_1, alias_value.value_1_level_1 FROM test_table); +-- SELECT value.value_0_level_0 AS alias_value, alias_value.value_0_level_1, alias_value.value_1_level_1 FROM test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT value.value_0_level_0 AS alias_value, alias_value.* FROM test_table); +-- SELECT value.value_0_level_0 AS alias_value, alias_value.* FROM test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT value.value_0_level_0 AS alias_value, alias_value.* APPLY toString FROM test_table); +-- SELECT value.value_0_level_0 AS alias_value, alias_value.* APPLY toString FROM test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT value.value_0_level_0.* FROM test_table); +-- SELECT value.value_0_level_0.* FROM test_table; + +-- SELECT '--'; + +-- DESCRIBE (SELECT value.value_0_level_0.* APPLY toString FROM test_table); +-- SELECT value.value_0_level_0.* APPLY toString FROM test_table; + +-- DROP TABLE test_table; + +SELECT 'Nested'; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value Nested (value_0_level_0 Nested(value_0_level_1 String, value_1_level_1 String), value_1_level_0 String) +) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (0, [[('value_0_level_1', 'value_1_level_1')]], ['value_1_level_0']); + +DESCRIBE (SELECT * FROM test_table); +SELECT * FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0, value.value_1_level_0 FROM test_table); +SELECT value.value_0_level_0, value.value_1_level_0 FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0.value_0_level_1, value.value_0_level_0.value_1_level_1 FROM test_table); +SELECT value.value_0_level_0.value_0_level_1, value.value_0_level_0.value_1_level_1 FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0 AS value_alias, value_alias.value_0_level_1, value_alias.value_1_level_1 FROM test_table); +SELECT value.value_0_level_0 AS value_alias, value_alias.value_0_level_1, value_alias.value_1_level_1 FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0 AS value_alias, value_alias.* FROM test_table); +SELECT value.value_0_level_0 AS value_alias, value_alias.* FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0 AS value_alias, value_alias.* APPLY toString FROM test_table); +SELECT value.value_0_level_0 AS value_alias, value_alias.* APPLY toString FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0.* FROM test_table); +SELECT value.value_0_level_0.* FROM test_table; + +SELECT '--'; + +DESCRIBE (SELECT value.value_0_level_0.* APPLY toString FROM test_table); +SELECT value.value_0_level_0.* APPLY toString FROM test_table; + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02343_analyzer_column_transformers_strict.reference b/tests/queries/0_stateless/02343_analyzer_column_transformers_strict.reference new file mode 100644 index 00000000000..4904e950431 --- /dev/null +++ b/tests/queries/0_stateless/02343_analyzer_column_transformers_strict.reference @@ -0,0 +1,2 @@ +Value +1 2 diff --git a/tests/queries/0_stateless/02343_analyzer_column_transformers_strict.sql b/tests/queries/0_stateless/02343_analyzer_column_transformers_strict.sql new file mode 100644 index 00000000000..657e2098b4a --- /dev/null +++ b/tests/queries/0_stateless/02343_analyzer_column_transformers_strict.sql @@ -0,0 +1,18 @@ +SET use_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE 
test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT * EXCEPT (id) FROM test_table; +SELECT * EXCEPT STRICT (id, value1) FROM test_table; -- { serverError 36 } + +SELECT * REPLACE STRICT (1 AS id, 2 AS value) FROM test_table; +SELECT * REPLACE STRICT (1 AS id, 2 AS value_1) FROM test_table; -- { serverError 36 } + +DROP TABLE IF EXISTS test_table; diff --git a/tests/queries/0_stateless/02343_analyzer_lambdas.reference b/tests/queries/0_stateless/02343_analyzer_lambdas.reference new file mode 100644 index 00000000000..8d29481c255 --- /dev/null +++ b/tests/queries/0_stateless/02343_analyzer_lambdas.reference @@ -0,0 +1,29 @@ +Standalone lambdas +2 +1 \N [1,2,3] +1 \N [1,2,3] +1 +0 Value +Lambda as function parameter +[2,3,4] +[2,3,4] +['1','2','3'] ['1','2','3'] +['1','2','3'] ['1','2','3'] +[0,0,0] +[1,2,3] +['1_0','2_0','3_0'] +Lambda compound argument +(1,'Value') 1_Value +value_0_level_0_value_1_level_0 +Lambda matcher +0 +0 Value +[1,1,1] +[2,2,2] +0 1 1 +0 2 2 +Lambda untuple +(1,'Value') 1 Value +Lambda carrying +2 1 +1 0 diff --git a/tests/queries/0_stateless/02343_analyzer_lambdas.sql b/tests/queries/0_stateless/02343_analyzer_lambdas.sql new file mode 100644 index 00000000000..1c6200e5eb6 --- /dev/null +++ b/tests/queries/0_stateless/02343_analyzer_lambdas.sql @@ -0,0 +1,69 @@ +SET use_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT 'Standalone lambdas'; + +WITH x -> x + 1 AS lambda SELECT lambda(1); +WITH x -> toString(x) AS lambda SELECT lambda(1), lambda(NULL), lambda([1,2,3]); +WITH x -> toString(x) AS lambda_1, lambda_1 AS lambda_2, lambda_2 AS lambda_3 SELECT lambda_1(1), lambda_2(NULL), lambda_3([1,2,3]); + +WITH x -> x + 1 AS lambda SELECT lambda(id) FROM test_table; +WITH x -> toString(x) AS lambda SELECT lambda(id), lambda(value) FROM test_table; + +SELECT 'Lambda as function parameter'; + +SELECT arrayMap(x -> x + 1, [1,2,3]); +WITH x -> x + 1 AS lambda SELECT arrayMap(lambda, [1,2,3]); +SELECT arrayMap((x -> toString(x)) as lambda, [1,2,3]), arrayMap(lambda, ['1','2','3']); +WITH x -> toString(x) AS lambda_1 SELECT arrayMap(lambda_1 AS lambda_2, [1,2,3]), arrayMap(lambda_2, ['1', '2', '3']); + +SELECT arrayMap(x -> id, [1,2,3]) FROM test_table; +SELECT arrayMap(x -> x + id, [1,2,3]) FROM test_table; +SELECT arrayMap((x -> concat(concat(toString(x), '_'), toString(id))) as lambda, [1,2,3]) FROM test_table; + +SELECT 'Lambda compound argument'; + +DROP TABLE IF EXISTS test_table_tuple; +CREATE TABLE test_table_tuple +( + id UInt64, + value Tuple(value_0_level_0 String, value_1_level_0 String) +) ENGINE=TinyLog; + +INSERT INTO test_table_tuple VALUES (0, ('value_0_level_0', 'value_1_level_0')); + +WITH x -> concat(concat(toString(x.id), '_'), x.value) AS lambda SELECT cast((1, 'Value'), 'Tuple (id UInt64, value String)') AS value, lambda(value); +WITH x -> concat(concat(x.value_0_level_0, '_'), x.value_1_level_0) AS lambda SELECT lambda(value) FROM test_table_tuple; + +SELECT 'Lambda matcher'; + +WITH x -> * AS lambda SELECT lambda(1); +WITH x -> * AS lambda SELECT lambda(1) FROM test_table; + +WITH cast(tuple(1), 'Tuple (value UInt64)') AS compound_value SELECT arrayMap(x -> compound_value.*, [1,2,3]); +WITH cast(tuple(1, 1), 'Tuple (value_1 UInt64, value_2 UInt64)') AS compound_value SELECT arrayMap(x -> compound_value.*, [1,2,3]); -- { serverError 1 } +WITH cast(tuple(1, 1), 
'Tuple (value_1 UInt64, value_2 UInt64)') AS compound_value SELECT arrayMap(x -> plus(compound_value.*), [1,2,3]); + +WITH cast(tuple(1), 'Tuple (value UInt64)') AS compound_value SELECT id, test_table.* APPLY x -> compound_value.* FROM test_table; +WITH cast(tuple(1, 1), 'Tuple (value_1 UInt64, value_2 UInt64)') AS compound_value SELECT id, test_table.* APPLY x -> compound_value.* FROM test_table; -- { serverError 1 } +WITH cast(tuple(1, 1), 'Tuple (value_1 UInt64, value_2 UInt64)') AS compound_value SELECT id, test_table.* APPLY x -> plus(compound_value.*) FROM test_table; + +SELECT 'Lambda untuple'; + +WITH x -> untuple(x) AS lambda SELECT cast((1, 'Value'), 'Tuple (id UInt64, value String)') AS value, lambda(value); + +SELECT 'Lambda carrying'; + +WITH (functor, x) -> functor(x) AS lambda, x -> x + 1 AS functor_1, x -> toString(x) AS functor_2 SELECT lambda(functor_1, 1), lambda(functor_2, 1); +WITH (functor, x) -> functor(x) AS lambda, x -> x + 1 AS functor_1, x -> toString(x) AS functor_2 SELECT lambda(functor_1, id), lambda(functor_2, id) FROM test_table; + +DROP TABLE test_table_tuple; +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02344_analyzer_multiple_aliases_for_expression.reference b/tests/queries/0_stateless/02344_analyzer_multiple_aliases_for_expression.reference new file mode 100644 index 00000000000..e0d1bb800d2 --- /dev/null +++ b/tests/queries/0_stateless/02344_analyzer_multiple_aliases_for_expression.reference @@ -0,0 +1,4 @@ +1 1 +0 0 +2 +1 1 diff --git a/tests/queries/0_stateless/02344_analyzer_multiple_aliases_for_expression.sql b/tests/queries/0_stateless/02344_analyzer_multiple_aliases_for_expression.sql new file mode 100644 index 00000000000..2569dc8ba9c --- /dev/null +++ b/tests/queries/0_stateless/02344_analyzer_multiple_aliases_for_expression.sql @@ -0,0 +1,27 @@ +SET use_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT 1 AS value, 1 AS value; +SELECT id AS value, id AS value FROM test_table; +WITH x -> x + 1 AS lambda, x -> x + 1 AS lambda SELECT lambda(1); +SELECT (SELECT 1) AS subquery, (SELECT 1) AS subquery; + +SELECT 1 AS value, 2 AS value; -- { serverError 179 } +SELECT plus(1, 1) AS value, 2 AS value; -- { serverError 179 } +SELECT (SELECT 1) AS subquery, 1 AS subquery; -- { serverError 179 } +WITH x -> x + 1 AS lambda, x -> x + 2 AS lambda SELECT lambda(1); -- { serverError 179 } +WITH x -> x + 1 AS lambda SELECT (SELECT 1) AS lambda; -- { serverError 179 } +WITH x -> x + 1 AS lambda SELECT 1 AS lambda; -- { serverError 179 } +SELECT id AS value, value AS value FROM test_table; -- { serverError 179 } +SELECT id AS value_1, value AS value_1 FROM test_table; -- { serverError 179 } +SELECT id AS value, (id + 1) AS value FROM test_table; -- { serverError 179 } + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02345_analyzer_subqueries.reference b/tests/queries/0_stateless/02345_analyzer_subqueries.reference new file mode 100644 index 00000000000..1e70be9ef52 --- /dev/null +++ b/tests/queries/0_stateless/02345_analyzer_subqueries.reference @@ -0,0 +1,27 @@ +Scalar subqueries +1 +1 +0 +Value +(0,'Value') +Subqueries FROM section +1 +1 +1 +1 +1 +1 +0 Value +0 Value +2 +Subqueries CTE +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 diff --git a/tests/queries/0_stateless/02345_analyzer_subqueries.sql b/tests/queries/0_stateless/02345_analyzer_subqueries.sql new file mode 100644 index 00000000000..3694028e5d2 --- 
/dev/null +++ b/tests/queries/0_stateless/02345_analyzer_subqueries.sql @@ -0,0 +1,51 @@ +SET use_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT 'Scalar subqueries'; + +SELECT (SELECT 1); +WITH 1 AS a SELECT (SELECT a); + +SELECT (SELECT id FROM test_table); +SELECT (SELECT value FROM test_table); +SELECT (SELECT id, value FROM test_table); + +SELECT 'Subqueries FROM section'; + +SELECT a FROM (SELECT 1 AS a) AS b; +SELECT b.a FROM (SELECT 1 AS a) AS b; + +SELECT a FROM (SELECT 1 AS a) AS b; +SELECT b.a FROM (SELECT 1 AS a) AS b; + +WITH 1 AS global_a SELECT a FROM (SELECT global_a AS a) AS b; +WITH 1 AS global_a SELECT b.a FROM (SELECT global_a AS a) AS b; + +SELECT * FROM (SELECT * FROM (SELECT * FROM test_table)); +SELECT * FROM (SELECT id, value FROM (SELECT * FROM test_table)); + +WITH 1 AS a SELECT (SELECT * FROM (SELECT * FROM (SELECT a + 1))) SETTINGS use_analyzer=1; + +SELECT 'Subqueries CTE'; + +WITH subquery AS (SELECT 1 AS a) SELECT * FROM subquery; +WITH subquery AS (SELECT 1 AS a) SELECT a FROM subquery; +WITH subquery AS (SELECT 1 AS a) SELECT subquery.a FROM subquery; +WITH subquery AS (SELECT 1 AS a) SELECT subquery.* FROM subquery; +WITH subquery AS (SELECT 1 AS a) SELECT subquery.* APPLY toString FROM subquery; +WITH subquery AS (SELECT 1 AS a) SELECT subquery_alias.a FROM subquery AS subquery_alias; +WITH subquery AS (SELECT 1 AS a) SELECT subquery_alias.* FROM subquery AS subquery_alias; +WITH subquery AS (SELECT 1 AS a) SELECT subquery_alias.* APPLY toString FROM subquery AS subquery_alias; + +WITH subquery_1 AS (SELECT 1 AS a), subquery_2 AS (SELECT 1 + subquery_1.a FROM subquery_1) SELECT * FROM subquery_2; +WITH subquery_1 AS (SELECT 1 AS a), subquery_2 AS (SELECT (1 + subquery_1.a) AS a FROM subquery_1) SELECT subquery_2.a FROM subquery_2; + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02366_explain_query_tree.reference b/tests/queries/0_stateless/02366_explain_query_tree.reference new file mode 100644 index 00000000000..769d7661e68 --- /dev/null +++ b/tests/queries/0_stateless/02366_explain_query_tree.reference @@ -0,0 +1,102 @@ +QUERY id: 0 + PROJECTION + LIST id: 1, nodes: 1 + CONSTANT id: 2, constant_value: UInt64_1, constant_value_type: UInt8 + JOIN TREE + IDENTIFIER id: 3, identifier: system.one +-- +QUERY id: 0 + PROJECTION + LIST id: 1, nodes: 2 + IDENTIFIER id: 2, identifier: id + IDENTIFIER id: 3, identifier: value + JOIN TREE + IDENTIFIER id: 4, identifier: test_table +-- +QUERY id: 0 + PROJECTION COLUMNS + id UInt64 + value String + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: value, result_type: String, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.test_table +-- +QUERY id: 0 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: arrayMap, function_type: ordinary + ARGUMENTS + LIST id: 3, nodes: 2 + LAMBDA id: 4 + ARGUMENTS + LIST id: 5, nodes: 1 + IDENTIFIER id: 6, identifier: x + EXPRESSION + FUNCTION id: 7, function_name: plus, function_type: ordinary + ARGUMENTS + LIST id: 8, nodes: 2 + IDENTIFIER id: 9, identifier: x + IDENTIFIER id: 10, identifier: id + CONSTANT id: 11, constant_value: Array_[UInt64_1, UInt64_2, UInt64_3], constant_value_type: Array(UInt8) + JOIN TREE + IDENTIFIER id: 12, identifier: test_table +-- +QUERY id: 0 + PROJECTION COLUMNS + arrayMap(lambda(tuple(x), plus(x, 
1)), [1, 2, 3]) Array(UInt16) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: arrayMap, function_type: ordinary, result_type: Array(UInt16) + ARGUMENTS + LIST id: 3, nodes: 2 + LAMBDA id: 4 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: x, result_type: UInt8, source_id: 4 + EXPRESSION + FUNCTION id: 7, function_name: plus, function_type: ordinary, result_type: UInt16 + ARGUMENTS + LIST id: 8, nodes: 2 + COLUMN id: 6, column_name: x, result_type: UInt8, source_id: 4 + CONSTANT id: 9, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 10, constant_value: Array_[UInt64_1, UInt64_2, UInt64_3], constant_value_type: Array(UInt8) + JOIN TREE + TABLE id: 11, table_name: default.test_table +-- +QUERY id: 0 + WITH + LIST id: 1, nodes: 1 + LAMBDA id: 2, alias: lambda + ARGUMENTS + LIST id: 3, nodes: 1 + IDENTIFIER id: 4, identifier: x + EXPRESSION + FUNCTION id: 5, function_name: plus, function_type: ordinary + ARGUMENTS + LIST id: 6, nodes: 2 + IDENTIFIER id: 7, identifier: x + CONSTANT id: 8, constant_value: UInt64_1, constant_value_type: UInt8 + PROJECTION + LIST id: 9, nodes: 1 + FUNCTION id: 10, function_name: lambda, function_type: ordinary + ARGUMENTS + LIST id: 11, nodes: 1 + IDENTIFIER id: 12, identifier: id + JOIN TREE + IDENTIFIER id: 13, identifier: test_table +-- +QUERY id: 0 + PROJECTION COLUMNS + lambda(id) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + COLUMN id: 4, column_name: id, result_type: UInt64, source_id: 5 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + JOIN TREE + TABLE id: 5, table_name: default.test_table diff --git a/tests/queries/0_stateless/02366_explain_query_tree.sql b/tests/queries/0_stateless/02366_explain_query_tree.sql new file mode 100644 index 00000000000..23df6d12121 --- /dev/null +++ b/tests/queries/0_stateless/02366_explain_query_tree.sql @@ -0,0 +1,38 @@ +SET use_analyzer = 1; + +EXPLAIN QUERY TREE SELECT 1; + +SELECT '--'; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +EXPLAIN QUERY TREE SELECT id, value FROM test_table; + +SELECT '--'; + +EXPLAIN QUERY TREE run_passes = 1 SELECT id, value FROM test_table; + +SELECT '--'; + +EXPLAIN QUERY TREE SELECT arrayMap(x -> x + id, [1, 2, 3]) FROM test_table; + +SELECT '--'; + +EXPLAIN QUERY TREE run_passes = 1 SELECT arrayMap(x -> x + 1, [1, 2, 3]) FROM test_table; + +SELECT '--'; + +EXPLAIN QUERY TREE WITH x -> x + 1 AS lambda SELECT lambda(id) FROM test_table; + +SELECT '--'; + +EXPLAIN QUERY TREE run_passes = 1 WITH x -> x + 1 AS lambda SELECT lambda(id) FROM test_table; + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02367_analyzer_table_alias_columns.reference b/tests/queries/0_stateless/02367_analyzer_table_alias_columns.reference new file mode 100644 index 00000000000..1f4875e38c2 --- /dev/null +++ b/tests/queries/0_stateless/02367_analyzer_table_alias_columns.reference @@ -0,0 +1,3 @@ +0 6 5 +0 Value 2 +0 Value 2 diff --git a/tests/queries/0_stateless/02367_analyzer_table_alias_columns.sql b/tests/queries/0_stateless/02367_analyzer_table_alias_columns.sql new file mode 100644 index 00000000000..769ba769f4d --- /dev/null +++ b/tests/queries/0_stateless/02367_analyzer_table_alias_columns.sql @@ -0,0 +1,41 @@ +SET use_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE 
test_table +( + id UInt64, + alias_value_1 ALIAS id + alias_value_2 + 1, + alias_value_2 ALIAS id + 5 +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0); + +SELECT id, alias_value_1, alias_value_2 FROM test_table; + +DROP TABLE test_table; + +CREATE TABLE test_table +( + id UInt64, + value String, + alias_value ALIAS ((id + 1) AS inside_value) + inside_value +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT id, value, alias_value FROM test_table; + +DROP TABLE test_table; + +CREATE TABLE test_table +( + id UInt64, + value String, + alias_value ALIAS ((id + 1) AS value) + value +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT id, value, alias_value FROM test_table; + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02368_analyzer_table_functions.reference b/tests/queries/0_stateless/02368_analyzer_table_functions.reference new file mode 100644 index 00000000000..0c1bc4d90a2 --- /dev/null +++ b/tests/queries/0_stateless/02368_analyzer_table_functions.reference @@ -0,0 +1,6 @@ +1 2 [1,2,3] [['abc'],[],['d','e']] +1 2 [1,2,3] [['abc'],[],['d','e']] +1 2 [1,2,3] [['abc'],[],['d','e']] +1 2 [1,2,3] [['abc'],[],['d','e']] +1 2 [1,2,3] [['abc'],[],['d','e']] +CSV 1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]" 1 2 [1,2,3] [['abc'],[],['d','e']] diff --git a/tests/queries/0_stateless/02368_analyzer_table_functions.sql b/tests/queries/0_stateless/02368_analyzer_table_functions.sql new file mode 100644 index 00000000000..6d9accef30c --- /dev/null +++ b/tests/queries/0_stateless/02368_analyzer_table_functions.sql @@ -0,0 +1,10 @@ +SET use_analyzer = 1; + +SELECT c1, c2, c3, c4 FROM format('CSV', '1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]"'); +SELECT f.c1, f.c2, f.c3, f.c4 FROM format('CSV', '1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]"') AS f; +SELECT f.* FROM format('CSV', '1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]"') AS f; + +WITH 'CSV', '1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]"' AS format_value SELECT c1, c2, c3, c4 FROM format('CSV', format_value); +WITH concat('1,2,"[1,2,3]",','"[[\'abc\'], [], [\'d\', \'e\']]"') AS format_value SELECT c1, c2, c3, c4 FROM format('CSV', format_value); + +SELECT format, format_value, c1, c2, c3, c4 FROM format('CSV' AS format, '1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]"' AS format_value); diff --git a/tests/queries/0_stateless/02369_analyzer_array_join_function.reference b/tests/queries/0_stateless/02369_analyzer_array_join_function.reference new file mode 100644 index 00000000000..7025cff6909 --- /dev/null +++ b/tests/queries/0_stateless/02369_analyzer_array_join_function.reference @@ -0,0 +1,44 @@ +1 +2 +3 +-- +1 1 +2 2 +3 3 +-- +1 1 +2 2 +3 3 +-- +[1,2,3] 1 +[1,2,3] 2 +[1,2,3] 3 +-- +1 1 +1 2 +1 3 +1 4 +2 1 +2 2 +2 3 +2 4 +3 1 +3 2 +3 3 +3 4 +-- +[1,1,1] +[2,2,2] +[3,3,3] +-- +[2,3,4] 1 +[3,4,5] 2 +[4,5,6] 3 +-- +0 1 +0 2 +0 3 +-- +0 1 1 +0 2 2 +0 3 3 diff --git a/tests/queries/0_stateless/02369_analyzer_array_join_function.sql b/tests/queries/0_stateless/02369_analyzer_array_join_function.sql new file mode 100644 index 00000000000..db9884f9cdd --- /dev/null +++ b/tests/queries/0_stateless/02369_analyzer_array_join_function.sql @@ -0,0 +1,59 @@ +SET use_analyzer = 1; + +SELECT arrayJoin([1, 2, 3]); + +SELECT '--'; + +SELECT arrayJoin([1, 2, 3]) AS a, arrayJoin([1, 2, 3]); + +SELECT '--'; + +SELECT arrayJoin([1, 2, 3]) AS a, a; + +SELECT '--'; + +SELECT arrayJoin([[1, 2, 3]]) AS a, arrayJoin(a) AS b; + +SELECT '--'; + +SELECT arrayJoin([1, 2, 3]) AS a, arrayJoin([1, 
2, 3, 4]) AS b; + +SELECT '--'; + +SELECT arrayMap(x -> arrayJoin([1, 2, 3]), [1, 2, 3]); + +SELECT arrayMap(x -> arrayJoin(x), [[1, 2, 3]]); -- { serverError 36 } + +SELECT arrayMap(x -> arrayJoin(cast(x, 'Array(UInt8)')), [[1, 2, 3]]); -- { serverError 36 } + +SELECT '--'; + +SELECT arrayMap(x -> x + a, [1, 2, 3]), arrayJoin([1,2,3]) as a; + +SELECT '--'; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value_1 Array(UInt8), + value_2 Array(UInt8), +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, [1, 2, 3], [1, 2, 3, 4]); + +SELECT id, arrayJoin(value_1) FROM test_table; + +SELECT '--'; + +SELECT id, arrayJoin(value_1) AS a, a FROM test_table; + +-- SELECT '--'; + +-- SELECT id, arrayJoin(value_1), arrayJoin(value_2) FROM test_table; + +-- SELECT '--'; + +-- SELECT id, arrayJoin(value_1), arrayJoin(value_2), arrayJoin([5, 6]) FROM test_table; + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02370_analyzer_in_function.reference b/tests/queries/0_stateless/02370_analyzer_in_function.reference new file mode 100644 index 00000000000..fda174c0b7c --- /dev/null +++ b/tests/queries/0_stateless/02370_analyzer_in_function.reference @@ -0,0 +1,14 @@ +1 +1 +0 +0 +1 +1 +0 +1 +-- +1 +0 +1 +1 +0 diff --git a/tests/queries/0_stateless/02370_analyzer_in_function.sql b/tests/queries/0_stateless/02370_analyzer_in_function.sql new file mode 100644 index 00000000000..01e5cf498cd --- /dev/null +++ b/tests/queries/0_stateless/02370_analyzer_in_function.sql @@ -0,0 +1,23 @@ +SET use_analyzer = 1; + +SELECT 1 IN 1; +SELECT 1 IN (1); +SELECT 1 IN 0; +SELECT 1 IN (0); +SELECT 1 IN (1, 2); +SELECT (1, 1) IN ((1, 1), (1, 2)); +SELECT (1, 1) IN ((1, 2), (1, 2)); +SELECT 1 IN (((1), (2))); + +SELECT '--'; + +SELECT 1 IN [1]; +SELECT 1 IN [0]; +SELECT 1 IN [1, 2]; +SELECT (1, 1) IN [(1, 1), (1, 2)]; +SELECT (1, 1) IN [(1, 2), (1, 2)]; + +SELECT (1, 2) IN 1; -- { serverError 43 } +SELECT (1, 2) IN [1]; -- { serverError 124 } +SELECT (1, 2) IN (((1, 2), (1, 2)), ((1, 2), (1, 2))); -- { serverError 43 } +SELECT (1, 2) IN [((1, 2), (1, 2)), ((1, 2), (1, 2))]; -- { serverError 43 } diff --git a/tests/queries/0_stateless/02371_analyzer_join_cross.reference b/tests/queries/0_stateless/02371_analyzer_join_cross.reference new file mode 100644 index 00000000000..50e43ac28d1 --- /dev/null +++ b/tests/queries/0_stateless/02371_analyzer_join_cross.reference @@ -0,0 +1,133 @@ +0 Join_1_Value_0 0 Join_2_Value_0 +0 Join_1_Value_0 1 Join_2_Value_1 +0 Join_1_Value_0 2 Join_2_Value_2 +1 Join_1_Value_1 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +1 Join_1_Value_1 2 Join_2_Value_2 +3 Join_1_Value_3 0 Join_2_Value_0 +3 Join_1_Value_3 1 Join_2_Value_1 +3 Join_1_Value_3 2 Join_2_Value_2 +-- +0 Join_1_Value_0 0 Join_2_Value_0 +0 Join_1_Value_0 1 Join_2_Value_1 +0 Join_1_Value_0 2 Join_2_Value_2 +1 Join_1_Value_1 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +1 Join_1_Value_1 2 Join_2_Value_2 +3 Join_1_Value_3 0 Join_2_Value_0 +3 Join_1_Value_3 1 Join_2_Value_1 +3 Join_1_Value_3 2 Join_2_Value_2 +-- +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 +0 0 Join_1_Value_0 Join_1_Value_0 1 1 Join_2_Value_1 Join_2_Value_1 +0 0 Join_1_Value_0 Join_1_Value_0 2 2 Join_2_Value_2 Join_2_Value_2 +1 1 Join_1_Value_1 Join_1_Value_1 0 0 Join_2_Value_0 Join_2_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 +1 1 Join_1_Value_1 Join_1_Value_1 2 2 Join_2_Value_2 Join_2_Value_2 +3 3 Join_1_Value_3 Join_1_Value_3 0 0 Join_2_Value_0 Join_2_Value_0 +3 3 
Join_1_Value_3 Join_1_Value_3 1 1 Join_2_Value_1 Join_2_Value_1 +3 3 Join_1_Value_3 Join_1_Value_3 2 2 Join_2_Value_2 Join_2_Value_2 +-- +0 Join_1_Value_0 0 Join_2_Value_0 +0 Join_1_Value_0 1 Join_2_Value_1 +0 Join_1_Value_0 2 Join_2_Value_2 +1 Join_1_Value_1 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +1 Join_1_Value_1 2 Join_2_Value_2 +3 Join_1_Value_3 0 Join_2_Value_0 +3 Join_1_Value_3 1 Join_2_Value_1 +3 Join_1_Value_3 2 Join_2_Value_2 +-- +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 +0 0 Join_1_Value_0 Join_1_Value_0 1 1 Join_2_Value_1 Join_2_Value_1 +0 0 Join_1_Value_0 Join_1_Value_0 2 2 Join_2_Value_2 Join_2_Value_2 +1 1 Join_1_Value_1 Join_1_Value_1 0 0 Join_2_Value_0 Join_2_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 +1 1 Join_1_Value_1 Join_1_Value_1 2 2 Join_2_Value_2 Join_2_Value_2 +3 3 Join_1_Value_3 Join_1_Value_3 0 0 Join_2_Value_0 Join_2_Value_0 +3 3 Join_1_Value_3 Join_1_Value_3 1 1 Join_2_Value_1 Join_2_Value_1 +3 3 Join_1_Value_3 Join_1_Value_3 2 2 Join_2_Value_2 Join_2_Value_2 +-- +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 Join_1_Value_0 0 Join_2_Value_0 1 Join_3_Value_1 +0 Join_1_Value_0 0 Join_2_Value_0 2 Join_3_Value_2 +0 Join_1_Value_0 1 Join_2_Value_1 0 Join_3_Value_0 +0 Join_1_Value_0 1 Join_2_Value_1 1 Join_3_Value_1 +0 Join_1_Value_0 1 Join_2_Value_1 2 Join_3_Value_2 +0 Join_1_Value_0 2 Join_2_Value_2 0 Join_3_Value_0 +0 Join_1_Value_0 2 Join_2_Value_2 1 Join_3_Value_1 +0 Join_1_Value_0 2 Join_2_Value_2 2 Join_3_Value_2 +1 Join_1_Value_1 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 Join_2_Value_0 1 Join_3_Value_1 +1 Join_1_Value_1 0 Join_2_Value_0 2 Join_3_Value_2 +1 Join_1_Value_1 1 Join_2_Value_1 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +1 Join_1_Value_1 1 Join_2_Value_1 2 Join_3_Value_2 +1 Join_1_Value_1 2 Join_2_Value_2 0 Join_3_Value_0 +1 Join_1_Value_1 2 Join_2_Value_2 1 Join_3_Value_1 +1 Join_1_Value_1 2 Join_2_Value_2 2 Join_3_Value_2 +3 Join_1_Value_3 0 Join_2_Value_0 0 Join_3_Value_0 +3 Join_1_Value_3 0 Join_2_Value_0 1 Join_3_Value_1 +3 Join_1_Value_3 0 Join_2_Value_0 2 Join_3_Value_2 +3 Join_1_Value_3 1 Join_2_Value_1 0 Join_3_Value_0 +3 Join_1_Value_3 1 Join_2_Value_1 1 Join_3_Value_1 +3 Join_1_Value_3 1 Join_2_Value_1 2 Join_3_Value_2 +3 Join_1_Value_3 2 Join_2_Value_2 0 Join_3_Value_0 +3 Join_1_Value_3 2 Join_2_Value_2 1 Join_3_Value_1 +3 Join_1_Value_3 2 Join_2_Value_2 2 Join_3_Value_2 +-- +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 Join_1_Value_0 0 Join_2_Value_0 1 Join_3_Value_1 +0 Join_1_Value_0 0 Join_2_Value_0 2 Join_3_Value_2 +0 Join_1_Value_0 1 Join_2_Value_1 0 Join_3_Value_0 +0 Join_1_Value_0 1 Join_2_Value_1 1 Join_3_Value_1 +0 Join_1_Value_0 1 Join_2_Value_1 2 Join_3_Value_2 +0 Join_1_Value_0 2 Join_2_Value_2 0 Join_3_Value_0 +0 Join_1_Value_0 2 Join_2_Value_2 1 Join_3_Value_1 +0 Join_1_Value_0 2 Join_2_Value_2 2 Join_3_Value_2 +1 Join_1_Value_1 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 Join_2_Value_0 1 Join_3_Value_1 +1 Join_1_Value_1 0 Join_2_Value_0 2 Join_3_Value_2 +1 Join_1_Value_1 1 Join_2_Value_1 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +1 Join_1_Value_1 1 Join_2_Value_1 2 Join_3_Value_2 +1 Join_1_Value_1 2 Join_2_Value_2 0 Join_3_Value_0 +1 Join_1_Value_1 2 Join_2_Value_2 1 Join_3_Value_1 +1 Join_1_Value_1 2 Join_2_Value_2 2 Join_3_Value_2 +3 Join_1_Value_3 0 Join_2_Value_0 0 Join_3_Value_0 +3 Join_1_Value_3 0 Join_2_Value_0 1 Join_3_Value_1 +3 Join_1_Value_3 0 
Join_2_Value_0 2 Join_3_Value_2 +3 Join_1_Value_3 1 Join_2_Value_1 0 Join_3_Value_0 +3 Join_1_Value_3 1 Join_2_Value_1 1 Join_3_Value_1 +3 Join_1_Value_3 1 Join_2_Value_1 2 Join_3_Value_2 +3 Join_1_Value_3 2 Join_2_Value_2 0 Join_3_Value_0 +3 Join_1_Value_3 2 Join_2_Value_2 1 Join_3_Value_1 +3 Join_1_Value_3 2 Join_2_Value_2 2 Join_3_Value_2 +-- +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 1 1 Join_3_Value_1 Join_3_Value_1 +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 2 2 Join_3_Value_2 Join_3_Value_2 +0 0 Join_1_Value_0 Join_1_Value_0 1 1 Join_2_Value_1 Join_2_Value_1 0 0 Join_3_Value_0 Join_3_Value_0 +0 0 Join_1_Value_0 Join_1_Value_0 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +0 0 Join_1_Value_0 Join_1_Value_0 1 1 Join_2_Value_1 Join_2_Value_1 2 2 Join_3_Value_2 Join_3_Value_2 +0 0 Join_1_Value_0 Join_1_Value_0 2 2 Join_2_Value_2 Join_2_Value_2 0 0 Join_3_Value_0 Join_3_Value_0 +0 0 Join_1_Value_0 Join_1_Value_0 2 2 Join_2_Value_2 Join_2_Value_2 1 1 Join_3_Value_1 Join_3_Value_1 +0 0 Join_1_Value_0 Join_1_Value_0 2 2 Join_2_Value_2 Join_2_Value_2 2 2 Join_3_Value_2 Join_3_Value_2 +1 1 Join_1_Value_1 Join_1_Value_1 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 0 0 Join_2_Value_0 Join_2_Value_0 1 1 Join_3_Value_1 Join_3_Value_1 +1 1 Join_1_Value_1 Join_1_Value_1 0 0 Join_2_Value_0 Join_2_Value_0 2 2 Join_3_Value_2 Join_3_Value_2 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 2 2 Join_3_Value_2 Join_3_Value_2 +1 1 Join_1_Value_1 Join_1_Value_1 2 2 Join_2_Value_2 Join_2_Value_2 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 2 2 Join_2_Value_2 Join_2_Value_2 1 1 Join_3_Value_1 Join_3_Value_1 +1 1 Join_1_Value_1 Join_1_Value_1 2 2 Join_2_Value_2 Join_2_Value_2 2 2 Join_3_Value_2 Join_3_Value_2 +3 3 Join_1_Value_3 Join_1_Value_3 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +3 3 Join_1_Value_3 Join_1_Value_3 0 0 Join_2_Value_0 Join_2_Value_0 1 1 Join_3_Value_1 Join_3_Value_1 +3 3 Join_1_Value_3 Join_1_Value_3 0 0 Join_2_Value_0 Join_2_Value_0 2 2 Join_3_Value_2 Join_3_Value_2 +3 3 Join_1_Value_3 Join_1_Value_3 1 1 Join_2_Value_1 Join_2_Value_1 0 0 Join_3_Value_0 Join_3_Value_0 +3 3 Join_1_Value_3 Join_1_Value_3 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +3 3 Join_1_Value_3 Join_1_Value_3 1 1 Join_2_Value_1 Join_2_Value_1 2 2 Join_3_Value_2 Join_3_Value_2 +3 3 Join_1_Value_3 Join_1_Value_3 2 2 Join_2_Value_2 Join_2_Value_2 0 0 Join_3_Value_0 Join_3_Value_0 +3 3 Join_1_Value_3 Join_1_Value_3 2 2 Join_2_Value_2 Join_2_Value_2 1 1 Join_3_Value_1 Join_3_Value_1 +3 3 Join_1_Value_3 Join_1_Value_3 2 2 Join_2_Value_2 Join_2_Value_2 2 2 Join_3_Value_2 Join_3_Value_2 diff --git a/tests/queries/0_stateless/02371_analyzer_join_cross.sql b/tests/queries/0_stateless/02371_analyzer_join_cross.sql new file mode 100644 index 00000000000..0c0709fb951 --- /dev/null +++ b/tests/queries/0_stateless/02371_analyzer_join_cross.sql @@ -0,0 +1,78 @@ +SET use_analyzer = 1; + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt64, + value String +) ENGINE = TinyLog; + 
+DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt64, + value String +) ENGINE = TinyLog; + +DROP TABLE IF EXISTS test_table_join_3; +CREATE TABLE test_table_join_3 +( + id UInt64, + value String +) ENGINE = TinyLog; + +INSERT INTO test_table_join_1 VALUES (0, 'Join_1_Value_0'); +INSERT INTO test_table_join_1 VALUES (1, 'Join_1_Value_1'); +INSERT INTO test_table_join_1 VALUES (3, 'Join_1_Value_3'); + +INSERT INTO test_table_join_2 VALUES (0, 'Join_2_Value_0'); +INSERT INTO test_table_join_2 VALUES (1, 'Join_2_Value_1'); +INSERT INTO test_table_join_2 VALUES (2, 'Join_2_Value_2'); + +INSERT INTO test_table_join_3 VALUES (0, 'Join_3_Value_0'); +INSERT INTO test_table_join_3 VALUES (1, 'Join_3_Value_1'); +INSERT INTO test_table_join_3 VALUES (2, 'Join_3_Value_2'); + +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value +FROM test_table_join_1, test_table_join_2; + +SELECT '--'; + +SELECT t1.id, t1.value, t2.id, t2.value FROM test_table_join_1 AS t1, test_table_join_2 AS t2; + +SELECT '--'; + +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value +FROM test_table_join_1 AS t1, test_table_join_2 AS t2; + +SELECT '--'; + +SELECT t1.id, t1.value, t2.id, t2.value FROM test_table_join_1 AS t1, test_table_join_2 AS t2; + +SELECT '--'; + +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value FROM test_table_join_1 AS t1, test_table_join_2 AS t2; + +SELECT '--'; + +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1, test_table_join_2, test_table_join_3; + +SELECT '--'; + +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1, test_table_join_2 AS t2, test_table_join_3 AS t3; + +SELECT '--'; + +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1, test_table_join_2 AS t2, test_table_join_3 AS t3; + +SELECT id FROM test_table_join_1, test_table_join_2; -- { serverError 207 } + +SELECT value FROM test_table_join_1, test_table_join_2; -- { serverError 207 } + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; +DROP TABLE test_table_join_3; diff --git a/tests/queries/0_stateless/02372_analyzer_join.reference b/tests/queries/0_stateless/02372_analyzer_join.reference new file mode 100644 index 00000000000..b8a658106ff --- /dev/null +++ b/tests/queries/0_stateless/02372_analyzer_join.reference @@ -0,0 +1,1554 @@ +-- { echoOn } + +SELECT 'JOIN INNER'; +JOIN INNER +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value +FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, 
test_table_join_2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 +SELECT '--'; +-- +SELECT t1.value, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +Join_1_Value_0 Join_2_Value_0 +Join_1_Value_1 Join_2_Value_1 +SELECT id FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } +SELECT value FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +SELECT 'JOIN LEFT'; +JOIN LEFT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value +FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 +SELECT '--'; +-- +SELECT t1.value, t2.value +FROM test_table_join_1 AS t1 LEFT 
JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +Join_1_Value_0 Join_2_Value_0 +Join_1_Value_1 Join_2_Value_1 +Join_1_Value_2 +SELECT id FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } +SELECT value FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 0 +2 Join_1_Value_2 0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 0 +2 Join_1_Value_2 0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 0 +2 Join_1_Value_2 0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +SELECT 'JOIN RIGHT'; +JOIN RIGHT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value +FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 +0 0 3 3 Join_2_Value_3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.value, t2.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +Join_1_Value_0 Join_2_Value_0 +Join_1_Value_1 Join_2_Value_1 + Join_2_Value_3 +SELECT 
id FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } +SELECT value FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +0 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +0 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +0 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT 'JOIN FULL'; +JOIN FULL +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value +FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 +0 0 3 3 Join_2_Value_3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.value, t2.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +Join_1_Value_0 Join_2_Value_0 +Join_1_Value_1 Join_2_Value_1 +Join_1_Value_2 + Join_2_Value_3 +SELECT id FROM test_table_join_1 FULL JOIN 
test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } +SELECT value FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 0 +2 Join_1_Value_2 0 +0 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 0 +2 Join_1_Value_2 0 +0 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 0 +2 Join_1_Value_2 0 +0 1 Join_2_Value_1 +0 3 Join_2_Value_3 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +0 3 Join_2_Value_3 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +0 3 Join_2_Value_3 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +0 3 Join_2_Value_3 +SELECT 'First JOIN INNER second JOIN INNER'; +First JOIN INNER second JOIN INNER +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON 
test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id +INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT 'First JOIN INNER second JOIN LEFT'; +First JOIN INNER second JOIN LEFT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, 
test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id +LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT 'First JOIN INNER second JOIN RIGHT'; +First JOIN INNER second JOIN RIGHT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN 
test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_3_Value_4 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +0 0 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_3_Value_4 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'First JOIN INNER second JOIN FULL'; +First JOIN INNER second JOIN FULL +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 INNER 
JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_3_Value_4 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +0 0 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_3_Value_4 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1_id = t2_id +FULL JOIN test_table_join_3 AS t3 ON 
t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'First JOIN LEFT second JOIN INNER'; +First JOIN LEFT second JOIN INNER +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN 
test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id +INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT 'First JOIN LEFT second JOIN LEFT'; +First JOIN LEFT second JOIN LEFT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 0 +2 Join_1_Value_2 0 0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 
Join_1_Value_1 0 0 +2 Join_1_Value_2 0 0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id +LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT 'First JOIN LEFT second JOIN RIGHT'; +First JOIN LEFT second JOIN RIGHT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 + Join_3_Value_4 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM 
test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +0 0 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 + Join_3_Value_4 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT 'First JOIN LEFT second JOIN FULL'; +First JOIN LEFT second JOIN FULL +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 LEFT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 
Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 + Join_3_Value_4 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 0 +2 Join_1_Value_2 0 0 +0 0 4 Join_3_Value_4 +0 0 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 0 +2 Join_1_Value_2 0 0 +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 + Join_3_Value_4 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1_id = t2_id +FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT 'First JOIN RIGHT second JOIN INNER'; +First JOIN RIGHT second JOIN INNER +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON 
test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id +INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +SELECT 'First JOIN RIGHT second JOIN LEFT'; +First JOIN RIGHT second JOIN LEFT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 
Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 3 Join_2_Value_3 0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 3 Join_2_Value_3 0 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_2_Value_3 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +0 1 Join_2_Value_1 0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_2_Value_3 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 3 Join_2_Value_3 0 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id +LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 3 Join_2_Value_3 0 +SELECT 'First JOIN RIGHT 
second JOIN RIGHT'; +First JOIN RIGHT second JOIN RIGHT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_3_Value_4 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +0 0 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_3_Value_4 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 
Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'First JOIN RIGHT second JOIN FULL'; +First JOIN RIGHT second JOIN FULL +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 RIGHT JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 3 Join_2_Value_3 0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 3 Join_2_Value_3 0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_2_Value_3 + Join_3_Value_4 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +0 1 Join_2_Value_1 0 +0 0 4 Join_3_Value_4 +0 0 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'JOIN multiple clauses'; +JOIN 
multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 + Join_2_Value_3 + Join_3_Value_4 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 3 Join_2_Value_3 0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1_id = t2_id +FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +0 3 Join_2_Value_3 0 +0 0 4 Join_3_Value_4 +SELECT 'First JOIN FULL second JOIN INNER'; +First JOIN FULL second JOIN INNER +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND 
t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +INNER JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id +INNER JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +SELECT 'First JOIN FULL second JOIN LEFT'; +First JOIN FULL second JOIN LEFT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 
FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +LEFT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 + Join_2_Value_3 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 0 +2 Join_1_Value_2 0 0 +0 3 Join_2_Value_3 0 +0 1 Join_2_Value_1 0 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 0 +2 Join_1_Value_2 0 0 +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +LEFT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 + Join_2_Value_3 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +LEFT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id +LEFT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +SELECT 'First JOIN FULL second JOIN RIGHT'; +First JOIN FULL second JOIN RIGHT +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 
Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +RIGHT JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 + Join_3_Value_4 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +0 0 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +RIGHT JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 + Join_3_Value_4 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id +RIGHT JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 0 4 Join_3_Value_4 +SELECT 'First JOIN FULL second JOIN FULL'; +First JOIN FULL second JOIN FULL +SELECT 'JOIN ON without conditions'; +JOIN ON without conditions +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM 
test_table_join_1 FULL JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +0 0 Join_1_Value_0 Join_1_Value_0 0 0 Join_2_Value_0 Join_2_Value_0 0 0 Join_3_Value_0 Join_3_Value_0 +1 1 Join_1_Value_1 Join_1_Value_1 1 1 Join_2_Value_1 Join_2_Value_1 1 1 Join_3_Value_1 Join_3_Value_1 +2 2 Join_1_Value_2 Join_1_Value_2 0 0 0 0 Join_3_Value_0 Join_3_Value_0 +0 0 3 3 Join_2_Value_3 Join_2_Value_3 0 0 +0 0 0 0 4 4 Join_3_Value_4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +FULL JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 + Join_2_Value_3 + Join_3_Value_4 +SELECT 'JOIN ON with conditions'; +JOIN ON with conditions +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 0 +2 Join_1_Value_2 0 0 +0 3 Join_2_Value_3 0 +0 1 Join_2_Value_1 0 +0 0 4 Join_3_Value_4 +0 0 1 Join_3_Value_1 +SELECT '--'; +-- +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 0 0 +2 Join_1_Value_2 0 0 +0 1 Join_2_Value_1 0 +0 3 Join_2_Value_3 0 +0 0 1 Join_3_Value_1 +0 0 4 Join_3_Value_4 +SELECT 'JOIN multiple clauses'; +JOIN multiple clauses +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +FULL JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; +Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +Join_1_Value_2 Join_3_Value_0 + Join_2_Value_3 + Join_3_Value_4 +SELECT 'JOIN expression aliases'; +JOIN expression aliases +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, 
t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +FULL JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +0 0 4 Join_3_Value_4 +SELECT '--'; +-- +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1_id = t2_id +FULL JOIN test_table_join_3 AS t3 ON t2_id = t3_id; +0 Join_1_Value_0 0 Join_2_Value_0 0 Join_3_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 1 Join_3_Value_1 +2 Join_1_Value_2 0 0 Join_3_Value_0 +0 3 Join_2_Value_3 0 +0 0 4 Join_3_Value_4 diff --git a/tests/queries/0_stateless/02372_analyzer_join.sql.j2 b/tests/queries/0_stateless/02372_analyzer_join.sql.j2 new file mode 100644 index 00000000000..970c0d3da17 --- /dev/null +++ b/tests/queries/0_stateless/02372_analyzer_join.sql.j2 @@ -0,0 +1,170 @@ +-- Tags: long + +SET use_analyzer = 1; + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt64, + value String +) ENGINE = TinyLog; + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt64, + value String +) ENGINE = TinyLog; + +DROP TABLE IF EXISTS test_table_join_3; +CREATE TABLE test_table_join_3 +( + id UInt64, + value String +) ENGINE = TinyLog; + +INSERT INTO test_table_join_1 VALUES (0, 'Join_1_Value_0'); +INSERT INTO test_table_join_1 VALUES (1, 'Join_1_Value_1'); +INSERT INTO test_table_join_1 VALUES (2, 'Join_1_Value_2'); + +INSERT INTO test_table_join_2 VALUES (0, 'Join_2_Value_0'); +INSERT INTO test_table_join_2 VALUES (1, 'Join_2_Value_1'); +INSERT INTO test_table_join_2 VALUES (3, 'Join_2_Value_3'); + +INSERT INTO test_table_join_3 VALUES (0, 'Join_3_Value_0'); +INSERT INTO test_table_join_3 VALUES (1, 'Join_3_Value_1'); +INSERT INTO test_table_join_3 VALUES (4, 'Join_3_Value_4'); + +-- { echoOn } + +{% for join_type in ['INNER', 'LEFT', 'RIGHT', 'FULL'] -%} + +SELECT 'JOIN {{ join_type }}'; + +SELECT 'JOIN ON without conditions'; + +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value +FROM test_table_join_1 {{ join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; + +SELECT '--'; + +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id; + +SELECT '--'; + +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id; + +SELECT '--'; + +SELECT t1.value, t2.value +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id; + +SELECT id FROM test_table_join_1 {{ join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } + +SELECT value FROM test_table_join_1 {{ join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id; -- { serverError 207 } + +SELECT 'JOIN ON with conditions'; + +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0'; + +SELECT '--'; + +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON 
t1.id = t2.id AND t2.value = 'Join_2_Value_0'; + +SELECT '--'; + +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; + +SELECT '--'; + +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON toString(t1.id) = toString(t2.id) AND t1.value = 'Join_1_Value_0' AND t2.value = 'Join_2_Value_0'; + +SELECT 'JOIN multiple clauses'; + +SELECT t1.id, t1.value, t2.id, t2.value +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id; + +SELECT 'JOIN expression aliases'; + +SELECT t1_id, t1.value, t2_id, t2.value FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id); + +SELECT '--'; + +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 ON t1_id = t2_id; + +{% endfor %} + +{% for first_join_type in ['INNER', 'LEFT', 'RIGHT', 'FULL'] -%} +{% for second_join_type in ['INNER', 'LEFT', 'RIGHT', 'FULL'] -%} + +SELECT 'First JOIN {{ first_join_type }} second JOIN {{ second_join_type }}'; + +SELECT 'JOIN ON without conditions'; + +SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_2.id, test_table_join_2.value, test_table_join_3.id, test_table_join_3.value +FROM test_table_join_1 {{ first_join_type }} JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +{{ second_join_type }} JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id; + +SELECT '--'; + +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; + +SELECT '--'; + +SELECT t1.id, test_table_join_1.id, t1.value, test_table_join_1.value, t2.id, test_table_join_2.id, t2.value, test_table_join_2.value, +t3.id, test_table_join_3.id, t3.value, test_table_join_3.value +FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; + +SELECT '--'; +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON test_table_join_1.id = test_table_join_2.id +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON test_table_join_2.id = test_table_join_3.id; + +SELECT 'JOIN ON with conditions'; + +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0'; + +SELECT '--'; + +SELECT t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id AND t1.value == 'Join_1_Value_0' AND t2.value == 'Join_2_Value_0' +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2.id = t3.id AND t2.value == 'Join_2_Value_0' AND t3.value == 'Join_3_Value_0'; + +SELECT 'JOIN multiple clauses'; + +SELECT t1.value, t2.value, t3.value +FROM test_table_join_1 AS t1 +{{ first_join_type }} 
JOIN test_table_join_2 AS t2 ON t1.id = t2.id OR t1.id = t2.id +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2.id = t3.id OR t3.id = t2.id; + +SELECT 'JOIN expression aliases'; + +SELECT t1_id, t1.value, t2_id, t2.value, t3_id, t3.value +FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON (t1.id AS t1_id) = (t2.id AS t2_id) +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2_id = (t3.id AS t3_id); + +SELECT '--'; + +SELECT t1.id AS t1_id, t1.value, t2.id AS t2_id, t2.value, t3.id AS t3_id, t3.value +FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON t1_id = t2_id +{{ second_join_type }} JOIN test_table_join_3 AS t3 ON t2_id = t3_id; + +{% endfor %} +{% endfor %} + +-- { echoOff } + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; +DROP TABLE test_table_join_3; diff --git a/tests/queries/0_stateless/02373_analyzer_join_use_nulls.reference b/tests/queries/0_stateless/02373_analyzer_join_use_nulls.reference new file mode 100644 index 00000000000..3722c23e4a0 --- /dev/null +++ b/tests/queries/0_stateless/02373_analyzer_join_use_nulls.reference @@ -0,0 +1,60 @@ +-- { echoOn } + +SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String +1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String +SELECT '--'; +-- +SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 UInt64 Join_1_Value_0 String 0 Nullable(UInt64) Join_2_Value_0 Nullable(String) +1 UInt64 Join_1_Value_1 String 1 Nullable(UInt64) Join_2_Value_1 Nullable(String) +2 UInt64 Join_1_Value_2 String \N Nullable(UInt64) \N Nullable(String) +SELECT '--'; +-- +SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 Nullable(UInt64) Join_1_Value_0 Nullable(String) 0 UInt64 Join_2_Value_0 String +1 Nullable(UInt64) Join_1_Value_1 Nullable(String) 1 UInt64 Join_2_Value_1 String +\N Nullable(UInt64) \N Nullable(String) 3 UInt64 Join_2_Value_3 String +SELECT '--'; +-- +SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; +0 Nullable(UInt64) Join_1_Value_0 Nullable(String) 0 Nullable(UInt64) Join_2_Value_0 Nullable(String) +1 Nullable(UInt64) Join_1_Value_1 Nullable(String) 1 Nullable(UInt64) Join_2_Value_1 Nullable(String) +2 Nullable(UInt64) Join_1_Value_2 Nullable(String) \N Nullable(UInt64) \N Nullable(String) +\N Nullable(UInt64) \N Nullable(String) 3 Nullable(UInt64) Join_2_Value_3 Nullable(String) +SELECT '--'; +-- +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id); +0 UInt64 0 UInt64 Join_1_Value_0 
String 0 UInt64 Join_2_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String +SELECT '--'; +-- +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 Nullable(UInt64) Join_2_Value_0 Nullable(String) +1 UInt64 1 UInt64 Join_1_Value_1 String 1 Nullable(UInt64) Join_2_Value_1 Nullable(String) +2 UInt64 2 UInt64 Join_1_Value_2 String \N Nullable(UInt64) \N Nullable(String) +SELECT '--'; +-- +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id); +0 UInt64 0 Nullable(UInt64) Join_1_Value_0 Nullable(String) 0 UInt64 Join_2_Value_0 String +1 UInt64 1 Nullable(UInt64) Join_1_Value_1 Nullable(String) 1 UInt64 Join_2_Value_1 String +3 UInt64 \N Nullable(UInt64) \N Nullable(String) 3 UInt64 Join_2_Value_3 String +SELECT '--'; +-- +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id); +0 Nullable(UInt64) 0 Nullable(UInt64) Join_1_Value_0 Nullable(String) 0 Nullable(UInt64) Join_2_Value_0 Nullable(String) +1 Nullable(UInt64) 1 Nullable(UInt64) Join_1_Value_1 Nullable(String) 1 Nullable(UInt64) Join_2_Value_1 Nullable(String) +2 Nullable(UInt64) 2 Nullable(UInt64) Join_1_Value_2 Nullable(String) \N Nullable(UInt64) \N Nullable(String) +\N Nullable(UInt64) \N Nullable(UInt64) \N Nullable(String) 3 Nullable(UInt64) Join_2_Value_3 Nullable(String) diff --git a/tests/queries/0_stateless/02373_analyzer_join_use_nulls.sql b/tests/queries/0_stateless/02373_analyzer_join_use_nulls.sql new file mode 100644 index 00000000000..a67a11bf8b0 --- /dev/null +++ b/tests/queries/0_stateless/02373_analyzer_join_use_nulls.sql @@ -0,0 +1,73 @@ +SET use_analyzer = 1; +SET join_use_nulls = 1; + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt64, + value String +) ENGINE = TinyLog; + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt64, + value String +) ENGINE = TinyLog; + +INSERT INTO test_table_join_1 VALUES (0, 'Join_1_Value_0'); +INSERT INTO test_table_join_1 VALUES (1, 'Join_1_Value_1'); +INSERT INTO test_table_join_1 VALUES (2, 'Join_1_Value_2'); + +INSERT INTO test_table_join_2 VALUES (0, 'Join_2_Value_0'); +INSERT INTO test_table_join_2 VALUES (1, 'Join_2_Value_1'); +INSERT INTO test_table_join_2 VALUES (3, 'Join_2_Value_3'); + +-- { echoOn } + +SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id; + +SELECT '--'; + +SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; + +SELECT '--'; + +SELECT t1.id AS t1_id, toTypeName(t1_id), 
t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id; + +SELECT '--'; + +SELECT t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id; + +SELECT '--'; + +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id); + +SELECT '--'; + +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id); + +SELECT '--'; + +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id); + +SELECT '--'; + +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id); + +-- { echoOff } + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; diff --git a/tests/queries/0_stateless/02373_progress_contain_result.reference b/tests/queries/0_stateless/02373_progress_contain_result.reference index 1e7492e2829..a125646e7b8 100644 --- a/tests/queries/0_stateless/02373_progress_contain_result.reference +++ b/tests/queries/0_stateless/02373_progress_contain_result.reference @@ -1 +1 @@ -< X-ClickHouse-Summary: {"read_rows":"100","read_bytes":"800","written_rows":"0","written_bytes":"0","total_rows_to_read":"100","result_rows":"100","result_bytes":"131"} +< X-ClickHouse-Summary: {"read_rows":"100","read_bytes":"800","written_rows":"0","written_bytes":"0","total_rows_to_read":"100","result_rows":"100","result_bytes":"227"} diff --git a/tests/queries/0_stateless/02374_analyzer_array_join.reference b/tests/queries/0_stateless/02374_analyzer_array_join.reference new file mode 100644 index 00000000000..28859f715b3 --- /dev/null +++ b/tests/queries/0_stateless/02374_analyzer_array_join.reference @@ -0,0 +1,110 @@ +-- { echoOn } + +SELECT 'ARRAY JOIN with constant'; +ARRAY JOIN with constant +SELECT id, value, value_1 FROM test_table ARRAY JOIN [1, 2, 3] AS value_1; +0 Value 1 +0 Value 2 +0 Value 3 +0 Value 1 +0 Value 2 +0 Value 3 +SELECT '--'; +-- +SELECT id, value FROM test_table ARRAY JOIN [1, 2, 3] AS value; +0 1 +0 2 +0 3 +0 1 +0 2 +0 3 +SELECT '--'; +-- +WITH [1, 2, 3] AS constant_array SELECT id, value FROM test_table ARRAY JOIN constant_array AS value; +0 1 +0 2 +0 3 +0 1 +0 2 +0 3 +SELECT '--'; +-- +WITH [1, 2, 3] AS constant_array SELECT id, value, value_1 FROM test_table ARRAY JOIN constant_array AS value_1; +0 Value 1 +0 Value 2 +0 Value 3 +0 Value 1 +0 Value 2 +0 Value 3 +SELECT '--'; +-- +SELECT id, value, value_1, value_2 FROM test_table ARRAY JOIN [[1, 2, 3]] AS value_1 ARRAY JOIN value_1 AS value_2; +0 Value [1,2,3] 
1 +0 Value [1,2,3] 2 +0 Value [1,2,3] 3 +0 Value [1,2,3] 1 +0 Value [1,2,3] 2 +0 Value [1,2,3] 3 +SELECT 1 AS value FROM test_table ARRAY JOIN [1,2,3] AS value; -- { serverError 179 } +SELECT 'ARRAY JOIN with column'; +ARRAY JOIN with column +SELECT id, value, test_table.value_array FROM test_table ARRAY JOIN value_array; +0 Value 1 +0 Value 2 +0 Value 3 +0 Value 4 +0 Value 5 +0 Value 6 +SELECT '--'; +-- +SELECT id, value_array, value FROM test_table ARRAY JOIN value_array AS value; +0 [1,2,3] 1 +0 [1,2,3] 2 +0 [1,2,3] 3 +0 [4,5,6] 4 +0 [4,5,6] 5 +0 [4,5,6] 6 +SELECT '--'; +-- +SELECT id, value, value_array, value_array_element FROM test_table ARRAY JOIN value_array AS value_array_element; +0 Value [1,2,3] 1 +0 Value [1,2,3] 2 +0 Value [1,2,3] 3 +0 Value [4,5,6] 4 +0 Value [4,5,6] 5 +0 Value [4,5,6] 6 +SELECT '--'; +-- +SELECT id, value, value_array AS value_array_array_alias FROM test_table ARRAY JOIN value_array_array_alias; +0 Value [1,2,3] +0 Value [1,2,3] +0 Value [1,2,3] +0 Value [4,5,6] +0 Value [4,5,6] +0 Value [4,5,6] +SELECT '--'; +-- +SELECT id AS value FROM test_table ARRAY JOIN value_array AS value; -- { serverError 179 } +SELECT '--'; +-- +SELECT id, value, value_array AS value_array_array_alias, value_array_array_alias_element FROM test_table ARRAY JOIN value_array_array_alias AS value_array_array_alias_element; +0 Value [1,2,3] 1 +0 Value [1,2,3] 2 +0 Value [1,2,3] 3 +0 Value [4,5,6] 4 +0 Value [4,5,6] 5 +0 Value [4,5,6] 6 +SELECT '--'; +-- +SELECT id, value, value_array_array, value_array_array_inner_element, value_array_array_inner_element, value_array_array_inner_inner_element +FROM test_table ARRAY JOIN value_array_array AS value_array_array_inner_element +ARRAY JOIN value_array_array_inner_element AS value_array_array_inner_inner_element; +0 Value [[1,2,3]] [1,2,3] [1,2,3] 1 +0 Value [[1,2,3]] [1,2,3] [1,2,3] 2 +0 Value [[1,2,3]] [1,2,3] [1,2,3] 3 +0 Value [[1,2,3],[4,5,6]] [1,2,3] [1,2,3] 1 +0 Value [[1,2,3],[4,5,6]] [1,2,3] [1,2,3] 2 +0 Value [[1,2,3],[4,5,6]] [1,2,3] [1,2,3] 3 +0 Value [[1,2,3],[4,5,6]] [4,5,6] [4,5,6] 4 +0 Value [[1,2,3],[4,5,6]] [4,5,6] [4,5,6] 5 +0 Value [[1,2,3],[4,5,6]] [4,5,6] [4,5,6] 6 diff --git a/tests/queries/0_stateless/02374_analyzer_array_join.sql b/tests/queries/0_stateless/02374_analyzer_array_join.sql new file mode 100644 index 00000000000..b7e19bceadf --- /dev/null +++ b/tests/queries/0_stateless/02374_analyzer_array_join.sql @@ -0,0 +1,70 @@ +SET use_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String, + value_array Array(UInt64), + value_array_array Array(Array(UInt64)) +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value', [1, 2, 3], [[1, 2, 3]]), (0, 'Value', [4, 5, 6], [[1, 2, 3], [4, 5, 6]]); + +-- { echoOn } + +SELECT 'ARRAY JOIN with constant'; + +SELECT id, value, value_1 FROM test_table ARRAY JOIN [1, 2, 3] AS value_1; + +SELECT '--'; + +SELECT id, value FROM test_table ARRAY JOIN [1, 2, 3] AS value; + +SELECT '--'; + +WITH [1, 2, 3] AS constant_array SELECT id, value FROM test_table ARRAY JOIN constant_array AS value; + +SELECT '--'; + +WITH [1, 2, 3] AS constant_array SELECT id, value, value_1 FROM test_table ARRAY JOIN constant_array AS value_1; + +SELECT '--'; + +SELECT id, value, value_1, value_2 FROM test_table ARRAY JOIN [[1, 2, 3]] AS value_1 ARRAY JOIN value_1 AS value_2; + +SELECT 1 AS value FROM test_table ARRAY JOIN [1,2,3] AS value; -- { serverError 179 } + +SELECT 'ARRAY JOIN with column'; + +SELECT id, value, test_table.value_array FROM 
test_table ARRAY JOIN value_array; + +SELECT '--'; + +SELECT id, value_array, value FROM test_table ARRAY JOIN value_array AS value; + +SELECT '--'; + +SELECT id, value, value_array, value_array_element FROM test_table ARRAY JOIN value_array AS value_array_element; + +SELECT '--'; + +SELECT id, value, value_array AS value_array_array_alias FROM test_table ARRAY JOIN value_array_array_alias; + +SELECT '--'; + +SELECT id AS value FROM test_table ARRAY JOIN value_array AS value; -- { serverError 179 } + +SELECT '--'; + +SELECT id, value, value_array AS value_array_array_alias, value_array_array_alias_element FROM test_table ARRAY JOIN value_array_array_alias AS value_array_array_alias_element; + +SELECT '--'; + +SELECT id, value, value_array_array, value_array_array_inner_element, value_array_array_inner_element, value_array_array_inner_inner_element +FROM test_table ARRAY JOIN value_array_array AS value_array_array_inner_element +ARRAY JOIN value_array_array_inner_element AS value_array_array_inner_inner_element; + +-- { echoOff } + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02374_analyzer_join_using.reference b/tests/queries/0_stateless/02374_analyzer_join_using.reference new file mode 100644 index 00000000000..62750c33f89 --- /dev/null +++ b/tests/queries/0_stateless/02374_analyzer_join_using.reference @@ -0,0 +1,452 @@ +-- { echoOn } + +SELECT 'JOIN INNER'; +JOIN INNER +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id); +0 UInt16 0 UInt16 Join_1_Value_0 String 0 UInt16 Join_2_Value_0 String +1 UInt16 1 UInt16 Join_1_Value_1 String 1 UInt16 Join_2_Value_1 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id); +Join_1_Value_0 String Join_2_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id); +1 +1 +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (test_value); -- { serverError 47 } +SELECT 'JOIN LEFT'; +JOIN LEFT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id); +0 UInt16 0 UInt16 Join_1_Value_0 String 0 UInt16 Join_2_Value_0 String +1 UInt16 1 UInt16 Join_1_Value_1 String 1 UInt16 Join_2_Value_1 String +2 UInt16 2 UInt16 Join_1_Value_2 String 0 UInt16 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id); +Join_1_Value_0 String Join_2_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String +Join_1_Value_2 String String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id); +1 +1 +1 +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (test_value); -- { 
serverError 47 } +SELECT 'JOIN RIGHT'; +JOIN RIGHT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id); +0 UInt16 0 UInt16 Join_1_Value_0 String 0 UInt16 Join_2_Value_0 String +1 UInt16 1 UInt16 Join_1_Value_1 String 1 UInt16 Join_2_Value_1 String +3 UInt16 0 UInt16 String 3 UInt16 Join_2_Value_3 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id); +Join_1_Value_0 String Join_2_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String + String Join_2_Value_3 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id); +1 +1 +1 +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (test_value); -- { serverError 47 } +SELECT 'JOIN FULL'; +JOIN FULL +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id); +0 UInt16 0 UInt16 Join_1_Value_0 String 0 UInt16 Join_2_Value_0 String +1 UInt16 1 UInt16 Join_1_Value_1 String 1 UInt16 Join_2_Value_1 String +2 UInt16 2 UInt16 Join_1_Value_2 String 0 UInt16 String +0 UInt16 0 UInt16 String 3 UInt16 Join_2_Value_3 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id); +Join_1_Value_0 String Join_2_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String +Join_1_Value_2 String String + String Join_2_Value_3 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id); +1 +1 +1 +1 +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (test_value); -- { serverError 47 } +SELECT 'First JOIN INNER second JOIN INNER'; +First JOIN INNER second JOIN INNER +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 INNER 
JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +1 +1 +SELECT id FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN INNER second JOIN LEFT'; +First JOIN INNER second JOIN LEFT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +1 +1 +SELECT id FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id LEFT JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN INNER second JOIN RIGHT'; +First JOIN INNER second JOIN RIGHT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +4 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String + String String Join_3_Value_4 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id RIGHT JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN INNER second JOIN FULL'; +First JOIN INNER second JOIN FULL +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, 
toTypeName(t3_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +0 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String + String String Join_3_Value_4 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id FULL JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN LEFT second JOIN INNER'; +First JOIN LEFT second JOIN INNER +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +1 +1 +SELECT id FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN LEFT second JOIN LEFT'; +First JOIN LEFT second JOIN LEFT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +2 UInt64 2 UInt64 Join_1_Value_2 String 0 UInt64 String 0 UInt64 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 
LEFT JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String +Join_1_Value_2 String String String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id LEFT JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN LEFT second JOIN RIGHT'; +First JOIN LEFT second JOIN RIGHT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +4 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String + String String Join_3_Value_4 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id RIGHT JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN LEFT second JOIN FULL'; +First JOIN LEFT second JOIN FULL +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +2 UInt64 2 UInt64 Join_1_Value_2 String 0 UInt64 String 0 UInt64 String +0 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String +Join_1_Value_2 String String String + String String Join_3_Value_4 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) FULL JOIN 
test_table_join_3 AS t3 USING(id); +1 +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id FULL JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN RIGHT second JOIN INNER'; +First JOIN RIGHT second JOIN INNER +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +1 +1 +SELECT id FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN RIGHT second JOIN LEFT'; +First JOIN RIGHT second JOIN LEFT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +3 UInt64 0 UInt64 String 3 UInt64 Join_2_Value_3 String 0 UInt64 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String + String Join_2_Value_3 String String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id LEFT JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN RIGHT second JOIN RIGHT'; +First JOIN RIGHT second JOIN RIGHT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS 
t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +4 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String + String String Join_3_Value_4 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id RIGHT JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN RIGHT second JOIN FULL'; +First JOIN RIGHT second JOIN FULL +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +3 UInt64 0 UInt64 String 3 UInt64 Join_2_Value_3 String 0 UInt64 String +0 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String + String Join_2_Value_3 String String + String String Join_3_Value_4 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id FULL JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN FULL second JOIN INNER'; +First JOIN FULL second JOIN INNER +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +0 UInt64 0 UInt64 String 3 UInt64 Join_2_Value_3 String 0 UInt64 
Join_3_Value_0 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String + String Join_2_Value_3 String Join_3_Value_0 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN FULL second JOIN LEFT'; +First JOIN FULL second JOIN LEFT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +2 UInt64 2 UInt64 Join_1_Value_2 String 0 UInt64 String 0 UInt64 String +0 UInt64 0 UInt64 String 3 UInt64 Join_2_Value_3 String 0 UInt64 Join_3_Value_0 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String +Join_1_Value_2 String String String + String Join_2_Value_3 String Join_3_Value_0 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id LEFT JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN FULL second JOIN RIGHT'; +First JOIN FULL second JOIN RIGHT +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +0 UInt64 0 UInt64 String 3 UInt64 Join_2_Value_3 String 0 UInt64 Join_3_Value_0 String +4 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING 
(id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String + String Join_2_Value_3 String Join_3_Value_0 String + String String Join_3_Value_4 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id RIGHT JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } +SELECT 'First JOIN FULL second JOIN FULL'; +First JOIN FULL second JOIN FULL +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +0 UInt64 0 UInt64 Join_1_Value_0 String 0 UInt64 Join_2_Value_0 String 0 UInt64 Join_3_Value_0 String +1 UInt64 1 UInt64 Join_1_Value_1 String 1 UInt64 Join_2_Value_1 String 1 UInt64 Join_3_Value_1 String +2 UInt64 2 UInt64 Join_1_Value_2 String 0 UInt64 String 0 UInt64 String +0 UInt64 0 UInt64 String 3 UInt64 Join_2_Value_3 String 0 UInt64 Join_3_Value_0 String +0 UInt64 0 UInt64 String 0 UInt64 String 4 UInt64 Join_3_Value_4 String +SELECT '--'; +-- +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +Join_1_Value_0 String Join_2_Value_0 String Join_3_Value_0 String +Join_1_Value_1 String Join_2_Value_1 String Join_3_Value_1 String +Join_1_Value_2 String String String + String Join_2_Value_3 String Join_3_Value_0 String + String String Join_3_Value_4 String +SELECT '--'; +-- +SELECT 1 FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING(id); +1 +1 +1 +1 +1 +SELECT id FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id FULL JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } diff --git a/tests/queries/0_stateless/02374_analyzer_join_using.sql.j2 b/tests/queries/0_stateless/02374_analyzer_join_using.sql.j2 new file mode 100644 index 00000000000..a8e9cc9eefc --- /dev/null +++ b/tests/queries/0_stateless/02374_analyzer_join_using.sql.j2 @@ -0,0 +1,87 @@ +SET use_analyzer = 1; + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt8, + value String +) ENGINE = TinyLog; + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt16, + value String +) ENGINE = TinyLog; + +DROP TABLE IF EXISTS test_table_join_3; +CREATE TABLE test_table_join_3 +( + id UInt64, + value String +) ENGINE = TinyLog; + +INSERT INTO test_table_join_1 VALUES (0, 'Join_1_Value_0'); +INSERT INTO test_table_join_1 VALUES (1, 'Join_1_Value_1'); +INSERT INTO test_table_join_1 VALUES (2, 'Join_1_Value_2'); + +INSERT INTO test_table_join_2 VALUES (0, 'Join_2_Value_0'); +INSERT INTO test_table_join_2 VALUES (1, 'Join_2_Value_1'); +INSERT INTO test_table_join_2 VALUES (3, 'Join_2_Value_3'); + +INSERT INTO test_table_join_3 VALUES (0, 'Join_3_Value_0'); +INSERT INTO test_table_join_3 VALUES (1, 
'Join_3_Value_1'); +INSERT INTO test_table_join_3 VALUES (4, 'Join_3_Value_4'); + +-- { echoOn } + +{% for join_type in ['INNER', 'LEFT', 'RIGHT', 'FULL'] -%} + +SELECT 'JOIN {{ join_type }}'; + +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 USING (id); + +SELECT '--'; + +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 USING (id); + +SELECT '--'; + +SELECT 1 FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 USING (id); + +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value) +FROM test_table_join_1 AS t1 {{ join_type }} JOIN test_table_join_2 AS t2 USING (test_value); -- { serverError 47 } + +{% endfor %} + +{% for first_join_type in ['INNER', 'LEFT', 'RIGHT', 'FULL'] -%} +{% for second_join_type in ['INNER', 'LEFT', 'RIGHT', 'FULL'] -%} + +SELECT 'First JOIN {{ first_join_type }} second JOIN {{ second_join_type }}'; + +SELECT id AS using_id, toTypeName(using_id), t1.id AS t1_id, toTypeName(t1_id), t1.value AS t1_value, toTypeName(t1_value), +t2.id AS t2_id, toTypeName(t2_id), t2.value AS t2_value, toTypeName(t2_value), t3.id AS t3_id, toTypeName(t3_id), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 USING (id) {{ second_join_type }} JOIN test_table_join_3 AS t3 USING(id); + +SELECT '--'; + +SELECT t1.value AS t1_value, toTypeName(t1_value), t2.value AS t2_value, toTypeName(t2_value), t3.value AS t3_value, toTypeName(t3_value) +FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 USING (id) {{ second_join_type }} JOIN test_table_join_3 AS t3 USING(id); + +SELECT '--'; + +SELECT 1 FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 USING (id) {{ second_join_type }} JOIN test_table_join_3 AS t3 USING(id); + +SELECT id FROM test_table_join_1 AS t1 {{ first_join_type }} JOIN test_table_join_2 AS t2 ON t1.id = t2.id {{ second_join_type }} JOIN test_table_join_3 AS t3 USING (id); -- { serverError 207 } + +{% endfor %} +{% endfor %} + +-- { echoOff } + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; +DROP TABLE test_table_join_3; diff --git a/tests/queries/0_stateless/02375_analyzer_union.reference b/tests/queries/0_stateless/02375_analyzer_union.reference new file mode 100644 index 00000000000..199b9af5313 --- /dev/null +++ b/tests/queries/0_stateless/02375_analyzer_union.reference @@ -0,0 +1,62 @@ +-- { echoOn } + +SELECT 'Union constants'; +Union constants +SELECT 1 UNION ALL SELECT 1; +1 +1 +SELECT '--'; +-- +SELECT 1 UNION DISTINCT SELECT 1 UNION ALL SELECT 1; +1 +1 +SELECT '--'; +-- +SELECT 1 INTERSECT SELECT 1; +1 +SELECT '--'; +-- +SELECT 1 EXCEPT SELECT 1; +SELECT '--'; +-- +SELECT id FROM (SELECT 1 AS id UNION ALL SELECT 1); +1 +1 +SELECT 'Union non constants'; +Union non constants +SELECT value FROM (SELECT 1 as value UNION ALL SELECT 1 UNION ALL SELECT 1); +1 +1 +1 +SELECT '--'; +-- +SELECT id FROM test_table UNION ALL SELECT id FROM test_table; +0 +0 +SELECT '--'; +-- +SELECT id FROM test_table UNION DISTINCT SELECT id FROM test_table; +0 +SELECT '--'; +-- +SELECT id FROM test_table INTERSECT SELECT id FROM test_table; +0 +SELECT 
'--'; +-- +SELECT id FROM test_table EXCEPT SELECT id FROM test_table; +SELECT '--'; +-- +SELECT id FROM (SELECT id FROM test_table UNION ALL SELECT id FROM test_table); +0 +0 +SELECT '--'; +-- +SELECT id FROM (SELECT id FROM test_table UNION DISTINCT SELECT id FROM test_table); +0 +SELECT '--'; +-- +SELECT id FROM (SELECT id FROM test_table INTERSECT SELECT id FROM test_table); +0 +SELECT '--'; +-- +SELECT id FROM (SELECT id FROM test_table EXCEPT SELECT id FROM test_table); diff --git a/tests/queries/0_stateless/02375_analyzer_union.sql b/tests/queries/0_stateless/02375_analyzer_union.sql new file mode 100644 index 00000000000..4983040bd23 --- /dev/null +++ b/tests/queries/0_stateless/02375_analyzer_union.sql @@ -0,0 +1,71 @@ +SET use_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +-- { echoOn } + +SELECT 'Union constants'; + +SELECT 1 UNION ALL SELECT 1; + +SELECT '--'; + +SELECT 1 UNION DISTINCT SELECT 1 UNION ALL SELECT 1; + +SELECT '--'; + +SELECT 1 INTERSECT SELECT 1; + +SELECT '--'; + +SELECT 1 EXCEPT SELECT 1; + +SELECT '--'; + +SELECT id FROM (SELECT 1 AS id UNION ALL SELECT 1); + +SELECT 'Union non constants'; + +SELECT value FROM (SELECT 1 as value UNION ALL SELECT 1 UNION ALL SELECT 1); + +SELECT '--'; + +SELECT id FROM test_table UNION ALL SELECT id FROM test_table; + +SELECT '--'; + +SELECT id FROM test_table UNION DISTINCT SELECT id FROM test_table; + +SELECT '--'; + +SELECT id FROM test_table INTERSECT SELECT id FROM test_table; + +SELECT '--'; +SELECT id FROM test_table EXCEPT SELECT id FROM test_table; + +SELECT '--'; + +SELECT id FROM (SELECT id FROM test_table UNION ALL SELECT id FROM test_table); + +SELECT '--'; + +SELECT id FROM (SELECT id FROM test_table UNION DISTINCT SELECT id FROM test_table); + +SELECT '--'; + +SELECT id FROM (SELECT id FROM test_table INTERSECT SELECT id FROM test_table); + +SELECT '--'; + +SELECT id FROM (SELECT id FROM test_table EXCEPT SELECT id FROM test_table); + +-- { echoOff } + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02376_analyzer_in_function_subquery.reference b/tests/queries/0_stateless/02376_analyzer_in_function_subquery.reference new file mode 100644 index 00000000000..3641c7d2f09 --- /dev/null +++ b/tests/queries/0_stateless/02376_analyzer_in_function_subquery.reference @@ -0,0 +1,43 @@ +-- { echoOn } + +SELECT id, value FROM test_table WHERE 1 IN (SELECT 1); +0 Value_0 +1 Value_1 +2 Value_2 +SELECT '--'; +-- +SELECT id, value FROM test_table WHERE 0 IN (SELECT 1); +SELECT '--'; +-- +SELECT id, value FROM test_table WHERE id IN (SELECT 1); +1 Value_1 +SELECT '--'; +-- +SELECT id, value FROM test_table WHERE id IN (SELECT 2); +2 Value_2 +SELECT '--'; +-- +SELECT id, value FROM test_table WHERE id IN test_table_for_in; +0 Value_0 +1 Value_1 +SELECT '--'; +-- +SELECT id, value FROM test_table WHERE id IN (SELECT id FROM test_table_for_in); +0 Value_0 +1 Value_1 +SELECT '--'; +-- +SELECT id, value FROM test_table WHERE id IN (SELECT id FROM test_table_for_in UNION DISTINCT SELECT id FROM test_table_for_in); +0 Value_0 +1 Value_1 +SELECT '--'; +-- +WITH cte_test_table_for_in AS (SELECT id FROM test_table_for_in) SELECT id, value FROM test_table WHERE id IN cte_test_table_for_in; +0 Value_0 +1 Value_1 +SELECT '--'; +-- +WITH cte_test_table_for_in AS (SELECT id FROM test_table_for_in) SELECT id, value +FROM test_table WHERE id IN (SELECT id FROM cte_test_table_for_in UNION DISTINCT SELECT 
id FROM cte_test_table_for_in); +0 Value_0 +1 Value_1 diff --git a/tests/queries/0_stateless/02376_analyzer_in_function_subquery.sql b/tests/queries/0_stateless/02376_analyzer_in_function_subquery.sql new file mode 100644 index 00000000000..90b2f75b6c6 --- /dev/null +++ b/tests/queries/0_stateless/02376_analyzer_in_function_subquery.sql @@ -0,0 +1,60 @@ +SET use_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value_0'), (1, 'Value_1'), (2, 'Value_2'); + +DROP TABLE IF EXISTS test_table_for_in; +CREATE TABLE test_table_for_in +( + id UInt64 +) ENGINE=TinyLog; + +INSERT INTO test_table_for_in VALUES (0), (1); + +-- { echoOn } + +SELECT id, value FROM test_table WHERE 1 IN (SELECT 1); + +SELECT '--'; + +SELECT id, value FROM test_table WHERE 0 IN (SELECT 1); + +SELECT '--'; + +SELECT id, value FROM test_table WHERE id IN (SELECT 1); + +SELECT '--'; + +SELECT id, value FROM test_table WHERE id IN (SELECT 2); + +SELECT '--'; + +SELECT id, value FROM test_table WHERE id IN test_table_for_in; + +SELECT '--'; + +SELECT id, value FROM test_table WHERE id IN (SELECT id FROM test_table_for_in); + +SELECT '--'; + +SELECT id, value FROM test_table WHERE id IN (SELECT id FROM test_table_for_in UNION DISTINCT SELECT id FROM test_table_for_in); + +SELECT '--'; + +WITH cte_test_table_for_in AS (SELECT id FROM test_table_for_in) SELECT id, value FROM test_table WHERE id IN cte_test_table_for_in; + +SELECT '--'; + +WITH cte_test_table_for_in AS (SELECT id FROM test_table_for_in) SELECT id, value +FROM test_table WHERE id IN (SELECT id FROM cte_test_table_for_in UNION DISTINCT SELECT id FROM cte_test_table_for_in); + +-- { echoOff } + +DROP TABLE test_table; +DROP TABLE test_table_for_in; diff --git a/tests/queries/0_stateless/02377_analyzer_in_function_set.reference b/tests/queries/0_stateless/02377_analyzer_in_function_set.reference new file mode 100644 index 00000000000..b32da0d591a --- /dev/null +++ b/tests/queries/0_stateless/02377_analyzer_in_function_set.reference @@ -0,0 +1,2 @@ +0 Value_0 +1 Value_1 diff --git a/tests/queries/0_stateless/02377_analyzer_in_function_set.sql b/tests/queries/0_stateless/02377_analyzer_in_function_set.sql new file mode 100644 index 00000000000..e5c27e72ea1 --- /dev/null +++ b/tests/queries/0_stateless/02377_analyzer_in_function_set.sql @@ -0,0 +1,23 @@ +SET use_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value_0'), (1, 'Value_1'), (2, 'Value_2'); + +DROP TABLE IF EXISTS special_set_table; +CREATE TABLE special_set_table +( + id UInt64 +) ENGINE=Set; + +INSERT INTO special_set_table VALUES (0), (1); + +SELECT id, value FROM test_table WHERE id IN special_set_table; + +DROP TABLE special_set_table; +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02378_analyzer_projection_names.reference b/tests/queries/0_stateless/02378_analyzer_projection_names.reference new file mode 100644 index 00000000000..1fa79677876 --- /dev/null +++ b/tests/queries/0_stateless/02378_analyzer_projection_names.reference @@ -0,0 +1,739 @@ +-- { echoOn } + +SELECT 'Constants'; +Constants +DESCRIBE (SELECT 1, 'Value'); +1 UInt8 +\'Value\' String +SELECT '--'; +-- +DESCRIBE (SELECT 1 + 1, concat('Value_1', 'Value_2')); +plus(1, 1) UInt16 +concat(\'Value_1\', \'Value_2\') String +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value 
String)')); +CAST(tuple(1, \'Value\'), \'Tuple (id UInt64, value String)\') Tuple(id UInt64, value String) +SELECT 'Columns'; +Columns +DESCRIBE (SELECT test_table.id, test_table.id, id FROM test_table); +id UInt64 +id UInt64 +id UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT * FROM test_table); +id UInt64 +value String +SELECT '--'; +-- +DESCRIBE (SELECT * APPLY toString FROM test_table); +toString(id) String +toString(value) String +SELECT '--'; +-- +DESCRIBE (SELECT * APPLY x -> toString(x) FROM test_table); +toString(id) String +toString(value) String +SELECT '--'; +-- +DESCRIBE (SELECT tuple_value.* FROM test_table_compound); +tuple_value.value_1 UInt64 +tuple_value.value_2 String +SELECT '--'; +-- +DESCRIBE (SELECT tuple_value.* APPLY x -> x FROM test_table_compound); +tuple_value.value_1 UInt64 +tuple_value.value_2 String +SELECT '--'; +-- +DESCRIBE (SELECT tuple_value.* APPLY toString FROM test_table_compound); +toString(tuple_value.value_1) String +toString(tuple_value.value_2) String +SELECT '--'; +-- +DESCRIBE (SELECT tuple_value.* APPLY x -> toString(x) FROM test_table_compound); +toString(tuple_value.value_1) String +toString(tuple_value.value_2) String +SELECT 'Constants with aliases'; +Constants with aliases +DESCRIBE (SELECT 1 AS a, a AS b, b, b AS c, c, 'Value' AS d, d AS e, e AS f); +a UInt8 +b UInt8 +b UInt8 +c UInt8 +c UInt8 +d String +e String +f String +SELECT '--'; +-- +DESCRIBE (SELECT plus(1 AS a, a AS b), plus(b, b), plus(b, b) AS c, concat('Value' AS d, d) AS e, e); +plus(a, b) UInt16 +plus(b, b) UInt16 +c UInt16 +e String +e String +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.id, a.value); +a Tuple(id UInt64, value String) +a.id UInt64 +a.value String +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.*); +a Tuple(id UInt64, value String) +a.id UInt64 +a.value String +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT id); +a Tuple(id UInt64, value String) +a.value String +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT value); +a Tuple(id UInt64, value String) +a.id UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT value APPLY toString); +a Tuple(id UInt64, value String) +toString(a.id) String +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT value APPLY x -> toString(x)); +a Tuple(id UInt64, value String) +toString(a.id) String +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, untuple(a)); +a Tuple(id UInt64, value String) +tupleElement(a, \'id\') UInt64 +tupleElement(a, \'value\') String +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, untuple(a) AS b); +a Tuple(id UInt64, value String) +b.id UInt64 +b.value String +SELECT 'Columns with aliases'; +Columns with aliases +DESCRIBE (SELECT test_table.id AS a, a, test_table.id AS b, b AS c, c FROM test_table); +a UInt64 +a UInt64 +b UInt64 +c UInt64 +c UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT plus(test_table.id AS a, test_table.id), plus(id, id AS b), plus(b, b), plus(test_table.id, test_table.id) FROM test_table); +plus(a, id) UInt64 +plus(id, b) UInt64 +plus(b, b) UInt64 +plus(id, id) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT test_table.* REPLACE id + (id 
AS id_alias) AS id, id_alias FROM test_table); +plus(id, id_alias) UInt64 +value String +id_alias UInt64 +SELECT 'Matcher'; +Matcher +DESCRIBE (SELECT * FROM test_table); +id UInt64 +value String +SELECT '--'; +-- +DESCRIBE (SELECT test_table.* FROM test_table); +id UInt64 +value String +SELECT '--'; +-- +DESCRIBE (SELECT 1 AS id, 2 AS value, * FROM test_table); +id UInt8 +value UInt8 +test_table.id UInt64 +test_table.value String +SELECT '--'; +-- +DESCRIBE (SELECT 1 AS id, 2 AS value, * FROM test_table AS t1); +id UInt8 +value UInt8 +t1.id UInt64 +t1.value String +SELECT 'Lambda'; +Lambda +DESCRIBE (SELECT arrayMap(x -> x + 1, [1,2,3])); +arrayMap(lambda(tuple(x), plus(x, 1)), [1, 2, 3]) Array(UInt16) +SELECT '--'; +-- +DESCRIBE (SELECT 1 AS a, arrayMap(x -> x + a, [1,2,3])); +a UInt8 +arrayMap(lambda(tuple(x), plus(x, a)), [1, 2, 3]) Array(UInt16) +SELECT '--'; +-- +DESCRIBE (SELECT arrayMap(x -> x + test_table.id + test_table.id + id, [1,2,3]) FROM test_table); +arrayMap(lambda(tuple(x), plus(plus(plus(x, id), id), id)), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT arrayMap(x -> x + (test_table.id AS first) + (test_table.id AS second) + id, [1,2,3]) FROM test_table); +arrayMap(lambda(tuple(x), plus(plus(plus(x, first), second), id)), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT arrayMap(x -> test_table.* EXCEPT value, [1,2,3]) FROM test_table); +arrayMap(lambda(tuple(x), id), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT arrayMap(x -> test_table.* EXCEPT value APPLY x -> x, [1,2,3]) FROM test_table); +arrayMap(lambda(tuple(x), id), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT arrayMap(x -> test_table.* EXCEPT value APPLY toString, [1,2,3]) FROM test_table); +arrayMap(lambda(tuple(x), toString(id)), [1, 2, 3]) Array(String) +SELECT '--'; +-- +DESCRIBE (SELECT arrayMap(x -> test_table.* EXCEPT value APPLY x -> toString(x), [1,2,3]) FROM test_table); +arrayMap(lambda(tuple(x), toString(id)), [1, 2, 3]) Array(String) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.*, [1,2,3])); +compound_value Tuple(id UInt64) +arrayMap(lambda(tuple(x), compound_value.id), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.* APPLY x -> x, [1,2,3])); +compound_value Tuple(id UInt64) +arrayMap(lambda(tuple(x), compound_value.id), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.* APPLY toString, [1,2,3])); +compound_value Tuple(id UInt64) +arrayMap(lambda(tuple(x), toString(compound_value.id)), [1, 2, 3]) Array(String) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.* APPLY x -> toString(x), [1,2,3])); +compound_value Tuple(id UInt64) +arrayMap(lambda(tuple(x), toString(compound_value.id)), [1, 2, 3]) Array(String) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value, [1,2,3])); +compound_value Tuple(id UInt64, value String) +arrayMap(lambda(tuple(x), compound_value.id), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value APPLY x -> x, [1,2,3])); +compound_value Tuple(id UInt64, value 
String) +arrayMap(lambda(tuple(x), compound_value.id), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value APPLY toString, [1,2,3])); +compound_value Tuple(id UInt64, value String) +arrayMap(lambda(tuple(x), toString(compound_value.id)), [1, 2, 3]) Array(String) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value APPLY x -> toString(x), [1,2,3])); +compound_value Tuple(id UInt64, value String) +arrayMap(lambda(tuple(x), toString(compound_value.id)), [1, 2, 3]) Array(String) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, arrayMap(x -> untuple(a), [1,2,3]) FROM test_table); +a Tuple(id UInt64) +arrayMap(lambda(tuple(x), tupleElement(a, \'id\')), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, arrayMap(x -> untuple(a) AS untupled_value, [1,2,3]) FROM test_table); +a Tuple(id UInt64) +arrayMap(untupled_value, [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, untuple(a) AS untupled_value, arrayMap(x -> untupled_value, [1,2,3]) FROM test_table); +a Tuple(id UInt64) +untupled_value.id UInt64 +arrayMap(lambda(tuple(x), untupled_value.id), [1, 2, 3]) Array(UInt64) +SELECT '--'; +-- +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, untuple(a) AS untupled_value, arrayMap(x -> untupled_value AS untupled_value_in_lambda, [1,2,3]) FROM test_table); +a Tuple(id UInt64) +untupled_value.id UInt64 +arrayMap(untupled_value_in_lambda, [1, 2, 3]) Array(UInt64) +SELECT 'Standalone lambda'; +Standalone lambda +DESCRIBE (WITH x -> x + 1 AS test_lambda SELECT test_lambda(1)); +test_lambda(1) UInt16 +SELECT '--'; +-- +DESCRIBE (WITH x -> * AS test_lambda SELECT test_lambda(1) AS value, value FROM test_table); +id UInt64 +value String +id UInt64 +value String +SELECT 'Subquery'; +Subquery +DESCRIBE (SELECT (SELECT 1), (SELECT 2), (SELECT 3) AS a, (SELECT 4)); +_subquery_1 Nullable(UInt8) +_subquery_2 Nullable(UInt8) +a Nullable(UInt8) +_subquery_4 Nullable(UInt8) +SELECT '--'; +-- +DESCRIBE (SELECT arrayMap(x -> (SELECT 1), [1,2,3]), arrayMap(x -> (SELECT 2) AS a, [1, 2, 3]), arrayMap(x -> (SELECT 1), [1,2,3])); +arrayMap(lambda(tuple(x), _subquery_1), [1, 2, 3]) Array(Nullable(UInt8)) +arrayMap(a, [1, 2, 3]) Array(Nullable(UInt8)) +arrayMap(lambda(tuple(x), _subquery_3), [1, 2, 3]) Array(Nullable(UInt8)) +SELECT '--'; +-- +DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b) AS c, c.a, c.b); +c Tuple(a UInt8, b UInt8) +c.a UInt8 +c.b UInt8 +SELECT '--'; +-- +DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b) AS c, c.*); +c Tuple(a UInt8, b UInt8) +c.a UInt8 +c.b UInt8 +SELECT '--'; +-- +DESCRIBE (SELECT (SELECT 1 UNION DISTINCT SELECT 1), (SELECT 2 UNION DISTINCT SELECT 2), (SELECT 3 UNION DISTINCT SELECT 3) AS a, (SELECT 4 UNION DISTINCT SELECT 4)); +_subquery_1 Nullable(UInt8) +_subquery_2 Nullable(UInt8) +a Nullable(UInt8) +_subquery_4 Nullable(UInt8) +SELECT '--'; +-- +DESCRIBE (SELECT arrayMap(x -> (SELECT 1 UNION DISTINCT SELECT 1), [1,2,3]), arrayMap(x -> (SELECT 2 UNION DISTINCT SELECT 2) AS a, [1, 2, 3]), +arrayMap(x -> (SELECT 3 UNION DISTINCT SELECT 3), [1,2,3])); +arrayMap(lambda(tuple(x), _subquery_1), [1, 2, 3]) Array(Nullable(UInt8)) +arrayMap(a, [1, 2, 3]) Array(Nullable(UInt8)) +arrayMap(lambda(tuple(x), _subquery_3), [1, 2, 3]) 
Array(Nullable(UInt8)) +SELECT '--'; +-- +DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b UNION DISTINCT SELECT 1, 2) AS c, c.a, c.b); +c Tuple(a UInt8, b UInt8) +c.a UInt8 +c.b UInt8 +SELECT '--'; +-- +DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b UNION DISTINCT SELECT 1, 2) AS c, c.*); +c Tuple(a UInt8, b UInt8) +c.a UInt8 +c.b UInt8 +SELECT '--'; +-- +DESCRIBE (SELECT (SELECT 1), (SELECT 2 UNION DISTINCT SELECT 2), (SELECT 3) AS a, (SELECT 4 UNION DISTINCT SELECT 4)); +_subquery_1 Nullable(UInt8) +_subquery_2 Nullable(UInt8) +a Nullable(UInt8) +_subquery_4 Nullable(UInt8) +SELECT '--'; +-- +DESCRIBE (SELECT arrayMap(x -> (SELECT 1 UNION DISTINCT SELECT 1), [1,2,3]), arrayMap(x -> (SELECT 2) AS a, [1, 2, 3]), +arrayMap(x -> (SELECT 3 UNION DISTINCT SELECT 3), [1,2,3])); +arrayMap(lambda(tuple(x), _subquery_1), [1, 2, 3]) Array(Nullable(UInt8)) +arrayMap(a, [1, 2, 3]) Array(Nullable(UInt8)) +arrayMap(lambda(tuple(x), _subquery_3), [1, 2, 3]) Array(Nullable(UInt8)) +SELECT 'Window functions'; +Window functions +DESCRIBE (SELECT count() OVER ()); +count() OVER () UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER () AS window_function); +window_function UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id) FROM test_table); +count() OVER (PARTITION BY id) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value) FROM test_table); +count() OVER (PARTITION BY id, value) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id) FROM test_table); +count() OVER (PARTITION BY id, value ORDER BY id ASC) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS CURRENT ROW) FROM test_table); +count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN CURRENT ROW AND CURRENT ROW) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN CURRENT ROW AND CURRENT ROW) FROM test_table); +count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN CURRENT ROW AND CURRENT ROW) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC RANGE CURRENT ROW) FROM test_table); +count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC RANGE BETWEEN CURRENT ROW AND CURRENT ROW) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC RANGE BETWEEN CURRENT ROW AND CURRENT ROW) FROM test_table); +count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC RANGE BETWEEN CURRENT ROW AND CURRENT ROW) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY (id AS id_alias), (value AS value_alias) ORDER BY id ASC, value DESC ROWS CURRENT ROW) FROM test_table); +count() OVER (PARTITION BY id_alias, value_alias ORDER BY id ASC, value DESC ROWS BETWEEN CURRENT ROW AND CURRENT ROW) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY (id AS id_alias) ASC, (value AS value_alias) DESC ROWS CURRENT ROW) FROM test_table); +count() OVER (PARTITION BY id, value ORDER BY id_alias ASC, value_alias DESC ROWS BETWEEN CURRENT ROW AND CURRENT ROW) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN 1 PRECEDING AND 2 FOLLOWING) FROM test_table); +count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN 1 PRECEDING AND 2 FOLLOWING) UInt64 +SELECT '--'; +-- +DESCRIBE 
(SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN 1 + 1 PRECEDING AND 2 + 2 FOLLOWING) FROM test_table); +count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN plus(1, 1) PRECEDING AND plus(2, 2) FOLLOWING) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN ((1 + 1) AS frame_offset_begin) PRECEDING AND ((2 + 2) AS frame_offset_end) FOLLOWING) +FROM test_table); +count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN frame_offset_begin PRECEDING AND frame_offset_end FOLLOWING) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (ORDER BY toNullable(id) NULLS FIRST) FROM test_table); +count() OVER (ORDER BY toNullable(id) ASC NULLS FIRST) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (ORDER BY toNullable(id) NULLS LAST) FROM test_table); +count() OVER (ORDER BY toNullable(id) ASC NULLS LAST) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (ORDER BY id WITH FILL FROM 1 TO 5 STEP 1) FROM test_table); +count() OVER (ORDER BY id ASC WITH FILL FROM 1 TO 5 STEP 1) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (ORDER BY id WITH FILL FROM 1 + 1 TO 6 STEP 1 + 1) FROM test_table); +count() OVER (ORDER BY id ASC WITH FILL FROM plus(1, 1) TO 6 STEP plus(1, 1)) UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (ORDER BY id WITH FILL FROM ((1 + 1) AS from) TO (6 AS to) STEP ((1 + 1) AS step)) FROM test_table); +count() OVER (ORDER BY id ASC WITH FILL FROM from TO to STEP step) UInt64 +SELECT 'Window functions WINDOW'; +Window functions WINDOW +DESCRIBE (SELECT count() OVER window_name FROM test_table WINDOW window_name AS (PARTITION BY id)); +count() OVER window_name UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER window_name FROM test_table WINDOW window_name AS (PARTITION BY id ORDER BY value)); +count() OVER window_name UInt64 +SELECT '--'; +-- +DESCRIBE (SELECT count() OVER (window_name ORDER BY id) FROM test_table WINDOW window_name AS (PARTITION BY id)); +count() OVER (window_name ORDER BY id ASC) UInt64 +SELECT 'IN function'; +IN function +DESCRIBE (SELECT id IN (SELECT 1) FROM test_table); +in(id, _subquery_1) UInt8 +SELECT '--'; +-- +DESCRIBE (SELECT id IN (SELECT id FROM test_table_in) FROM test_table); +in(id, _subquery_1) UInt8 +SELECT '--'; +-- +DESCRIBE (SELECT id IN test_table_in FROM test_table); +in(id, test_table_in) UInt8 +SELECT '--'; +-- +DESCRIBE (WITH test_table_in_cte AS (SELECT id FROM test_table) SELECT id IN (SELECT id FROM test_table_in_cte) FROM test_table); +in(id, _subquery_1) UInt8 +SELECT '--'; +-- +DESCRIBE (WITH test_table_in_cte AS (SELECT id FROM test_table) SELECT id IN test_table_in_cte FROM test_table); +in(id, test_table_in_cte) UInt8 +SELECT 'Joins'; +Joins +DESCRIBE (SELECT * FROM test_table_join_1, test_table_join_2); +test_table_join_1.id UInt64 +test_table_join_1.value String +value_join_1 String +test_table_join_2.id UInt64 +test_table_join_2.value String +value_join_2 String +SELECT '--'; +-- +DESCRIBE (SELECT * FROM test_table_join_1 AS t1, test_table_join_2 AS t2); +t1.id UInt64 +t1.value String +value_join_1 String +t2.id UInt64 +t2.value String +value_join_2 String +SELECT '--'; +-- +DESCRIBE (SELECT * APPLY toString FROM test_table_join_1 AS t1, test_table_join_2 AS t2); +toString(t1.id) String +toString(t1.value) String +toString(value_join_1) String +toString(t2.id) String +toString(t2.value) String +toString(value_join_2) String +SELECT '--'; +-- 
+DESCRIBE (SELECT * APPLY x -> toString(x) FROM test_table_join_1 AS t1, test_table_join_2 AS t2); +toString(t1.id) String +toString(t1.value) String +toString(value_join_1) String +toString(t2.id) String +toString(t2.value) String +toString(value_join_2) String +SELECT '--'; +-- +DESCRIBE (SELECT test_table_join_1.*, test_table_join_2.* FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id); +test_table_join_1.id UInt64 +test_table_join_1.value String +value_join_1 String +test_table_join_2.id UInt64 +test_table_join_2.value String +value_join_2 String +SELECT '--'; +-- +DESCRIBE (SELECT t1.*, t2.* FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); +t1.id UInt64 +t1.value String +value_join_1 String +t2.id UInt64 +t2.value String +value_join_2 String +SELECT '--'; +-- +DESCRIBE (SELECT test_table_join_1.* APPLY toString, test_table_join_2.* APPLY toString FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); +toString(t1.id) String +toString(t1.value) String +toString(value_join_1) String +toString(t2.id) String +toString(t2.value) String +toString(value_join_2) String +SELECT '--'; +-- +DESCRIBE (SELECT test_table_join_1.* APPLY x -> toString(x), test_table_join_2.* APPLY x -> toString(x) FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); +toString(t1.id) String +toString(t1.value) String +toString(value_join_1) String +toString(t2.id) String +toString(t2.value) String +toString(value_join_2) String +SELECT '--'; +-- +DESCRIBE (SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_1.value_join_1, test_table_join_2.id, test_table_join_2.value, test_table_join_2.value_join_2 +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); +test_table_join_1.id UInt64 +test_table_join_1.value String +value_join_1 String +test_table_join_2.id UInt64 +test_table_join_2.value String +value_join_2 String +SELECT '--'; +-- +DESCRIBE (SELECT t1.id, t1.value, t1.value_join_1, t2.id, t2.value, t2.value_join_2 FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); +t1.id UInt64 +t1.value String +value_join_1 String +t2.id UInt64 +t2.value String +value_join_2 String +SELECT 'Multiple JOINS'; +Multiple JOINS +DESCRIBE (SELECT * FROM test_table_join_1, test_table_join_2, test_table_join_3); +test_table_join_1.id UInt64 +test_table_join_1.value String +value_join_1 String +test_table_join_2.id UInt64 +test_table_join_2.value String +value_join_2 String +test_table_join_3.id UInt64 +test_table_join_3.value String +value_join_3 String +SELECT '--'; +-- +DESCRIBE (SELECT * FROM test_table_join_1 AS t1, test_table_join_2 AS t2, test_table_join_3 AS t3); +t1.id UInt64 +t1.value String +value_join_1 String +t2.id UInt64 +t2.value String +value_join_2 String +t3.id UInt64 +t3.value String +value_join_3 String +SELECT '--'; +-- +DESCRIBE (SELECT * APPLY toString FROM test_table_join_1 AS t1, test_table_join_2 AS t2, test_table_join_3 AS t3); +toString(t1.id) String +toString(t1.value) String +toString(value_join_1) String +toString(t2.id) String +toString(t2.value) String +toString(value_join_2) String +toString(t3.id) String +toString(t3.value) String +toString(value_join_3) String +SELECT '--'; +-- +DESCRIBE (SELECT * APPLY x -> toString(x) FROM test_table_join_1 AS t1, test_table_join_2 AS t2, test_table_join_3 AS t3); +toString(t1.id) String +toString(t1.value) String +toString(value_join_1) String 
+toString(t2.id) String +toString(t2.value) String +toString(value_join_2) String +toString(t3.id) String +toString(t3.value) String +toString(value_join_3) String +SELECT '--'; +-- +DESCRIBE (SELECT test_table_join_1.*, test_table_join_2.*, test_table_join_3.* +FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id); +test_table_join_1.id UInt64 +test_table_join_1.value String +value_join_1 String +test_table_join_2.id UInt64 +test_table_join_2.value String +value_join_2 String +test_table_join_3.id UInt64 +test_table_join_3.value String +value_join_3 String +SELECT '--'; +-- +DESCRIBE (SELECT t1.*, t2.*, t3.* +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); +t1.id UInt64 +t1.value String +value_join_1 String +t2.id UInt64 +t2.value String +value_join_2 String +t3.id UInt64 +t3.value String +value_join_3 String +SELECT '--'; +-- +DESCRIBE (SELECT test_table_join_1.* APPLY toString, test_table_join_2.* APPLY toString, test_table_join_3.* APPLY toString +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); +toString(t1.id) String +toString(t1.value) String +toString(value_join_1) String +toString(t2.id) String +toString(t2.value) String +toString(value_join_2) String +toString(t3.id) String +toString(t3.value) String +toString(value_join_3) String +SELECT '--'; +-- +DESCRIBE (SELECT test_table_join_1.* APPLY x -> toString(x), test_table_join_2.* APPLY x -> toString(x), test_table_join_3.* APPLY x -> toString(x) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); +toString(t1.id) String +toString(t1.value) String +toString(value_join_1) String +toString(t2.id) String +toString(t2.value) String +toString(value_join_2) String +toString(t3.id) String +toString(t3.value) String +toString(value_join_3) String +SELECT '--'; +-- +DESCRIBE (SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_1.value_join_1, test_table_join_2.id, test_table_join_2.value, test_table_join_2.value_join_2, +test_table_join_3.id, test_table_join_3.value, test_table_join_3.value_join_3 +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); +test_table_join_1.id UInt64 +test_table_join_1.value String +value_join_1 String +test_table_join_2.id UInt64 +test_table_join_2.value String +value_join_2 String +test_table_join_3.id UInt64 +test_table_join_3.value String +value_join_3 String +SELECT '--'; +-- +DESCRIBE (SELECT t1.id, t1.value, t1.value_join_1, t2.id, t2.value, t2.value_join_2, t3.id, t3.value, t3.value_join_3 +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); +t1.id UInt64 +t1.value String +value_join_1 String +t2.id UInt64 +t2.value String +value_join_2 String +t3.id UInt64 +t3.value String +value_join_3 String +SELECT 'Joins USING'; +Joins USING +DESCRIBE (SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id)); +id UInt64 +t1.value String +value_join_1 String +t2.value String +value_join_2 String +SELECT '--'; +-- +DESCRIBE (SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, value)); +id UInt64 +value 
String +value_join_1 String +value_join_2 String +SELECT '--'; +-- +DESCRIBE (SELECT id, t1.id, t1.value, t2.id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id)); +id UInt64 +t1.id UInt64 +t1.value String +t2.id UInt64 +t2.value String +SELECT '--'; +-- +DESCRIBE (SELECT id, value, t1.id, t1.value, t2.id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, value)); +id UInt64 +value String +t1.id UInt64 +t1.value String +t2.id UInt64 +t2.value String +SELECT 'Multiple Joins USING'; +Multiple Joins USING +SELECT '--'; +-- +DESCRIBE (SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING (id)); +id UInt64 +t1.value String +value_join_1 String +t2.value String +value_join_2 String +t3.value String +value_join_3 String +SELECT '--'; +-- +DESCRIBE (SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, value) INNER JOIN test_table_join_3 AS t3 USING (id, value)); +id UInt64 +value String +value_join_1 String +value_join_2 String +value_join_3 String +SELECT '--'; +-- +DESCRIBE (SELECT id, t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING (id)); +id UInt64 +t1.id UInt64 +t1.value String +t2.id UInt64 +t2.value String +t3.id UInt64 +t3.value String +SELECT '--'; +-- +DESCRIBE (SELECT id, value, t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, value) INNER JOIN test_table_join_3 AS t3 USING (id, value)); +id UInt64 +value String +t1.id UInt64 +t1.value String +t2.id UInt64 +t2.value String +t3.id UInt64 +t3.value String diff --git a/tests/queries/0_stateless/02378_analyzer_projection_names.sql b/tests/queries/0_stateless/02378_analyzer_projection_names.sql new file mode 100644 index 00000000000..a31f5afd8fb --- /dev/null +++ b/tests/queries/0_stateless/02378_analyzer_projection_names.sql @@ -0,0 +1,541 @@ +SET use_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +DROP TABLE IF EXISTS test_table_in; +CREATE TABLE test_table_in +( + id UInt64 +) ENGINE=TinyLog; + +DROP TABLE IF EXISTS test_table_compound; +CREATE TABLE test_table_compound +( + id UInt64, + tuple_value Tuple(value_1 UInt64, value_2 String) +) ENGINE=TinyLog; + +INSERT INTO test_table_compound VALUES (0, tuple(0, 'Value')); + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt64, + value String, + value_join_1 String +) ENGINE=TinyLog; + +INSERT INTO test_table_join_1 VALUES (0, 'Join_1_Value', 'Join_1_Value'); + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt64, + value String, + value_join_2 String +) ENGINE=TinyLog; + +INSERT INTO test_table_join_2 VALUES (0, 'Join_2_Value', 'Join_2_Value'); + +DROP TABLE IF EXISTS test_table_join_3; +CREATE TABLE test_table_join_3 +( + id UInt64, + value String, + value_join_3 String +) ENGINE=TinyLog; + +INSERT INTO test_table_join_3 VALUES (0, 'Join_3_Value', 'Join_3_Value'); + +-- { echoOn } + +SELECT 'Constants'; + +DESCRIBE (SELECT 1, 'Value'); + +SELECT '--'; + +DESCRIBE (SELECT 1 + 1, concat('Value_1', 'Value_2')); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)')); + +SELECT 'Columns'; + +DESCRIBE 
(SELECT test_table.id, test_table.id, id FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT * FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT * APPLY toString FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT * APPLY x -> toString(x) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT tuple_value.* FROM test_table_compound); + +SELECT '--'; + +DESCRIBE (SELECT tuple_value.* APPLY x -> x FROM test_table_compound); + +SELECT '--'; + +DESCRIBE (SELECT tuple_value.* APPLY toString FROM test_table_compound); + +SELECT '--'; + +DESCRIBE (SELECT tuple_value.* APPLY x -> toString(x) FROM test_table_compound); + +SELECT 'Constants with aliases'; + +DESCRIBE (SELECT 1 AS a, a AS b, b, b AS c, c, 'Value' AS d, d AS e, e AS f); + +SELECT '--'; + +DESCRIBE (SELECT plus(1 AS a, a AS b), plus(b, b), plus(b, b) AS c, concat('Value' AS d, d) AS e, e); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.id, a.value); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.*); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT id); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT value); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT value APPLY toString); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, a.* EXCEPT value APPLY x -> toString(x)); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, untuple(a)); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS a, untuple(a) AS b); + +SELECT 'Columns with aliases'; + +DESCRIBE (SELECT test_table.id AS a, a, test_table.id AS b, b AS c, c FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT plus(test_table.id AS a, test_table.id), plus(id, id AS b), plus(b, b), plus(test_table.id, test_table.id) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT test_table.* REPLACE id + (id AS id_alias) AS id, id_alias FROM test_table); + +SELECT 'Matcher'; + +DESCRIBE (SELECT * FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT test_table.* FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT 1 AS id, 2 AS value, * FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT 1 AS id, 2 AS value, * FROM test_table AS t1); + +SELECT 'Lambda'; + +DESCRIBE (SELECT arrayMap(x -> x + 1, [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT 1 AS a, arrayMap(x -> x + a, [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT arrayMap(x -> x + test_table.id + test_table.id + id, [1,2,3]) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT arrayMap(x -> x + (test_table.id AS first) + (test_table.id AS second) + id, [1,2,3]) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT arrayMap(x -> test_table.* EXCEPT value, [1,2,3]) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT arrayMap(x -> test_table.* EXCEPT value APPLY x -> x, [1,2,3]) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT arrayMap(x -> test_table.* EXCEPT value APPLY toString, [1,2,3]) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT arrayMap(x -> test_table.* EXCEPT value APPLY x -> toString(x), [1,2,3]) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.*, [1,2,3])); + +SELECT '--'; + +DESCRIBE 
(SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.* APPLY x -> x, [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.* APPLY toString, [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS compound_value, arrayMap(x -> compound_value.* APPLY x -> toString(x), [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value, [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value APPLY x -> x, [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value APPLY toString, [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1, 'Value'), 'Tuple (id UInt64, value String)') AS compound_value, arrayMap(x -> compound_value.* EXCEPT value APPLY x -> toString(x), [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, arrayMap(x -> untuple(a), [1,2,3]) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, arrayMap(x -> untuple(a) AS untupled_value, [1,2,3]) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, untuple(a) AS untupled_value, arrayMap(x -> untupled_value, [1,2,3]) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT cast(tuple(1), 'Tuple (id UInt64)') AS a, untuple(a) AS untupled_value, arrayMap(x -> untupled_value AS untupled_value_in_lambda, [1,2,3]) FROM test_table); + +SELECT 'Standalone lambda'; + +DESCRIBE (WITH x -> x + 1 AS test_lambda SELECT test_lambda(1)); + +SELECT '--'; + +DESCRIBE (WITH x -> * AS test_lambda SELECT test_lambda(1) AS value, value FROM test_table); + +SELECT 'Subquery'; + +DESCRIBE (SELECT (SELECT 1), (SELECT 2), (SELECT 3) AS a, (SELECT 4)); + +SELECT '--'; + +DESCRIBE (SELECT arrayMap(x -> (SELECT 1), [1,2,3]), arrayMap(x -> (SELECT 2) AS a, [1, 2, 3]), arrayMap(x -> (SELECT 1), [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b) AS c, c.a, c.b); + +SELECT '--'; + +DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b) AS c, c.*); + +SELECT '--'; + +DESCRIBE (SELECT (SELECT 1 UNION DISTINCT SELECT 1), (SELECT 2 UNION DISTINCT SELECT 2), (SELECT 3 UNION DISTINCT SELECT 3) AS a, (SELECT 4 UNION DISTINCT SELECT 4)); + +SELECT '--'; + +DESCRIBE (SELECT arrayMap(x -> (SELECT 1 UNION DISTINCT SELECT 1), [1,2,3]), arrayMap(x -> (SELECT 2 UNION DISTINCT SELECT 2) AS a, [1, 2, 3]), +arrayMap(x -> (SELECT 3 UNION DISTINCT SELECT 3), [1,2,3])); + +SELECT '--'; + +DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b UNION DISTINCT SELECT 1, 2) AS c, c.a, c.b); + +SELECT '--'; + +DESCRIBE (SELECT (SELECT 1 AS a, 2 AS b UNION DISTINCT SELECT 1, 2) AS c, c.*); + +SELECT '--'; + +DESCRIBE (SELECT (SELECT 1), (SELECT 2 UNION DISTINCT SELECT 2), (SELECT 3) AS a, (SELECT 4 UNION DISTINCT SELECT 4)); + +SELECT '--'; + +DESCRIBE (SELECT arrayMap(x -> (SELECT 1 UNION DISTINCT SELECT 1), [1,2,3]), arrayMap(x -> (SELECT 2) AS a, [1, 2, 3]), +arrayMap(x -> (SELECT 3 UNION DISTINCT SELECT 3), [1,2,3])); + +SELECT 'Window functions'; + +DESCRIBE (SELECT count() OVER ()); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER () AS window_function); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER 
(PARTITION BY id) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS CURRENT ROW) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN CURRENT ROW AND CURRENT ROW) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC RANGE CURRENT ROW) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC RANGE BETWEEN CURRENT ROW AND CURRENT ROW) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY (id AS id_alias), (value AS value_alias) ORDER BY id ASC, value DESC ROWS CURRENT ROW) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY (id AS id_alias) ASC, (value AS value_alias) DESC ROWS CURRENT ROW) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN 1 PRECEDING AND 2 FOLLOWING) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN 1 + 1 PRECEDING AND 2 + 2 FOLLOWING) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (PARTITION BY id, value ORDER BY id ASC, value DESC ROWS BETWEEN ((1 + 1) AS frame_offset_begin) PRECEDING AND ((2 + 2) AS frame_offset_end) FOLLOWING) +FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (ORDER BY toNullable(id) NULLS FIRST) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (ORDER BY toNullable(id) NULLS LAST) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (ORDER BY id WITH FILL FROM 1 TO 5 STEP 1) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (ORDER BY id WITH FILL FROM 1 + 1 TO 6 STEP 1 + 1) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (ORDER BY id WITH FILL FROM ((1 + 1) AS from) TO (6 AS to) STEP ((1 + 1) AS step)) FROM test_table); + +SELECT 'Window functions WINDOW'; + +DESCRIBE (SELECT count() OVER window_name FROM test_table WINDOW window_name AS (PARTITION BY id)); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER window_name FROM test_table WINDOW window_name AS (PARTITION BY id ORDER BY value)); + +SELECT '--'; + +DESCRIBE (SELECT count() OVER (window_name ORDER BY id) FROM test_table WINDOW window_name AS (PARTITION BY id)); + +SELECT 'IN function'; + +DESCRIBE (SELECT id IN (SELECT 1) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT id IN (SELECT id FROM test_table_in) FROM test_table); + +SELECT '--'; + +DESCRIBE (SELECT id IN test_table_in FROM test_table); + +SELECT '--'; + +DESCRIBE (WITH test_table_in_cte AS (SELECT id FROM test_table) SELECT id IN (SELECT id FROM test_table_in_cte) FROM test_table); + +SELECT '--'; + +DESCRIBE (WITH test_table_in_cte AS (SELECT id FROM test_table) SELECT id IN test_table_in_cte FROM test_table); + +SELECT 'Joins'; + +DESCRIBE (SELECT * FROM test_table_join_1, test_table_join_2); + +SELECT '--'; + +DESCRIBE (SELECT * FROM test_table_join_1 AS t1, test_table_join_2 AS t2); + +SELECT '--'; + +DESCRIBE (SELECT * APPLY toString FROM test_table_join_1 AS t1, test_table_join_2 AS t2); + +SELECT '--'; + +DESCRIBE 
(SELECT * APPLY x -> toString(x) FROM test_table_join_1 AS t1, test_table_join_2 AS t2); + +SELECT '--'; + +DESCRIBE (SELECT test_table_join_1.*, test_table_join_2.* FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id); + +SELECT '--'; + +DESCRIBE (SELECT t1.*, t2.* FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); + +SELECT '--'; + +DESCRIBE (SELECT test_table_join_1.* APPLY toString, test_table_join_2.* APPLY toString FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); + +SELECT '--'; + +DESCRIBE (SELECT test_table_join_1.* APPLY x -> toString(x), test_table_join_2.* APPLY x -> toString(x) FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); + +SELECT '--'; + +DESCRIBE (SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_1.value_join_1, test_table_join_2.id, test_table_join_2.value, test_table_join_2.value_join_2 +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); + +SELECT '--'; + +DESCRIBE (SELECT t1.id, t1.value, t1.value_join_1, t2.id, t2.value, t2.value_join_2 FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id); + +SELECT 'Multiple JOINS'; + +DESCRIBE (SELECT * FROM test_table_join_1, test_table_join_2, test_table_join_3); + +SELECT '--'; + +DESCRIBE (SELECT * FROM test_table_join_1 AS t1, test_table_join_2 AS t2, test_table_join_3 AS t3); + +SELECT '--'; + +DESCRIBE (SELECT * APPLY toString FROM test_table_join_1 AS t1, test_table_join_2 AS t2, test_table_join_3 AS t3); + +SELECT '--'; + +DESCRIBE (SELECT * APPLY x -> toString(x) FROM test_table_join_1 AS t1, test_table_join_2 AS t2, test_table_join_3 AS t3); + +SELECT '--'; + +DESCRIBE (SELECT test_table_join_1.*, test_table_join_2.*, test_table_join_3.* +FROM test_table_join_1 INNER JOIN test_table_join_2 ON test_table_join_1.id = test_table_join_2.id +INNER JOIN test_table_join_3 ON test_table_join_2.id = test_table_join_3.id); + +SELECT '--'; + +DESCRIBE (SELECT t1.*, t2.*, t3.* +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); + +SELECT '--'; + +DESCRIBE (SELECT test_table_join_1.* APPLY toString, test_table_join_2.* APPLY toString, test_table_join_3.* APPLY toString +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); + +SELECT '--'; + +DESCRIBE (SELECT test_table_join_1.* APPLY x -> toString(x), test_table_join_2.* APPLY x -> toString(x), test_table_join_3.* APPLY x -> toString(x) +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); + +SELECT '--'; + +DESCRIBE (SELECT test_table_join_1.id, test_table_join_1.value, test_table_join_1.value_join_1, test_table_join_2.id, test_table_join_2.value, test_table_join_2.value_join_2, +test_table_join_3.id, test_table_join_3.value, test_table_join_3.value_join_3 +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); + +SELECT '--'; + +DESCRIBE (SELECT t1.id, t1.value, t1.value_join_1, t2.id, t2.value, t2.value_join_2, t3.id, t3.value, t3.value_join_3 +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id INNER JOIN test_table_join_3 AS t3 ON t2.id = t3.id); + +SELECT 'Joins USING'; + +DESCRIBE (SELECT * FROM 
test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id)); + +SELECT '--'; + +DESCRIBE (SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, value)); + +SELECT '--'; + +DESCRIBE (SELECT id, t1.id, t1.value, t2.id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id)); + +SELECT '--'; + +DESCRIBE (SELECT id, value, t1.id, t1.value, t2.id, t2.value FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, value)); + +SELECT 'Multiple Joins USING'; + +SELECT '--'; + +DESCRIBE (SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING (id)); + +SELECT '--'; + +DESCRIBE (SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, value) INNER JOIN test_table_join_3 AS t3 USING (id, value)); + +SELECT '--'; + +DESCRIBE (SELECT id, t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING (id)); + +SELECT '--'; + +DESCRIBE (SELECT id, value, t1.id, t1.value, t2.id, t2.value, t3.id, t3.value +FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, value) INNER JOIN test_table_join_3 AS t3 USING (id, value)); + +-- { echoOff } + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; +DROP TABLE test_table_join_3; +DROP TABLE test_table; +DROP TABLE test_table_compound; diff --git a/tests/queries/0_stateless/02379_analyzer_subquery_depth.reference b/tests/queries/0_stateless/02379_analyzer_subquery_depth.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02379_analyzer_subquery_depth.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02379_analyzer_subquery_depth.sql b/tests/queries/0_stateless/02379_analyzer_subquery_depth.sql new file mode 100644 index 00000000000..3fea94e1f1c --- /dev/null +++ b/tests/queries/0_stateless/02379_analyzer_subquery_depth.sql @@ -0,0 +1,4 @@ +SET use_analyzer = 1; + +SELECT (SELECT a FROM (SELECT 1 AS a)) SETTINGS max_subquery_depth = 1; -- { serverError 162 } +SELECT (SELECT a FROM (SELECT 1 AS a)) SETTINGS max_subquery_depth = 2; diff --git a/tests/queries/0_stateless/02380_analyzer_join_sample.reference b/tests/queries/0_stateless/02380_analyzer_join_sample.reference new file mode 100644 index 00000000000..14d5f58d76a --- /dev/null +++ b/tests/queries/0_stateless/02380_analyzer_join_sample.reference @@ -0,0 +1,2 @@ +0 0 2 2 +1 1 2 2 diff --git a/tests/queries/0_stateless/02380_analyzer_join_sample.sql b/tests/queries/0_stateless/02380_analyzer_join_sample.sql new file mode 100644 index 00000000000..1cf5a4a04df --- /dev/null +++ b/tests/queries/0_stateless/02380_analyzer_join_sample.sql @@ -0,0 +1,29 @@ +SET use_analyzer = 1; + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt64, + value String +) ENGINE=MergeTree +ORDER BY id +SAMPLE BY id; + +INSERT INTO test_table_join_1 VALUES (0, 'Value'), (1, 'Value_1'); + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt64, + value String +) ENGINE=MergeTree +ORDER BY id +SAMPLE BY id; + +INSERT INTO test_table_join_2 VALUES (0, 'Value'), (1, 'Value_1'); + +SELECT t1.id AS t1_id, t2.id AS t2_id, t1._sample_factor AS t1_sample_factor, t2._sample_factor AS t2_sample_factor +FROM test_table_join_1 AS t1 SAMPLE 1/2 INNER JOIN test_table_join_2 AS t2 SAMPLE 1/2 ON t1.id = 
t2.id; + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; diff --git a/tests/queries/0_stateless/02381_analyzer_join_final.reference b/tests/queries/0_stateless/02381_analyzer_join_final.reference new file mode 100644 index 00000000000..e00d444d142 --- /dev/null +++ b/tests/queries/0_stateless/02381_analyzer_join_final.reference @@ -0,0 +1,2 @@ +0 0 3 1 +1 1 1 3 diff --git a/tests/queries/0_stateless/02381_analyzer_join_final.sql b/tests/queries/0_stateless/02381_analyzer_join_final.sql new file mode 100644 index 00000000000..6870b6e5f98 --- /dev/null +++ b/tests/queries/0_stateless/02381_analyzer_join_final.sql @@ -0,0 +1,34 @@ +SET use_analyzer = 1; + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt64, + value UInt64 +) ENGINE=SummingMergeTree(value) +ORDER BY id +SAMPLE BY id; + +SYSTEM STOP MERGES test_table_join_1; +INSERT INTO test_table_join_1 VALUES (0, 1), (1, 1); +INSERT INTO test_table_join_1 VALUES (0, 2); + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt64, + value UInt64 +) ENGINE=SummingMergeTree(value) +ORDER BY id +SAMPLE BY id; + +SYSTEM STOP MERGES test_table_join_2; +INSERT INTO test_table_join_2 VALUES (0, 1), (1, 1); +INSERT INTO test_table_join_2 VALUES (1, 2); + +SELECT t1.id AS t1_id, t2.id AS t2_id, t1.value AS t1_value, t2.value AS t2_value +FROM test_table_join_1 AS t1 FINAL INNER JOIN test_table_join_2 AS t2 FINAL ON t1.id = t2.id +ORDER BY t1_id; + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; diff --git a/tests/queries/0_stateless/02382_analyzer_matcher_join_using.reference b/tests/queries/0_stateless/02382_analyzer_matcher_join_using.reference new file mode 100644 index 00000000000..f2199aad4c8 --- /dev/null +++ b/tests/queries/0_stateless/02382_analyzer_matcher_join_using.reference @@ -0,0 +1,47 @@ +-- { echoOn } + +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) ORDER BY id, t1.value; +0 Join_1_Value_0 Join_2_Value_0 +1 Join_1_Value_1 Join_2_Value_1 +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, id, id) ORDER BY id, t1.value; -- { serverError 36 } +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) ORDER BY id, t1.value; +0 Join_1_Value_0 Join_2_Value_0 +1 Join_1_Value_1 Join_2_Value_1 +2 Join_1_Value_2 +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) ORDER BY id, t1.value; +0 Join_1_Value_0 Join_2_Value_0 +1 Join_1_Value_1 Join_2_Value_1 +3 Join_2_Value_3 +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) ORDER BY id, t1.value; +0 Join_2_Value_3 +0 Join_1_Value_0 Join_2_Value_0 +1 Join_1_Value_1 Join_2_Value_1 +2 Join_1_Value_2 +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING (id) ORDER BY id, t1.value; +0 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +1 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING (id) ORDER BY id, t1.value; +0 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +1 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING (id) ORDER BY id, t1.value; +0 
Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +1 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 +4 Join_3_Value_4 +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING (id) ORDER BY id, t1.value; +0 Join_3_Value_4 +0 Join_1_Value_0 Join_2_Value_0 Join_3_Value_0 +1 Join_1_Value_1 Join_2_Value_1 Join_3_Value_1 diff --git a/tests/queries/0_stateless/02382_analyzer_matcher_join_using.sql b/tests/queries/0_stateless/02382_analyzer_matcher_join_using.sql new file mode 100644 index 00000000000..8b6d9832086 --- /dev/null +++ b/tests/queries/0_stateless/02382_analyzer_matcher_join_using.sql @@ -0,0 +1,74 @@ +SET use_analyzer = 1; + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt8, + value String +) ENGINE = TinyLog; + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt16, + value String +) ENGINE = TinyLog; + +DROP TABLE IF EXISTS test_table_join_3; +CREATE TABLE test_table_join_3 +( + id UInt64, + value String +) ENGINE = TinyLog; + +INSERT INTO test_table_join_1 VALUES (0, 'Join_1_Value_0'); +INSERT INTO test_table_join_1 VALUES (1, 'Join_1_Value_1'); +INSERT INTO test_table_join_1 VALUES (2, 'Join_1_Value_2'); + +INSERT INTO test_table_join_2 VALUES (0, 'Join_2_Value_0'); +INSERT INTO test_table_join_2 VALUES (1, 'Join_2_Value_1'); +INSERT INTO test_table_join_2 VALUES (3, 'Join_2_Value_3'); + +INSERT INTO test_table_join_3 VALUES (0, 'Join_3_Value_0'); +INSERT INTO test_table_join_3 VALUES (1, 'Join_3_Value_1'); +INSERT INTO test_table_join_3 VALUES (4, 'Join_3_Value_4'); + +-- { echoOn } + +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) ORDER BY id, t1.value; + +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id, id, id) ORDER BY id, t1.value; -- { serverError 36 } + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 USING (id) ORDER BY id, t1.value; + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 USING (id) ORDER BY id, t1.value; + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 USING (id) ORDER BY id, t1.value; + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) INNER JOIN test_table_join_3 AS t3 USING (id) ORDER BY id, t1.value; + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) LEFT JOIN test_table_join_3 AS t3 USING (id) ORDER BY id, t1.value; + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) RIGHT JOIN test_table_join_3 AS t3 USING (id) ORDER BY id, t1.value; + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 USING (id) FULL JOIN test_table_join_3 AS t3 USING (id) ORDER BY id, t1.value; + +-- { echoOff } + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; +DROP TABLE test_table_join_3; diff --git a/tests/queries/0_stateless/02383_analyzer_merge_tree_self_join.reference b/tests/queries/0_stateless/02383_analyzer_merge_tree_self_join.reference new file mode 100644 index 00000000000..e48ae282f5d --- /dev/null +++ b/tests/queries/0_stateless/02383_analyzer_merge_tree_self_join.reference @@ -0,0 +1,24 @@ +-- { echoOn } + +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY t1.id, t1.value; +0 
Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY t1.id, t1.value; +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY t1.id, t1.value; +0 3 Join_2_Value_3 +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +SELECT '--'; +-- +SELECT * FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY t1.id, t1.value; +0 3 Join_2_Value_3 +0 Join_1_Value_0 0 Join_2_Value_0 +1 Join_1_Value_1 1 Join_2_Value_1 +2 Join_1_Value_2 0 diff --git a/tests/queries/0_stateless/02383_analyzer_merge_tree_self_join.sql b/tests/queries/0_stateless/02383_analyzer_merge_tree_self_join.sql new file mode 100644 index 00000000000..adb858c217f --- /dev/null +++ b/tests/queries/0_stateless/02383_analyzer_merge_tree_self_join.sql @@ -0,0 +1,44 @@ +SET use_analyzer = 1; + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt64, + value String +) ENGINE = MergeTree ORDER BY id; + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt64, + value String +) ENGINE = MergeTree ORDER BY id; + +INSERT INTO test_table_join_1 VALUES (0, 'Join_1_Value_0'); +INSERT INTO test_table_join_1 VALUES (1, 'Join_1_Value_1'); +INSERT INTO test_table_join_1 VALUES (2, 'Join_1_Value_2'); + +INSERT INTO test_table_join_2 VALUES (0, 'Join_2_Value_0'); +INSERT INTO test_table_join_2 VALUES (1, 'Join_2_Value_1'); +INSERT INTO test_table_join_2 VALUES (3, 'Join_2_Value_3'); + +-- { echoOn } + +SELECT * FROM test_table_join_1 AS t1 INNER JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY t1.id, t1.value; + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 LEFT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY t1.id, t1.value; + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 RIGHT JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY t1.id, t1.value; + +SELECT '--'; + +SELECT * FROM test_table_join_1 AS t1 FULL JOIN test_table_join_2 AS t2 ON t1.id = t2.id ORDER BY t1.id, t1.value; + +-- { echoOff } + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; diff --git a/tests/queries/0_stateless/02384_analyzer_dict_get_join_get.reference b/tests/queries/0_stateless/02384_analyzer_dict_get_join_get.reference new file mode 100644 index 00000000000..5f783010a1c --- /dev/null +++ b/tests/queries/0_stateless/02384_analyzer_dict_get_join_get.reference @@ -0,0 +1,10 @@ +Dictionary +0 Value +Value +Value +Value +JOIN +0 Value +Value +Value +Value diff --git a/tests/queries/0_stateless/02384_analyzer_dict_get_join_get.sql b/tests/queries/0_stateless/02384_analyzer_dict_get_join_get.sql new file mode 100644 index 00000000000..79aa0703aaa --- /dev/null +++ b/tests/queries/0_stateless/02384_analyzer_dict_get_join_get.sql @@ -0,0 +1,59 @@ +SET use_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +DROP DICTIONARY IF EXISTS test_dictionary; +CREATE DICTIONARY test_dictionary +( + id UInt64, + value String +) +PRIMARY KEY id +LAYOUT(FLAT()) +SOURCE(CLICKHOUSE(TABLE 'test_table')) +LIFETIME(0); + +SELECT 'Dictionary'; + +SELECT * FROM test_dictionary; + +SELECT dictGet('test_dictionary', 'value', toUInt64(0)); + +SELECT 
dictGet(test_dictionary, 'value', toUInt64(0)); + +WITH 'test_dictionary' AS dictionary SELECT dictGet(dictionary, 'value', toUInt64(0)); + +WITH 'invalid_dictionary' AS dictionary SELECT dictGet(dictionary, 'value', toUInt64(0)); -- { serverError 36 } + +DROP DICTIONARY test_dictionary; +DROP TABLE test_table; + +DROP TABLE IF EXISTS test_table_join; +CREATE TABLE test_table_join +( + id UInt64, + value String +) ENGINE=Join(Any, Left, id); + +INSERT INTO test_table_join VALUES (0, 'Value'); + +SELECT 'JOIN'; + +SELECT * FROM test_table_join; + +SELECT joinGet('test_table_join', 'value', toUInt64(0)); + +SELECT joinGet(test_table_join, 'value', toUInt64(0)); + +WITH 'test_table_join' AS join_table SELECT joinGet(join_table, 'value', toUInt64(0)); + +WITH 'invalid_test_table_join' AS join_table SELECT joinGet(join_table, 'value', toUInt64(0)); -- { serverError 60 } + +DROP TABLE test_table_join; diff --git a/tests/queries/0_stateless/02385_analyzer_aliases_compound_expression.reference b/tests/queries/0_stateless/02385_analyzer_aliases_compound_expression.reference new file mode 100644 index 00000000000..05c5c9872a6 --- /dev/null +++ b/tests/queries/0_stateless/02385_analyzer_aliases_compound_expression.reference @@ -0,0 +1,7 @@ +(1,'Value') 1 Value +-- +2 +-- +1 1 +-- +1 1 diff --git a/tests/queries/0_stateless/02385_analyzer_aliases_compound_expression.sql b/tests/queries/0_stateless/02385_analyzer_aliases_compound_expression.sql new file mode 100644 index 00000000000..ec2155d241b --- /dev/null +++ b/tests/queries/0_stateless/02385_analyzer_aliases_compound_expression.sql @@ -0,0 +1,21 @@ +SET use_analyzer = 1; + +SELECT cast(tuple(1, 'Value'), 'Tuple(first UInt64, second String)') AS value, value.first, value.second; + +SELECT '--'; + +WITH (x -> x + 1) AS lambda SELECT lambda(1); + +WITH (x -> x + 1) AS lambda SELECT lambda.nested(1); -- { serverError 36 } + +SELECT '--'; + +SELECT * FROM (SELECT 1) AS t1, t1 AS t2; + +SELECT '--'; + +SELECT * FROM t1 AS t2, (SELECT 1) AS t1; + +SELECT * FROM (SELECT 1) AS t1, t1.nested AS t2; -- { serverError 36 } + +SELECT * FROM t1.nested AS t2, (SELECT 1) AS t1; -- { serverError 36 } diff --git a/tests/queries/0_stateless/02386_analyzer_in_function_nested_subqueries.reference b/tests/queries/0_stateless/02386_analyzer_in_function_nested_subqueries.reference new file mode 100644 index 00000000000..dec7d2fabd2 --- /dev/null +++ b/tests/queries/0_stateless/02386_analyzer_in_function_nested_subqueries.reference @@ -0,0 +1 @@ +\N diff --git a/tests/queries/0_stateless/02386_analyzer_in_function_nested_subqueries.sql b/tests/queries/0_stateless/02386_analyzer_in_function_nested_subqueries.sql new file mode 100644 index 00000000000..bffbc46809a --- /dev/null +++ b/tests/queries/0_stateless/02386_analyzer_in_function_nested_subqueries.sql @@ -0,0 +1,3 @@ +SET use_analyzer = 1; + +SELECT (NULL IN (SELECT 9223372036854775806 IN (SELECT 65536), inf, NULL IN (NULL))) IN (SELECT NULL IN (NULL)); diff --git a/tests/queries/0_stateless/02387_analyzer_cte.reference b/tests/queries/0_stateless/02387_analyzer_cte.reference new file mode 100644 index 00000000000..1ad3aee198b --- /dev/null +++ b/tests/queries/0_stateless/02387_analyzer_cte.reference @@ -0,0 +1,7 @@ +1 +-- +0 Value +-- +1 +-- +0 Value diff --git a/tests/queries/0_stateless/02387_analyzer_cte.sql b/tests/queries/0_stateless/02387_analyzer_cte.sql new file mode 100644 index 00000000000..725e18af315 --- /dev/null +++ b/tests/queries/0_stateless/02387_analyzer_cte.sql @@ -0,0 +1,26 @@ +SET use_analyzer = 
1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +WITH cte_subquery AS (SELECT 1) SELECT * FROM cte_subquery; + +SELECT '--'; + +WITH cte_subquery AS (SELECT * FROM test_table) SELECT * FROM cte_subquery; + +SELECT '--'; + +WITH cte_subquery AS (SELECT 1 UNION DISTINCT SELECT 1) SELECT * FROM cte_subquery; + +SELECT '--'; + +WITH cte_subquery AS (SELECT * FROM test_table UNION DISTINCT SELECT * FROM test_table) SELECT * FROM cte_subquery; + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02388_analyzer_recursive_lambda.reference b/tests/queries/0_stateless/02388_analyzer_recursive_lambda.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02388_analyzer_recursive_lambda.sql b/tests/queries/0_stateless/02388_analyzer_recursive_lambda.sql new file mode 100644 index 00000000000..dd342b32d1f --- /dev/null +++ b/tests/queries/0_stateless/02388_analyzer_recursive_lambda.sql @@ -0,0 +1,5 @@ +SET use_analyzer = 1; + +WITH x -> plus(lambda(1), x) AS lambda SELECT lambda(1048576); -- { serverError 1 }; + +WITH lambda(lambda(plus(x, x, -1)), tuple(x), x + 2147483646) AS lambda, x -> plus(lambda(1), x, 2) AS lambda SELECT 1048576, lambda(1048576); -- { serverError 1 }; diff --git a/tests/queries/0_stateless/02389_analyzer_nested_lambda.reference b/tests/queries/0_stateless/02389_analyzer_nested_lambda.reference new file mode 100644 index 00000000000..935c53358c0 --- /dev/null +++ b/tests/queries/0_stateless/02389_analyzer_nested_lambda.reference @@ -0,0 +1,121 @@ +-- { echoOn } + +SELECT arrayMap(x -> x + arrayMap(x -> x + 1, [1])[1], [1,2,3]); +[3,4,5] +SELECT '--'; +-- +SELECT arrayMap(x -> x + arrayMap(x -> 5, [1])[1], [1,2,3]); +[6,7,8] +SELECT '--'; +-- +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(x -> constant, [1])[1], [1,2,3]); +5 [6,7,8] +SELECT '--'; +-- +SELECT arrayMap(x -> x + arrayMap(x -> x, [1])[1], [1,2,3]); +[2,3,4] +SELECT '--'; +-- +SELECT arrayMap(x -> x + arrayMap(y -> x + y, [1])[1], [1,2,3]); +[3,5,7] +SELECT '--'; +-- +SELECT arrayMap(x -> x + arrayMap(x -> (SELECT 5), [1])[1], [1,2,3]); +[6,7,8] +SELECT '--'; +-- +SELECT (SELECT 5) AS subquery, arrayMap(x -> x + arrayMap(x -> subquery, [1])[1], [1,2,3]); +5 [6,7,8] +SELECT '--'; +-- +SELECT arrayMap(x -> x + arrayMap(x -> (SELECT 5 UNION DISTINCT SELECT 5), [1])[1], [1,2,3]); +[6,7,8] +SELECT '--'; +-- +SELECT (SELECT 5 UNION DISTINCT SELECT 5) AS subquery, arrayMap(x -> x + arrayMap(x -> subquery, [1])[1], [1,2,3]); +5 [6,7,8] +SELECT '--'; +-- +WITH x -> toString(x) AS lambda SELECT arrayMap(x -> lambda(x), [1,2,3]); +['1','2','3'] +SELECT '--'; +-- +WITH x -> toString(x) AS lambda SELECT arrayMap(x -> arrayMap(y -> concat(lambda(x), '_', lambda(y)), [1,2,3]), [1,2,3]); +[['1_1','1_2','1_3'],['2_1','2_2','2_3'],['3_1','3_2','3_3']] +SELECT '--'; +-- +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; +INSERT INTO test_table VALUES (0, 'Value'); +SELECT arrayMap(x -> x + arrayMap(x -> id, [1])[1], [1,2,3]) FROM test_table; +[1,2,3] +SELECT '--'; +-- +SELECT arrayMap(x -> x + arrayMap(x -> x + id, [1])[1], [1,2,3]) FROM test_table; +[2,3,4] +SELECT '--'; +-- +SELECT arrayMap(x -> x + arrayMap(y -> x + y + id, [1])[1], [1,2,3]) FROM test_table; +[3,5,7] +SELECT '--'; +-- +SELECT id AS id_alias, arrayMap(x -> x + arrayMap(y -> x + y + id_alias, [1])[1], [1,2,3]) FROM test_table; +0 [3,5,7] 
+SELECT '--'; +-- +SELECT arrayMap(x -> x + arrayMap(x -> 5, [1])[1], [1,2,3]) FROM test_table; +[6,7,8] +SELECT '--'; +-- +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(x -> constant, [1])[1], [1,2,3]) FROM test_table; +5 [6,7,8] +SELECT '--'; +-- +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(x -> x + constant, [1])[1], [1,2,3]) FROM test_table; +5 [7,8,9] +SELECT '--'; +-- +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(x -> x + id + constant, [1])[1], [1,2,3]) FROM test_table; +5 [7,8,9] +SELECT '--'; +-- +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(y -> x + y + id + constant, [1])[1], [1,2,3]) FROM test_table; +5 [8,10,12] +SELECT '--'; +-- +SELECT arrayMap(x -> x + arrayMap(x -> id + (SELECT id FROM test_table), [1])[1], [1,2,3]) FROM test_table; +[1,2,3] +SELECT '--'; +-- +SELECT arrayMap(x -> id + arrayMap(x -> id + (SELECT id FROM test_table), [1])[1], [1,2,3]) FROM test_table; +[0,0,0] +SELECT '--'; +-- +SELECT arrayMap(x -> id + arrayMap(x -> id + (SELECT id FROM test_table UNION DISTINCT SELECT id FROM test_table), [1])[1], [1,2,3]) FROM test_table; +[0,0,0] +SELECT '--'; +-- +WITH x -> toString(id) AS lambda SELECT arrayMap(x -> lambda(x), [1,2,3]) FROM test_table; +['0','0','0'] +SELECT '--'; +-- +WITH x -> toString(id) AS lambda SELECT arrayMap(x -> arrayMap(y -> lambda(y), [1,2,3]), [1,2,3]) FROM test_table; +[['0','0','0'],['0','0','0'],['0','0','0']] +SELECT '--'; +-- +WITH x -> toString(id) AS lambda SELECT arrayMap(x -> arrayMap(y -> concat(lambda(x), '_', lambda(y)), [1,2,3]), [1,2,3]) FROM test_table; +[['0_0','0_0','0_0'],['0_0','0_0','0_0'],['0_0','0_0','0_0']] +SELECT '--'; +-- +SELECT arrayMap(x -> concat(concat(concat(concat(concat(toString(id), '___\0_______\0____'), toString(id), concat(concat(toString(id), ''), toString(id)), toString(id)), + arrayMap(x -> concat(concat(concat(concat(toString(id), ''), toString(id)), toString(id), '___\0_______\0____'), toString(id)) AS lambda, [NULL, inf, 1, 1]), + concat(toString(id), NULL), toString(id)), toString(id))) AS lambda, [NULL, NULL, 2147483647]) +FROM test_table WHERE concat(concat(concat(toString(id), '___\0_______\0____'), toString(id)), concat(toString(id), NULL), toString(id)); +SELECT '--'; +-- +SELECT arrayMap(x -> concat(toString(id), arrayMap(x -> toString(1), [NULL])), [NULL]) FROM test_table; -- { serverError 44 }; +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql b/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql new file mode 100644 index 00000000000..3be07a35425 --- /dev/null +++ b/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql @@ -0,0 +1,129 @@ +SET use_analyzer = 1; + +-- { echoOn } + +SELECT arrayMap(x -> x + arrayMap(x -> x + 1, [1])[1], [1,2,3]); + +SELECT '--'; + +SELECT arrayMap(x -> x + arrayMap(x -> 5, [1])[1], [1,2,3]); + +SELECT '--'; + +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(x -> constant, [1])[1], [1,2,3]); + +SELECT '--'; + +SELECT arrayMap(x -> x + arrayMap(x -> x, [1])[1], [1,2,3]); + +SELECT '--'; + +SELECT arrayMap(x -> x + arrayMap(y -> x + y, [1])[1], [1,2,3]); + +SELECT '--'; + +SELECT arrayMap(x -> x + arrayMap(x -> (SELECT 5), [1])[1], [1,2,3]); + +SELECT '--'; + +SELECT (SELECT 5) AS subquery, arrayMap(x -> x + arrayMap(x -> subquery, [1])[1], [1,2,3]); + +SELECT '--'; + +SELECT arrayMap(x -> x + arrayMap(x -> (SELECT 5 UNION DISTINCT SELECT 5), [1])[1], [1,2,3]); + +SELECT '--'; + +SELECT (SELECT 5 UNION DISTINCT SELECT 5) AS subquery, arrayMap(x -> x + arrayMap(x -> subquery, [1])[1], 
[1,2,3]); + +SELECT '--'; + +WITH x -> toString(x) AS lambda SELECT arrayMap(x -> lambda(x), [1,2,3]); + +SELECT '--'; + +WITH x -> toString(x) AS lambda SELECT arrayMap(x -> arrayMap(y -> concat(lambda(x), '_', lambda(y)), [1,2,3]), [1,2,3]); + +SELECT '--'; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO test_table VALUES (0, 'Value'); + +SELECT arrayMap(x -> x + arrayMap(x -> id, [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT arrayMap(x -> x + arrayMap(x -> x + id, [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT arrayMap(x -> x + arrayMap(y -> x + y + id, [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT id AS id_alias, arrayMap(x -> x + arrayMap(y -> x + y + id_alias, [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT arrayMap(x -> x + arrayMap(x -> 5, [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(x -> constant, [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(x -> x + constant, [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(x -> x + id + constant, [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT 5 AS constant, arrayMap(x -> x + arrayMap(y -> x + y + id + constant, [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT arrayMap(x -> x + arrayMap(x -> id + (SELECT id FROM test_table), [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT arrayMap(x -> id + arrayMap(x -> id + (SELECT id FROM test_table), [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT arrayMap(x -> id + arrayMap(x -> id + (SELECT id FROM test_table UNION DISTINCT SELECT id FROM test_table), [1])[1], [1,2,3]) FROM test_table; + +SELECT '--'; + +WITH x -> toString(id) AS lambda SELECT arrayMap(x -> lambda(x), [1,2,3]) FROM test_table; + +SELECT '--'; + +WITH x -> toString(id) AS lambda SELECT arrayMap(x -> arrayMap(y -> lambda(y), [1,2,3]), [1,2,3]) FROM test_table; + +SELECT '--'; + +WITH x -> toString(id) AS lambda SELECT arrayMap(x -> arrayMap(y -> concat(lambda(x), '_', lambda(y)), [1,2,3]), [1,2,3]) FROM test_table; + +SELECT '--'; + +SELECT arrayMap(x -> concat(concat(concat(concat(concat(toString(id), '___\0_______\0____'), toString(id), concat(concat(toString(id), ''), toString(id)), toString(id)), + arrayMap(x -> concat(concat(concat(concat(toString(id), ''), toString(id)), toString(id), '___\0_______\0____'), toString(id)) AS lambda, [NULL, inf, 1, 1]), + concat(toString(id), NULL), toString(id)), toString(id))) AS lambda, [NULL, NULL, 2147483647]) +FROM test_table WHERE concat(concat(concat(toString(id), '___\0_______\0____'), toString(id)), concat(toString(id), NULL), toString(id)); + +SELECT '--'; + +SELECT arrayMap(x -> concat(toString(id), arrayMap(x -> toString(1), [NULL])), [NULL]) FROM test_table; -- { serverError 44 }; + +DROP TABLE test_table; + +-- { echoOff } diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index c7ac00ee18f..3fd12051f4a 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -221,8 +221,12 @@ cutQueryString cutQueryStringAndFragment cutToFirstSignificantSubdomain cutToFirstSignificantSubdomainCustom 
+cutToFirstSignificantSubdomainCustomRFC cutToFirstSignificantSubdomainCustomWithWWW +cutToFirstSignificantSubdomainCustomWithWWWRFC +cutToFirstSignificantSubdomainRFC cutToFirstSignificantSubdomainWithWWW +cutToFirstSignificantSubdomainWithWWWRFC cutURLParameter cutWWW dateDiff @@ -281,7 +285,9 @@ dictHas dictIsIn divide domain +domainRFC domainWithoutWWW +domainWithoutWWWRFC dotProduct dumpColumnStructure e @@ -332,6 +338,8 @@ filesystemFree finalizeAggregation firstSignificantSubdomain firstSignificantSubdomainCustom +firstSignificantSubdomainCustomRFC +firstSignificantSubdomainRFC flattenTuple floor format @@ -593,6 +601,7 @@ polygonsUnionSpherical polygonsWithinCartesian polygonsWithinSpherical port +portRFC position positionCaseInsensitive positionCaseInsensitiveUTF8 @@ -898,6 +907,7 @@ toYearWeek today tokens topLevelDomain +topLevelDomainRFC transactionID transactionLatestSnapshot transactionOldestSnapshot diff --git a/tests/queries/0_stateless/02457_morton_coding.reference b/tests/queries/0_stateless/02457_morton_coding.reference new file mode 100644 index 00000000000..311a515a458 --- /dev/null +++ b/tests/queries/0_stateless/02457_morton_coding.reference @@ -0,0 +1,12 @@ +----- START ----- +----- CONST ----- +2149 +(1,2,3,4) +4294967286 +(65534,65533) +4294967286 +(4294967286) +----- 256, 8 ----- +----- 65536, 4 ----- +----- 4294967296, 2 ----- +----- END ----- diff --git a/tests/queries/0_stateless/02457_morton_coding.sql b/tests/queries/0_stateless/02457_morton_coding.sql new file mode 100644 index 00000000000..4fc26f255f4 --- /dev/null +++ b/tests/queries/0_stateless/02457_morton_coding.sql @@ -0,0 +1,137 @@ +SELECT '----- START -----'; +drop table if exists morton_numbers_02457; +create table morton_numbers_02457( + n1 UInt32, + n2 UInt32, + n3 UInt16, + n4 UInt16, + n5 UInt8, + n6 UInt8, + n7 UInt8, + n8 UInt8 +) + Engine=MergeTree() + ORDER BY n1; + +SELECT '----- CONST -----'; +select mortonEncode(1,2,3,4); +select mortonDecode(4, 2149); +select mortonEncode(65534, 65533); +select mortonDecode(2, 4294967286); +select mortonEncode(4294967286); +select mortonDecode(1, 4294967286); + +SELECT '----- 256, 8 -----'; +insert into morton_numbers_02457 +select n1.number, n2.number, n3.number, n4.number, n5.number, n6.number, n7.number, n8.number +from numbers(256-4, 4) n1 + cross join numbers(256-4, 4) n2 + cross join numbers(256-4, 4) n3 + cross join numbers(256-4, 4) n4 + cross join numbers(256-4, 4) n5 + cross join numbers(256-4, 4) n6 + cross join numbers(256-4, 4) n7 + cross join numbers(256-4, 4) n8 +; +drop table if exists morton_numbers_1_02457; +create table morton_numbers_1_02457( + n1 UInt64, + n2 UInt64, + n3 UInt64, + n4 UInt64, + n5 UInt64, + n6 UInt64, + n7 UInt64, + n8 UInt64 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_1_02457 +select untuple(mortonDecode(8, mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8))) +from morton_numbers_02457; + +( + select * from morton_numbers_02457 + union distinct + select * from morton_numbers_1_02457 +) +except +( + select * from morton_numbers_02457 + intersect + select * from morton_numbers_1_02457 +); +drop table if exists morton_numbers_1_02457; + +SELECT '----- 65536, 4 -----'; +insert into morton_numbers_02457 +select n1.number, n2.number, n3.number, n4.number, 0, 0, 0, 0 +from numbers(pow(2, 16)-8,8) n1 + cross join numbers(pow(2, 16)-8, 8) n2 + cross join numbers(pow(2, 16)-8, 8) n3 + cross join numbers(pow(2, 16)-8, 8) n4 +; + +create table morton_numbers_2_02457( + n1 UInt64, + n2 UInt64, + n3 UInt64, + 
n4 UInt64 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_2_02457 +select untuple(mortonDecode(4, mortonEncode(n1, n2, n3, n4))) +from morton_numbers_02457; + +( + select n1, n2, n3, n4 from morton_numbers_02457 + union distinct + select n1, n2, n3, n4 from morton_numbers_2_02457 +) +except +( + select n1, n2, n3, n4 from morton_numbers_02457 + intersect + select n1, n2, n3, n4 from morton_numbers_2_02457 +); +drop table if exists morton_numbers_2_02457; + +SELECT '----- 4294967296, 2 -----'; +insert into morton_numbers_02457 +select n1.number, n2.number, 0, 0, 0, 0, 0, 0 +from numbers(pow(2, 32)-8,8) n1 + cross join numbers(pow(2, 32)-8, 8) n2 + cross join numbers(pow(2, 32)-8, 8) n3 + cross join numbers(pow(2, 32)-8, 8) n4 +; + +drop table if exists morton_numbers_3_02457; +create table morton_numbers_3_02457( + n1 UInt64, + n2 UInt64 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_3_02457 +select untuple(mortonDecode(2, mortonEncode(n1, n2))) +from morton_numbers_02457; + +( + select n1, n2 from morton_numbers_3_02457 + union distinct + select n1, n2 from morton_numbers_3_02457 +) +except +( + select n1, n2 from morton_numbers_3_02457 + intersect + select n1, n2 from morton_numbers_3_02457 +); +drop table if exists morton_numbers_3_02457; + +SELECT '----- END -----'; +drop table if exists morton_numbers_02457; diff --git a/tests/queries/0_stateless/02457_morton_coding_with_mask.reference b/tests/queries/0_stateless/02457_morton_coding_with_mask.reference new file mode 100644 index 00000000000..32d5ce3ee27 --- /dev/null +++ b/tests/queries/0_stateless/02457_morton_coding_with_mask.reference @@ -0,0 +1,15 @@ +----- START ----- +----- CONST ----- +4205569 +(1,2,3,4) +4294967286 +(65534,65533) +4294967286 +(4294967286) +2147483648 +(128) +0 +----- (1,2,1,2) ----- +----- (1,4) ----- +----- (1,1,2) ----- +----- END ----- diff --git a/tests/queries/0_stateless/02457_morton_coding_with_mask.sql b/tests/queries/0_stateless/02457_morton_coding_with_mask.sql new file mode 100644 index 00000000000..5aeb1f380be --- /dev/null +++ b/tests/queries/0_stateless/02457_morton_coding_with_mask.sql @@ -0,0 +1,143 @@ +SELECT '----- START -----'; + +SELECT '----- CONST -----'; +select mortonEncode((1,2,3,1), 1,2,3,4); +select mortonDecode((1, 2, 3, 1), 4205569); +select mortonEncode((1,1), 65534, 65533); +select mortonDecode((1,1), 4294967286); +select mortonEncode(tuple(1), 4294967286); +select mortonDecode(tuple(1), 4294967286); +select mortonEncode(tuple(4), 128); +select mortonDecode(tuple(4), 2147483648); +select mortonEncode((4,4,4,4), 128, 128, 128, 128); + +SELECT '----- (1,2,1,2) -----'; +drop table if exists morton_numbers_mask_02457; +create table morton_numbers_mask_02457( + n1 UInt8, + n2 UInt8, + n3 UInt8, + n4 UInt8 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_mask_02457 +select n1.number, n2.number, n3.number, n4.number +from numbers(256-16, 16) n1 + cross join numbers(256-16, 16) n2 + cross join numbers(256-16, 16) n3 + cross join numbers(256-16, 16) n4 +; +drop table if exists morton_numbers_mask_1_02457; +create table morton_numbers_mask_1_02457( + n1 UInt64, + n2 UInt64, + n3 UInt64, + n4 UInt64 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_mask_1_02457 +select untuple(mortonDecode((1,2,1,2), mortonEncode((1,2,1,2), n1, n2, n3, n4))) +from morton_numbers_mask_02457; + +( + select * from morton_numbers_mask_02457 + union distinct + select * from morton_numbers_mask_1_02457 +) +except +( + select * from 
morton_numbers_mask_02457 + intersect + select * from morton_numbers_mask_1_02457 +); +drop table if exists morton_numbers_mask_02457; +drop table if exists morton_numbers_mask_1_02457; + +SELECT '----- (1,4) -----'; +drop table if exists morton_numbers_mask_02457; +create table morton_numbers_mask_02457( + n1 UInt32, + n2 UInt8 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_mask_02457 +select n1.number, n2.number +from numbers(pow(2, 32)-64, 64) n1 + cross join numbers(pow(2, 8)-64, 64) n2 +; +drop table if exists morton_numbers_mask_2_02457; +create table morton_numbers_mask_2_02457( + n1 UInt64, + n2 UInt64 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_mask_2_02457 +select untuple(mortonDecode((1,4), mortonEncode((1,4), n1, n2))) +from morton_numbers_mask_02457; + +( + select * from morton_numbers_mask_02457 + union distinct + select * from morton_numbers_mask_2_02457 +) +except +( + select * from morton_numbers_mask_02457 + intersect + select * from morton_numbers_mask_2_02457 +); +drop table if exists morton_numbers_mask_02457; +drop table if exists morton_numbers_mask_2_02457; + +SELECT '----- (1,1,2) -----'; +drop table if exists morton_numbers_mask_02457; +create table morton_numbers_mask_02457( + n1 UInt16, + n2 UInt16, + n3 UInt8, +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_mask_02457 +select n1.number, n2.number, n3.number +from numbers(pow(2, 16)-64, 64) n1 + cross join numbers(pow(2, 16)-64, 64) n2 + cross join numbers(pow(2, 8)-64, 64) n3 +; +drop table if exists morton_numbers_mask_3_02457; +create table morton_numbers_mask_3_02457( + n1 UInt64, + n2 UInt64, + n3 UInt64 +) + Engine=MergeTree() + ORDER BY n1; + +insert into morton_numbers_mask_3_02457 +select untuple(mortonDecode((1,1,2), mortonEncode((1,1,2), n1, n2, n3))) +from morton_numbers_mask_02457; + +( + select * from morton_numbers_mask_02457 + union distinct + select * from morton_numbers_mask_3_02457 +) +except +( + select * from morton_numbers_mask_02457 + intersect + select * from morton_numbers_mask_3_02457 +); +drop table if exists morton_numbers_mask_02457; +drop table if exists morton_numbers_mask_3_02457; + +SELECT '----- END -----'; diff --git a/tests/queries/0_stateless/02458_datediff_date32.reference b/tests/queries/0_stateless/02458_datediff_date32.reference index 67bfa895199..9e93af03896 100644 --- a/tests/queries/0_stateless/02458_datediff_date32.reference +++ b/tests/queries/0_stateless/02458_datediff_date32.reference @@ -1,56 +1,56 @@ -- { echo } -- Date32 vs Date32 -SELECT dateDiff('second', toDate32('1900-01-01'), toDate32('1900-01-02')); +SELECT dateDiff('second', toDate32('1927-01-01'), toDate32('1927-01-02')); 86400 -SELECT dateDiff('minute', toDate32('1900-01-01'), toDate32('1900-01-02')); +SELECT dateDiff('minute', toDate32('1927-01-01'), toDate32('1927-01-02')); 1440 -SELECT dateDiff('hour', toDate32('1900-01-01'), toDate32('1900-01-02')); +SELECT dateDiff('hour', toDate32('1927-01-01'), toDate32('1927-01-02')); 24 -SELECT dateDiff('day', toDate32('1900-01-01'), toDate32('1900-01-02')); +SELECT dateDiff('day', toDate32('1927-01-01'), toDate32('1927-01-02')); 1 -SELECT dateDiff('week', toDate32('1900-01-01'), toDate32('1900-01-08')); +SELECT dateDiff('week', toDate32('1927-01-01'), toDate32('1927-01-08')); 1 -SELECT dateDiff('month', toDate32('1900-01-01'), toDate32('1900-02-01')); +SELECT dateDiff('month', toDate32('1927-01-01'), toDate32('1927-02-01')); 1 -SELECT dateDiff('quarter', toDate32('1900-01-01'), 
toDate32('1900-04-01')); +SELECT dateDiff('quarter', toDate32('1927-01-01'), toDate32('1927-04-01')); 1 -SELECT dateDiff('year', toDate32('1900-01-01'), toDate32('1901-01-01')); +SELECT dateDiff('year', toDate32('1927-01-01'), toDate32('1928-01-01')); 1 -- With DateTime64 -- Date32 vs DateTime64 -SELECT dateDiff('second', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3)); +SELECT dateDiff('second', toDate32('1927-01-01'), toDateTime64('1927-01-02 00:00:00', 3)); 86400 -SELECT dateDiff('minute', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3)); +SELECT dateDiff('minute', toDate32('1927-01-01'), toDateTime64('1927-01-02 00:00:00', 3)); 1440 -SELECT dateDiff('hour', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3)); +SELECT dateDiff('hour', toDate32('1927-01-01'), toDateTime64('1927-01-02 00:00:00', 3)); 24 -SELECT dateDiff('day', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3)); +SELECT dateDiff('day', toDate32('1927-01-01'), toDateTime64('1927-01-02 00:00:00', 3)); 1 -SELECT dateDiff('week', toDate32('1900-01-01'), toDateTime64('1900-01-08 00:00:00', 3)); +SELECT dateDiff('week', toDate32('1927-01-01'), toDateTime64('1927-01-08 00:00:00', 3)); 1 -SELECT dateDiff('month', toDate32('1900-01-01'), toDateTime64('1900-02-01 00:00:00', 3)); +SELECT dateDiff('month', toDate32('1927-01-01'), toDateTime64('1927-02-01 00:00:00', 3)); 1 -SELECT dateDiff('quarter', toDate32('1900-01-01'), toDateTime64('1900-04-01 00:00:00', 3)); +SELECT dateDiff('quarter', toDate32('1927-01-01'), toDateTime64('1927-04-01 00:00:00', 3)); 1 -SELECT dateDiff('year', toDate32('1900-01-01'), toDateTime64('1901-01-01 00:00:00', 3)); +SELECT dateDiff('year', toDate32('1927-01-01'), toDateTime64('1928-01-01 00:00:00', 3)); 1 -- DateTime64 vs Date32 -SELECT dateDiff('second', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-02')); +SELECT dateDiff('second', toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-02')); 86400 -SELECT dateDiff('minute', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-02')); +SELECT dateDiff('minute', toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-02')); 1440 -SELECT dateDiff('hour', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-02')); +SELECT dateDiff('hour', toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-02')); 24 -SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-02')); +SELECT dateDiff('day', toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-02')); 1 -SELECT dateDiff('week', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-08')); +SELECT dateDiff('week', toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-08')); 1 -SELECT dateDiff('month', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-02-01')); +SELECT dateDiff('month', toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-02-01')); 1 -SELECT dateDiff('quarter', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-04-01')); +SELECT dateDiff('quarter', toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-04-01')); 1 -SELECT dateDiff('year', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1901-01-01')); +SELECT dateDiff('year', toDateTime64('1927-01-01 00:00:00', 3), toDate32('1928-01-01')); 1 -- With DateTime -- Date32 vs DateTime @@ -123,11 +123,11 @@ SELECT dateDiff('quarter', toDate('2015-08-18'), toDate32('2015-11-18')); SELECT dateDiff('year', toDate('2015-08-18'), toDate32('2016-08-18')); 1 -- Const vs non-const columns -SELECT dateDiff('day', 
toDate32('1900-01-01'), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', toDate32('1927-01-01'), materialize(toDate32('1927-01-02'))); 1 -SELECT dateDiff('day', toDate32('1900-01-01'), materialize(toDateTime64('1900-01-02 00:00:00', 3))); +SELECT dateDiff('day', toDate32('1927-01-01'), materialize(toDateTime64('1927-01-02 00:00:00', 3))); 1 -SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', toDateTime64('1927-01-01 00:00:00', 3), materialize(toDate32('1927-01-02'))); 1 SELECT dateDiff('day', toDate32('2015-08-18'), materialize(toDateTime('2015-08-19 00:00:00'))); 1 @@ -138,11 +138,11 @@ SELECT dateDiff('day', toDate32('2015-08-18'), materialize(toDate('2015-08-19')) SELECT dateDiff('day', toDate('2015-08-18'), materialize(toDate32('2015-08-19'))); 1 -- Non-const vs const columns -SELECT dateDiff('day', materialize(toDate32('1900-01-01')), toDate32('1900-01-02')); +SELECT dateDiff('day', materialize(toDate32('1927-01-01')), toDate32('1927-01-02')); 1 -SELECT dateDiff('day', materialize(toDate32('1900-01-01')), toDateTime64('1900-01-02 00:00:00', 3)); +SELECT dateDiff('day', materialize(toDate32('1927-01-01')), toDateTime64('1927-01-02 00:00:00', 3)); 1 -SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3)), toDate32('1900-01-02')); +SELECT dateDiff('day', materialize(toDateTime64('1927-01-01 00:00:00', 3)), toDate32('1927-01-02')); 1 SELECT dateDiff('day', materialize(toDate32('2015-08-18')), toDateTime('2015-08-19 00:00:00')); 1 @@ -153,11 +153,11 @@ SELECT dateDiff('day', materialize(toDate32('2015-08-18')), toDate('2015-08-19') SELECT dateDiff('day', materialize(toDate('2015-08-18')), toDate32('2015-08-19')); 1 -- Non-const vs non-const columns -SELECT dateDiff('day', materialize(toDate32('1900-01-01')), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', materialize(toDate32('1927-01-01')), materialize(toDate32('1927-01-02'))); 1 -SELECT dateDiff('day', materialize(toDate32('1900-01-01')), materialize(toDateTime64('1900-01-02 00:00:00', 3))); +SELECT dateDiff('day', materialize(toDate32('1927-01-01')), materialize(toDateTime64('1927-01-02 00:00:00', 3))); 1 -SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3)), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', materialize(toDateTime64('1927-01-01 00:00:00', 3)), materialize(toDate32('1927-01-02'))); 1 SELECT dateDiff('day', materialize(toDate32('2015-08-18')), materialize(toDateTime('2015-08-19 00:00:00'))); 1 diff --git a/tests/queries/0_stateless/02458_datediff_date32.sql b/tests/queries/0_stateless/02458_datediff_date32.sql index 4c26e04ac27..b4cb203080e 100644 --- a/tests/queries/0_stateless/02458_datediff_date32.sql +++ b/tests/queries/0_stateless/02458_datediff_date32.sql @@ -1,35 +1,35 @@ -- { echo } -- Date32 vs Date32 -SELECT dateDiff('second', toDate32('1900-01-01'), toDate32('1900-01-02')); -SELECT dateDiff('minute', toDate32('1900-01-01'), toDate32('1900-01-02')); -SELECT dateDiff('hour', toDate32('1900-01-01'), toDate32('1900-01-02')); -SELECT dateDiff('day', toDate32('1900-01-01'), toDate32('1900-01-02')); -SELECT dateDiff('week', toDate32('1900-01-01'), toDate32('1900-01-08')); -SELECT dateDiff('month', toDate32('1900-01-01'), toDate32('1900-02-01')); -SELECT dateDiff('quarter', toDate32('1900-01-01'), toDate32('1900-04-01')); -SELECT dateDiff('year', toDate32('1900-01-01'), toDate32('1901-01-01')); +SELECT dateDiff('second', toDate32('1927-01-01'), 
toDate32('1927-01-02')); +SELECT dateDiff('minute', toDate32('1927-01-01'), toDate32('1927-01-02')); +SELECT dateDiff('hour', toDate32('1927-01-01'), toDate32('1927-01-02')); +SELECT dateDiff('day', toDate32('1927-01-01'), toDate32('1927-01-02')); +SELECT dateDiff('week', toDate32('1927-01-01'), toDate32('1927-01-08')); +SELECT dateDiff('month', toDate32('1927-01-01'), toDate32('1927-02-01')); +SELECT dateDiff('quarter', toDate32('1927-01-01'), toDate32('1927-04-01')); +SELECT dateDiff('year', toDate32('1927-01-01'), toDate32('1928-01-01')); -- With DateTime64 -- Date32 vs DateTime64 -SELECT dateDiff('second', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3)); -SELECT dateDiff('minute', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3)); -SELECT dateDiff('hour', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3)); -SELECT dateDiff('day', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3)); -SELECT dateDiff('week', toDate32('1900-01-01'), toDateTime64('1900-01-08 00:00:00', 3)); -SELECT dateDiff('month', toDate32('1900-01-01'), toDateTime64('1900-02-01 00:00:00', 3)); -SELECT dateDiff('quarter', toDate32('1900-01-01'), toDateTime64('1900-04-01 00:00:00', 3)); -SELECT dateDiff('year', toDate32('1900-01-01'), toDateTime64('1901-01-01 00:00:00', 3)); +SELECT dateDiff('second', toDate32('1927-01-01'), toDateTime64('1927-01-02 00:00:00', 3)); +SELECT dateDiff('minute', toDate32('1927-01-01'), toDateTime64('1927-01-02 00:00:00', 3)); +SELECT dateDiff('hour', toDate32('1927-01-01'), toDateTime64('1927-01-02 00:00:00', 3)); +SELECT dateDiff('day', toDate32('1927-01-01'), toDateTime64('1927-01-02 00:00:00', 3)); +SELECT dateDiff('week', toDate32('1927-01-01'), toDateTime64('1927-01-08 00:00:00', 3)); +SELECT dateDiff('month', toDate32('1927-01-01'), toDateTime64('1927-02-01 00:00:00', 3)); +SELECT dateDiff('quarter', toDate32('1927-01-01'), toDateTime64('1927-04-01 00:00:00', 3)); +SELECT dateDiff('year', toDate32('1927-01-01'), toDateTime64('1928-01-01 00:00:00', 3)); -- DateTime64 vs Date32 -SELECT dateDiff('second', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-02')); -SELECT dateDiff('minute', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-02')); -SELECT dateDiff('hour', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-02')); -SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-02')); -SELECT dateDiff('week', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-08')); -SELECT dateDiff('month', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-02-01')); -SELECT dateDiff('quarter', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-04-01')); -SELECT dateDiff('year', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1901-01-01')); +SELECT dateDiff('second', toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-02')); +SELECT dateDiff('minute', toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-02')); +SELECT dateDiff('hour', toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-02')); +SELECT dateDiff('day', toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-02')); +SELECT dateDiff('week', toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-08')); +SELECT dateDiff('month', toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-02-01')); +SELECT dateDiff('quarter', toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-04-01')); +SELECT dateDiff('year', toDateTime64('1927-01-01 00:00:00', 3), toDate32('1928-01-01')); -- With DateTime -- Date32 vs 
DateTime @@ -74,27 +74,27 @@ SELECT dateDiff('quarter', toDate('2015-08-18'), toDate32('2015-11-18')); SELECT dateDiff('year', toDate('2015-08-18'), toDate32('2016-08-18')); -- Const vs non-const columns -SELECT dateDiff('day', toDate32('1900-01-01'), materialize(toDate32('1900-01-02'))); -SELECT dateDiff('day', toDate32('1900-01-01'), materialize(toDateTime64('1900-01-02 00:00:00', 3))); -SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', toDate32('1927-01-01'), materialize(toDate32('1927-01-02'))); +SELECT dateDiff('day', toDate32('1927-01-01'), materialize(toDateTime64('1927-01-02 00:00:00', 3))); +SELECT dateDiff('day', toDateTime64('1927-01-01 00:00:00', 3), materialize(toDate32('1927-01-02'))); SELECT dateDiff('day', toDate32('2015-08-18'), materialize(toDateTime('2015-08-19 00:00:00'))); SELECT dateDiff('day', toDateTime('2015-08-18 00:00:00'), materialize(toDate32('2015-08-19'))); SELECT dateDiff('day', toDate32('2015-08-18'), materialize(toDate('2015-08-19'))); SELECT dateDiff('day', toDate('2015-08-18'), materialize(toDate32('2015-08-19'))); -- Non-const vs const columns -SELECT dateDiff('day', materialize(toDate32('1900-01-01')), toDate32('1900-01-02')); -SELECT dateDiff('day', materialize(toDate32('1900-01-01')), toDateTime64('1900-01-02 00:00:00', 3)); -SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3)), toDate32('1900-01-02')); +SELECT dateDiff('day', materialize(toDate32('1927-01-01')), toDate32('1927-01-02')); +SELECT dateDiff('day', materialize(toDate32('1927-01-01')), toDateTime64('1927-01-02 00:00:00', 3)); +SELECT dateDiff('day', materialize(toDateTime64('1927-01-01 00:00:00', 3)), toDate32('1927-01-02')); SELECT dateDiff('day', materialize(toDate32('2015-08-18')), toDateTime('2015-08-19 00:00:00')); SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00')), toDate32('2015-08-19')); SELECT dateDiff('day', materialize(toDate32('2015-08-18')), toDate('2015-08-19')); SELECT dateDiff('day', materialize(toDate('2015-08-18')), toDate32('2015-08-19')); -- Non-const vs non-const columns -SELECT dateDiff('day', materialize(toDate32('1900-01-01')), materialize(toDate32('1900-01-02'))); -SELECT dateDiff('day', materialize(toDate32('1900-01-01')), materialize(toDateTime64('1900-01-02 00:00:00', 3))); -SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3)), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', materialize(toDate32('1927-01-01')), materialize(toDate32('1927-01-02'))); +SELECT dateDiff('day', materialize(toDate32('1927-01-01')), materialize(toDateTime64('1927-01-02 00:00:00', 3))); +SELECT dateDiff('day', materialize(toDateTime64('1927-01-01 00:00:00', 3)), materialize(toDate32('1927-01-02'))); SELECT dateDiff('day', materialize(toDate32('2015-08-18')), materialize(toDateTime('2015-08-19 00:00:00'))); SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00')), materialize(toDate32('2015-08-19'))); SELECT dateDiff('day', materialize(toDate32('2015-08-18')), materialize(toDate('2015-08-19'))); diff --git a/tests/queries/0_stateless/02458_default_setting.reference b/tests/queries/0_stateless/02458_default_setting.reference index 376553843ac..8f4532f370b 100644 --- a/tests/queries/0_stateless/02458_default_setting.reference +++ b/tests/queries/0_stateless/02458_default_setting.reference @@ -1,5 +1,5 @@ -1048545 +1048449 100000 1 -1048545 +1048449 0 diff --git a/tests/queries/0_stateless/02469_fix_aliases_parser.reference 
diff --git a/tests/queries/0_stateless/02458_default_setting.reference b/tests/queries/0_stateless/02458_default_setting.reference
index 376553843ac..8f4532f370b 100644
--- a/tests/queries/0_stateless/02458_default_setting.reference
+++ b/tests/queries/0_stateless/02458_default_setting.reference
@@ -1,5 +1,5 @@
-1048545
+1048449
 100000
 1
-1048545
+1048449
 0
diff --git a/tests/queries/0_stateless/02469_fix_aliases_parser.reference b/tests/queries/0_stateless/02469_fix_aliases_parser.reference
new file mode 100644
index 00000000000..09f584c9cd4
--- /dev/null
+++ b/tests/queries/0_stateless/02469_fix_aliases_parser.reference
@@ -0,0 +1,2 @@
+45
+[0]
diff --git a/tests/queries/0_stateless/02469_fix_aliases_parser.sql b/tests/queries/0_stateless/02469_fix_aliases_parser.sql
new file mode 100644
index 00000000000..227d8becdb6
--- /dev/null
+++ b/tests/queries/0_stateless/02469_fix_aliases_parser.sql
@@ -0,0 +1,9 @@
+SELECT sum(number number number) FROM numbers(10); -- { clientError 62 }
+SELECT sum(number number) FROM numbers(10); -- { clientError 62 }
+SELECT sum(number AS number) FROM numbers(10);
+
+SELECT [number number number] FROM numbers(1); -- { clientError 62 }
+SELECT [number number] FROM numbers(1); -- { clientError 62 }
+SELECT [number AS number] FROM numbers(1);
+
+SELECT cast('1234' lhs lhs, 'UInt32'), lhs; -- { clientError 62 }
\ No newline at end of file
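The new parser test above pins down which alias forms are accepted: an explicit `AS` alias inside a function call or array literal parses (the reference file fixes the results at `45` and `[0]`), while a bare repeated identifier is rejected with client error 62. A small illustrative variant, with an arbitrary alias name `n`:

```sql
-- Accepted: explicit AS aliases inside a function call and an array literal.
SELECT sum(number AS n) FROM numbers(10);  -- 45 (0 + 1 + ... + 9)
SELECT [number AS n] FROM numbers(1);      -- [0]

-- Rejected by the parser with a syntax error (error code 62), as the test asserts:
-- SELECT sum(number number) FROM numbers(10);
```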
diff --git a/tests/queries/0_stateless/02473_map_element_nullable.reference b/tests/queries/0_stateless/02473_map_element_nullable.reference
new file mode 100644
index 00000000000..84a9ba03bb4
--- /dev/null
+++ b/tests/queries/0_stateless/02473_map_element_nullable.reference
@@ -0,0 +1,16 @@
+2 \N \N
+2 \N \N
+2 \N \N
+2 \N \N
+2 \N \N
+2 \N \N
+2 \N \N
+2 \N \N
+2 \N \N
+2 \N \N
+2 \N \N
+2 \N \N
+2 \N \N
+2 \N \N
+2 \N \N
+2 \N \N
diff --git a/tests/queries/0_stateless/02473_map_element_nullable.sql b/tests/queries/0_stateless/02473_map_element_nullable.sql
new file mode 100644
index 00000000000..e9c351d112c
--- /dev/null
+++ b/tests/queries/0_stateless/02473_map_element_nullable.sql
@@ -0,0 +1,19 @@
+WITH map(1, 2, 3, NULL) AS m SELECT m[toNullable(1)], m[toNullable(2)], m[toNullable(3)];
+WITH map(1, 2, 3, NULL) AS m SELECT m[materialize(toNullable(1))], m[materialize(toNullable(2))], m[materialize(toNullable(3))];
+WITH materialize(map(1, 2, 3, NULL)) AS m SELECT m[toNullable(1)], m[toNullable(2)], m[toNullable(3)];
+WITH materialize(map(1, 2, 3, NULL)) AS m SELECT m[materialize(toNullable(1))], m[materialize(toNullable(2))], m[materialize(toNullable(3))];
+
+WITH map('a', 2, 'b', NULL) AS m SELECT m[toNullable('a')], m[toNullable('b')], m[toNullable('c')];
+WITH map('a', 2, 'b', NULL) AS m SELECT m[materialize(toNullable('a'))], m[materialize(toNullable('b'))], m[materialize(toNullable('c'))];
+WITH materialize(map('a', 2, 'b', NULL)) AS m SELECT m[toNullable('a')], m[toNullable('b')], m[toNullable('c')];
+WITH materialize(map('a', 2, 'b', NULL)) AS m SELECT m[materialize(toNullable('a'))], m[materialize(toNullable('b'))], m[materialize(toNullable('c'))];
+
+WITH map(1, 2, 3, NULL) AS m SELECT m[1], m[2], m[3];
+WITH map(1, 2, 3, NULL) AS m SELECT m[materialize(1)], m[materialize(2)], m[materialize(3)];
+WITH materialize(map(1, 2, 3, NULL)) AS m SELECT m[1], m[2], m[3];
+WITH materialize(map(1, 2, 3, NULL)) AS m SELECT m[materialize(1)], m[materialize(2)], m[materialize(3)];
+
+WITH map('a', 2, 'b', NULL) AS m SELECT m['a'], m['b'], m['c'];
+WITH map('a', 2, 'b', NULL) AS m SELECT m[materialize('a')], m[materialize('b')], m[materialize('c')];
+WITH materialize(map('a', 2, 'b', NULL)) AS m SELECT m['a'], m['b'], m['c'];
+WITH materialize(map('a', 2, 'b', NULL)) AS m SELECT m[materialize('a')], m[materialize('b')], m[materialize('c')];
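The reference file above expects `2 \N \N` for every combination: with a `Nullable` value type, the subscript operator returns the stored value for a present key, `NULL` for a key explicitly mapped to `NULL`, and `NULL` (the value type's default) for a missing key, whether or not the map or the key is constant. A condensed sketch of that behaviour:

```sql
-- Map(String, Nullable(UInt8)): present key, key stored as NULL, and missing key.
WITH map('a', 2, 'b', NULL) AS m
SELECT
    m['a'] AS present,      -- 2
    m['b'] AS stored_null,  -- \N
    m['c'] AS missing;      -- \N (default of the Nullable value type)
```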
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
index a79982bbd61..92a97a9c60e 100644
--- a/utils/CMakeLists.txt
+++ b/utils/CMakeLists.txt
@@ -44,5 +44,3 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS)
 add_subdirectory (memcpy-bench)
 endif ()
 endif ()
-
-add_subdirectory (package)
diff --git a/utils/package/CMakeLists.txt b/utils/package/CMakeLists.txt
deleted file mode 100644
index 8c8a09adc0f..00000000000
--- a/utils/package/CMakeLists.txt
+++ /dev/null
@@ -1 +0,0 @@
-add_subdirectory (arch)
diff --git a/utils/package/arch/CMakeLists.txt b/utils/package/arch/CMakeLists.txt
deleted file mode 100644
index 4ee754fec56..00000000000
--- a/utils/package/arch/CMakeLists.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-include ("${ClickHouse_SOURCE_DIR}/cmake/version.cmake")
-configure_file (PKGBUILD.in PKGBUILD)
diff --git a/utils/package/arch/PKGBUILD.in b/utils/package/arch/PKGBUILD.in
deleted file mode 100644
index 4e068e8b8a2..00000000000
--- a/utils/package/arch/PKGBUILD.in
+++ /dev/null
@@ -1,33 +0,0 @@
-pkgname=clickhouse
-pkgver=${VERSION_STRING}
-pkgrel=1
-pkgdesc='An open-source column-oriented database management system that allows generating analytical data reports in real time'
-arch=('x86_64')
-url='https://clickhouse.com/'
-license=('Apache')
-
-package() {
-    install -dm 755 $pkgdir/usr/lib/tmpfiles.d
-    install -dm 755 $pkgdir/usr/lib/sysusers.d
-    install -Dm 644 ${CMAKE_CURRENT_SOURCE_DIR}/clickhouse.tmpfiles $pkgdir/usr/lib/tmpfiles.d/clickhouse.conf
-    install -Dm 644 ${CMAKE_CURRENT_SOURCE_DIR}/clickhouse.sysusers $pkgdir/usr/lib/sysusers.d/clickhouse.conf
-    install -dm 755 $pkgdir/etc/clickhouse-server/config.d
-    install -Dm 644 ${CMAKE_CURRENT_SOURCE_DIR}/logging.xml $pkgdir/etc/clickhouse-server/config.d/logging.xml
-    # This code was requisited from kmeaw@ https://aur.archlinux.org/packages/clickhouse/ .
-    SRC=${ClickHouse_SOURCE_DIR}
-    BIN=${ClickHouse_BINARY_DIR}
-    mkdir -p $pkgdir/etc/clickhouse-server/ $pkgdir/etc/clickhouse-client/
-    mkdir -p $pkgdir/usr/bin/
-    mkdir -p $pkgdir/usr/lib/systemd/system
-    ln -s clickhouse-client $pkgdir/usr/bin/clickhouse-server
-    cp $SRC/programs/server/config.xml $SRC/programs/server/users.xml $pkgdir/etc/clickhouse-server/
-    cp $BIN/programs/clickhouse $pkgdir/usr/bin/clickhouse-client
-    patchelf --remove-rpath $pkgdir/usr/bin/clickhouse-client
-    patchelf --replace-needed libz.so.1 libz-ng.so.1 $pkgdir/usr/bin/clickhouse-client
-    cp $SRC/programs/client/clickhouse-client.xml $pkgdir/etc/clickhouse-client/config.xml
-    compiler="libclickhouse-compiler.so"
-    if ! pacman -Q clang | grep '^clang 7'; then
-        compiler=""
-    fi
-    cp $SRC/debian/clickhouse-server.service $pkgdir/usr/lib/systemd/system
-}
diff --git a/utils/package/arch/README.md b/utils/package/arch/README.md
deleted file mode 100644
index 0db5aac8080..00000000000
--- a/utils/package/arch/README.md
+++ /dev/null
@@ -1,17 +0,0 @@
-### Build Arch Linux package
-
-From binary directory:
-
-```
-make
-cd utils/package/arch
-makepkg
-```
-
-### Install and start ClickHouse server
-
-```
-pacman -U clickhouse-*.pkg.tar.xz
-systemctl enable clickhouse-server
-systemctl start clickhouse-server
-```
diff --git a/utils/package/arch/clickhouse.sysusers b/utils/package/arch/clickhouse.sysusers
deleted file mode 100644
index 4381c52c4f2..00000000000
--- a/utils/package/arch/clickhouse.sysusers
+++ /dev/null
@@ -1,3 +0,0 @@
-u clickhouse - "ClickHouse user" /nonexistent /bin/false
-g clickhouse - "ClickHouse group"
-m clickhouse clickhouse
diff --git a/utils/package/arch/clickhouse.tmpfiles b/utils/package/arch/clickhouse.tmpfiles
deleted file mode 100644
index 631aa895f2f..00000000000
--- a/utils/package/arch/clickhouse.tmpfiles
+++ /dev/null
@@ -1 +0,0 @@
-d /var/lib/clickhouse 0700 clickhouse clickhouse
diff --git a/utils/package/arch/logging.xml b/utils/package/arch/logging.xml
deleted file mode 100644
index c7a78442424..00000000000
--- a/utils/package/arch/logging.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-