diff --git a/.github/ISSUE_TEMPLATE/96_installation-issues.md b/.github/ISSUE_TEMPLATE/96_installation-issues.md
new file mode 100644
index 00000000000..c322ccc92ce
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/96_installation-issues.md
@@ -0,0 +1,29 @@
+---
+name: Installation issue
+about: Issue with ClickHouse installation from https://clickhouse.com/docs/en/install/
+title: ''
+labels: comp-install
+assignees: ''
+
+---
+
+**Installation type**
+
+Packages, docker, single binary, curl?
+
+**Source of the ClickHouse**
+
+A link to the source. Or the command you've tried
+
+**Expected result**
+
+What you expected
+
+**The actual result**
+
+What you get
+
+**How to reproduce**
+
+* For Linux-based operating systems: provide a script that reproduces the issue in a clean Docker container created from the official image
+* For anything else: provide steps to reproduce on a system that is as clean as possible
diff --git a/.gitignore b/.gitignore
index af4615a8e6c..09d3f4a4e33 100644
--- a/.gitignore
+++ b/.gitignore
@@ -80,6 +80,7 @@ core
vgcore*
*.deb
+*.tar.zst
*.build
*.upload
*.changes
diff --git a/.gitmodules b/.gitmodules
index abd29c38846..293029ad171 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -287,3 +287,6 @@
[submodule "contrib/corrosion"]
path = contrib/corrosion
url = https://github.com/corrosion-rs/corrosion.git
+[submodule "contrib/morton-nd"]
+ path = contrib/morton-nd
+ url = https://github.com/morton-nd/morton-nd
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 83c1cbf1eb4..22f6afc4901 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,5 @@
### Table of Contents
+**[ClickHouse release v22.10, 2022-10-25](#2210)**
**[ClickHouse release v22.9, 2022-09-22](#229)**
**[ClickHouse release v22.8-lts, 2022-08-18](#228)**
**[ClickHouse release v22.7, 2022-07-21](#227)**
@@ -10,6 +11,136 @@
**[ClickHouse release v22.1, 2022-01-18](#221)**
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**
+### <a id="2210"></a> ClickHouse release 22.10, 2022-10-25
+
+#### Backward Incompatible Change
+* Rename cache commands: `show caches` -> `show filesystem caches`, `describe cache` -> `describe filesystem cache`. [#41508](https://github.com/ClickHouse/ClickHouse/pull/41508) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Remove support for the `WITH TIMEOUT` section for `LIVE VIEW`. This closes [#40557](https://github.com/ClickHouse/ClickHouse/issues/40557). [#42173](https://github.com/ClickHouse/ClickHouse/pull/42173) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove support for the `{database}` macro from the client's prompt. It was displayed incorrectly if the database was unspecified and it was not updated on `USE` statements. This closes [#25891](https://github.com/ClickHouse/ClickHouse/issues/25891). [#42508](https://github.com/ClickHouse/ClickHouse/pull/42508) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### New Feature
+* Composable protocol configuration is added. Now different protocols can be set up with different listen hosts. Protocol wrappers such as PROXYv1 can be set up over any other protocols (TCP, TCP secure, MySQL, Postgres). [#41198](https://github.com/ClickHouse/ClickHouse/pull/41198) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Add `S3` as a new type of the destination of backups. Support BACKUP to S3 with as-is path/data structure. [#42333](https://github.com/ClickHouse/ClickHouse/pull/42333) ([Vitaly Baranov](https://github.com/vitlibar)), [#42232](https://github.com/ClickHouse/ClickHouse/pull/42232) ([Azat Khuzhin](https://github.com/azat)).
+* Added functions (`randUniform`, `randNormal`, `randLogNormal`, `randExponential`, `randChiSquared`, `randStudentT`, `randFisherF`, `randBernoulli`, `randBinomial`, `randNegativeBinomial`, `randPoisson`) to generate random values according to the specified distributions. This closes [#21834](https://github.com/ClickHouse/ClickHouse/issues/21834). [#42411](https://github.com/ClickHouse/ClickHouse/pull/42411) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* An improvement for ClickHouse Keeper: add support for uploading snapshots to S3. S3 information can be defined inside `keeper_server.s3_snapshot`. [#41342](https://github.com/ClickHouse/ClickHouse/pull/41342) ([Antonio Andelic](https://github.com/antonio2368)).
+* Added an aggregate function `analysisOfVariance` (`anova`) to perform a statistical test over several groups of normally distributed observations to find out whether all groups have the same mean or not. Original PR [#37872](https://github.com/ClickHouse/ClickHouse/issues/37872). [#42131](https://github.com/ClickHouse/ClickHouse/pull/42131) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Support limiting of temporary data stored on disk using settings `max_temporary_data_on_disk_size_for_user`/`max_temporary_data_on_disk_size_for_query` . [#40893](https://github.com/ClickHouse/ClickHouse/pull/40893) ([Vladimir C](https://github.com/vdimir)).
+* Add setting `format_json_object_each_row_column_for_object_name` to write/parse object name as column value in JSONObjectEachRow format. [#41703](https://github.com/ClickHouse/ClickHouse/pull/41703) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add BLAKE3 hash-function to SQL. [#33435](https://github.com/ClickHouse/ClickHouse/pull/33435) ([BoloniniD](https://github.com/BoloniniD)).
+* The function `javaHash` has been extended to integers. [#41131](https://github.com/ClickHouse/ClickHouse/pull/41131) ([JackyWoo](https://github.com/JackyWoo)).
+* Add OpenTelemetry support to ON CLUSTER DDL (require `distributed_ddl_entry_format_version` to be set to 4). [#41484](https://github.com/ClickHouse/ClickHouse/pull/41484) ([Frank Chen](https://github.com/FrankChen021)).
+* Added system table `asynchronous_insert_log`. It contains information about asynchronous inserts (including results of queries in fire-and-forget mode (with `wait_for_async_insert=0`)) for better introspection. [#42040](https://github.com/ClickHouse/ClickHouse/pull/42040) ([Anton Popov](https://github.com/CurtizJ)).
+* Add support for methods `lz4`, `bz2`, `snappy` in HTTP's `Accept-Encoding` which is a non-standard extension to HTTP protocol. [#42071](https://github.com/ClickHouse/ClickHouse/pull/42071) ([Nikolay Degterinsky](https://github.com/evillique)).
+
+#### Experimental Feature
+* Added new infrastructure for query analysis and planning under the `allow_experimental_analyzer` setting. [#31796](https://github.com/ClickHouse/ClickHouse/pull/31796) ([Maksim Kita](https://github.com/kitaisreal)).
+* Initial implementation of Kusto Query Language. Please don't use it. [#37961](https://github.com/ClickHouse/ClickHouse/pull/37961) ([Yong Wang](https://github.com/kashwy)).
+
+#### Performance Improvement
+* Relax the "Too many parts" threshold. This closes [#6551](https://github.com/ClickHouse/ClickHouse/issues/6551). Now ClickHouse will allow more parts in a partition if the average part size is large enough (at least 10 GiB). This allows to have up to petabytes of data in a single partition of a single table on a single server, which is possible using disk shelves or object storage. [#42002](https://github.com/ClickHouse/ClickHouse/pull/42002) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Implement operator precedence element parser to make the required stack size smaller. [#34892](https://github.com/ClickHouse/ClickHouse/pull/34892) ([Nikolay Degterinsky](https://github.com/evillique)).
+* DISTINCT in order optimization leverage sorting properties of data streams. This improvement will enable reading in order for DISTINCT if applicable (before it was necessary to provide ORDER BY for columns in DISTINCT). [#41014](https://github.com/ClickHouse/ClickHouse/pull/41014) ([Igor Nikonov](https://github.com/devcrafter)).
+* ColumnVector: optimize UInt8 index with AVX512VBMI. [#41247](https://github.com/ClickHouse/ClickHouse/pull/41247) ([Guo Wangyang](https://github.com/guowangy)).
+* Optimize the lock contentions for `ThreadGroupStatus::mutex`. The performance experiments of **SSB** (Star Schema Benchmark) on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) shows that this change could bring a **2.95x** improvement of the geomean of all subcases' QPS. [#41675](https://github.com/ClickHouse/ClickHouse/pull/41675) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
+* Add `ldapr` capabilities to AArch64 builds. This is supported from Graviton 2+, Azure and GCP instances. Only appeared in clang-15 [not so long ago](https://github.com/llvm/llvm-project/commit/9609b5daffe9fd28d83d83da895abc5113f76c24). [#41778](https://github.com/ClickHouse/ClickHouse/pull/41778) ([Daniel Kutenin](https://github.com/danlark1)).
+* Improve performance when comparing strings and one argument is an empty constant string. [#41870](https://github.com/ClickHouse/ClickHouse/pull/41870) ([Jiebin Sun](https://github.com/jiebinn)).
+* Optimize `insertFrom` of ColumnAggregateFunction to share Aggregate State in some cases. [#41960](https://github.com/ClickHouse/ClickHouse/pull/41960) ([flynn](https://github.com/ucasfl)).
+* Make writing to `azure_blob_storage` disks faster (respect `max_single_part_upload_size` instead of writing a block per each buffer size). Inefficiency mentioned in [#41754](https://github.com/ClickHouse/ClickHouse/issues/41754). [#42041](https://github.com/ClickHouse/ClickHouse/pull/42041) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Make thread ids in the process list and query_log unique to avoid waste. [#42180](https://github.com/ClickHouse/ClickHouse/pull/42180) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Support skipping cache completely (both download to cache and reading cached data) in case the requested read range exceeds the threshold defined by the cache setting `bypass_cache_threashold` (this must be enabled with `enable_bypass_cache_with_threshold`). [#42418](https://github.com/ClickHouse/ClickHouse/pull/42418) ([Han Shukai](https://github.com/KinderRiven)). This helps on slow local disks.
+
+#### Improvement
+* Add setting `allow_implicit_no_password`: in combination with `allow_no_password` it forbids creating a user with no password unless `IDENTIFIED WITH no_password` is explicitly specified. [#41341](https://github.com/ClickHouse/ClickHouse/pull/41341) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Embedded Keeper will always start in the background allowing ClickHouse to start without achieving quorum. [#40991](https://github.com/ClickHouse/ClickHouse/pull/40991) ([Antonio Andelic](https://github.com/antonio2368)).
+* Made reestablishing a new connection to ZooKeeper more reactive in case of expiration of the previous one. Previously there was a task which spawns every minute by default and thus a table could be in readonly state for about this time. [#41092](https://github.com/ClickHouse/ClickHouse/pull/41092) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Now projections can be used with zero copy replication (zero-copy replication is a non-production feature). [#41147](https://github.com/ClickHouse/ClickHouse/pull/41147) ([alesapin](https://github.com/alesapin)).
+* Support expression `(EXPLAIN SELECT ...)` in a subquery. Queries like `SELECT * FROM (EXPLAIN PIPELINE SELECT col FROM TABLE ORDER BY col)` became valid. [#40630](https://github.com/ClickHouse/ClickHouse/pull/40630) ([Vladimir C](https://github.com/vdimir)).
+* Allow changing `async_insert_max_data_size` or `async_insert_busy_timeout_ms` in scope of query. E.g. user wants to insert data rarely and she doesn't have access to the server config to tune default settings. [#40668](https://github.com/ClickHouse/ClickHouse/pull/40668) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Improvements for reading from remote filesystems, made threadpool size for reads/writes configurable. Closes [#41070](https://github.com/ClickHouse/ClickHouse/issues/41070). [#41011](https://github.com/ClickHouse/ClickHouse/pull/41011) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Support all combinators combination in WindowTransform/arrayReduce*/initializeAggregation/aggregate functions versioning. Previously combinators like `ForEach/Resample/Map` didn't work in these places, using them led to an exception like `State function ... inserts results into non-state column`. [#41107](https://github.com/ClickHouse/ClickHouse/pull/41107) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add function `tryDecrypt` that returns NULL when decrypt fails (e.g. decrypt with incorrect key) instead of throwing an exception. [#41206](https://github.com/ClickHouse/ClickHouse/pull/41206) ([Duc Canh Le](https://github.com/canhld94)).
+* Add the `unreserved_space` column to the `system.disks` table to check how much space is not taken by reservations per disk. [#41254](https://github.com/ClickHouse/ClickHouse/pull/41254) ([filimonov](https://github.com/filimonov)).
+* Support s3 authorization headers in table function arguments. [#41261](https://github.com/ClickHouse/ClickHouse/pull/41261) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add support for MultiRead in Keeper and internal ZooKeeper client (this is an extension to ZooKeeper protocol, only available in ClickHouse Keeper). [#41410](https://github.com/ClickHouse/ClickHouse/pull/41410) ([Antonio Andelic](https://github.com/antonio2368)).
+* Add support for decimal type comparing with floating point literal in IN operator. [#41544](https://github.com/ClickHouse/ClickHouse/pull/41544) ([liang.huang](https://github.com/lhuang09287750)).
+* Allow readable size values (like `1TB`) in cache config. [#41688](https://github.com/ClickHouse/ClickHouse/pull/41688) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* ClickHouse could cache stale DNS entries for some period of time (15 seconds by default) until the cache is updated asynchronously. During these periods ClickHouse could nevertheless try to establish a connection and produce errors. This behavior is fixed. [#41707](https://github.com/ClickHouse/ClickHouse/pull/41707) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Add interactive history search with fzf-like utility (fzf/sk) for `clickhouse-client`/`clickhouse-local` (note you can use `FZF_DEFAULT_OPTS`/`SKIM_DEFAULT_OPTIONS` to additionally configure the behavior). [#41730](https://github.com/ClickHouse/ClickHouse/pull/41730) ([Azat Khuzhin](https://github.com/azat)).
+*
+Only allow clients connecting to a secure server with an invalid certificate to proceed with the '--accept-certificate' flag. [#41743](https://github.com/ClickHouse/ClickHouse/pull/41743) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Add function `tryBase58Decode`, similar to the existing function `tryBase64Decode`. [#41824](https://github.com/ClickHouse/ClickHouse/pull/41824) ([Robert Schulze](https://github.com/rschu1ze)).
+* Improve feedback when replacing partition with different primary key. Fixes [#34798](https://github.com/ClickHouse/ClickHouse/issues/34798). [#41838](https://github.com/ClickHouse/ClickHouse/pull/41838) ([Salvatore](https://github.com/tbsal)).
+* Fix parallel parsing: segmentator now checks `max_block_size`. This fixed memory overallocation in case of parallel parsing and small LIMIT. [#41852](https://github.com/ClickHouse/ClickHouse/pull/41852) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Don't add "TABLE_IS_DROPPED" exception to `system.errors` if it's happened during SELECT from a system table and was ignored. [#41908](https://github.com/ClickHouse/ClickHouse/pull/41908) ([AlfVII](https://github.com/AlfVII)).
+* Improve option `enable_extended_results_for_datetime_functions` to return results of type DateTime64 for functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute` and `timeSlot`. [#41910](https://github.com/ClickHouse/ClickHouse/pull/41910) ([Roman Vasin](https://github.com/rvasin)).
+* Improve `DateTime` type inference for text formats. Now it respects setting `date_time_input_format` and doesn't try to infer datetimes from numbers as timestamps. Closes [#41389](https://github.com/ClickHouse/ClickHouse/issues/41389) Closes [#42206](https://github.com/ClickHouse/ClickHouse/issues/42206). [#41912](https://github.com/ClickHouse/ClickHouse/pull/41912) ([Kruglov Pavel](https://github.com/Avogar)).
+* Remove confusing warning when inserting with `perform_ttl_move_on_insert` = false. [#41980](https://github.com/ClickHouse/ClickHouse/pull/41980) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Allow user to write `countState(*)` similar to `count(*)`. This closes [#9338](https://github.com/ClickHouse/ClickHouse/issues/9338). [#41983](https://github.com/ClickHouse/ClickHouse/pull/41983) ([Amos Bird](https://github.com/amosbird)).
+* Fix `rankCorr` size overflow. [#42020](https://github.com/ClickHouse/ClickHouse/pull/42020) ([Duc Canh Le](https://github.com/canhld94)).
+* Added an option to specify an arbitrary string as an environment name in the Sentry's config for more handy reports. [#42037](https://github.com/ClickHouse/ClickHouse/pull/42037) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix parsing out-of-range Date from CSV. [#42044](https://github.com/ClickHouse/ClickHouse/pull/42044) ([Andrey Zvonov](https://github.com/zvonand)).
+* `parseDateTimeBestEffort` now supports comma between date and time. Closes [#42038](https://github.com/ClickHouse/ClickHouse/issues/42038). [#42049](https://github.com/ClickHouse/ClickHouse/pull/42049) ([flynn](https://github.com/ucasfl)).
+* Improved stale replica recovery process for `ReplicatedMergeTree`. If a lost replica has some parts which are absent from a healthy replica, but these parts should appear in the future according to the replication queue of the healthy replica, then the lost replica will keep such parts instead of detaching them. [#42134](https://github.com/ClickHouse/ClickHouse/pull/42134) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Add a possibility to use `Date32` arguments for date_diff function. Fix issue in date_diff function when using DateTime64 arguments with a start date before Unix epoch and end date after Unix epoch. [#42308](https://github.com/ClickHouse/ClickHouse/pull/42308) ([Roman Vasin](https://github.com/rvasin)).
+* When uploading big parts to Minio, 'Complete Multipart Upload' can take a long time. Minio sends heartbeats every 10 seconds (see https://github.com/minio/minio/pull/7198). But clickhouse times out earlier, because the default send/receive timeout is [set](https://github.com/ClickHouse/ClickHouse/blob/cc24fcd6d5dfb67f5f66f5483e986bd1010ad9cf/src/IO/S3/PocoHTTPClient.cpp#L123) to 5 seconds. [#42321](https://github.com/ClickHouse/ClickHouse/pull/42321) ([filimonov](https://github.com/filimonov)).
+* Fix rarely invalid cast of aggregate state types with complex types such as Decimal. This fixes [#42408](https://github.com/ClickHouse/ClickHouse/issues/42408). [#42417](https://github.com/ClickHouse/ClickHouse/pull/42417) ([Amos Bird](https://github.com/amosbird)).
+* Allow to use `Date32` arguments for `dateName` function. [#42554](https://github.com/ClickHouse/ClickHouse/pull/42554) ([Roman Vasin](https://github.com/rvasin)).
+* Now filters with NULL literals will be used during index analysis. [#34063](https://github.com/ClickHouse/ClickHouse/issues/34063). [#41842](https://github.com/ClickHouse/ClickHouse/pull/41842) ([Amos Bird](https://github.com/amosbird)).
+
+#### Build/Testing/Packaging Improvement
+* Add fuzzer for table definitions [#40096](https://github.com/ClickHouse/ClickHouse/pull/40096) ([Anton Popov](https://github.com/CurtizJ)). This represents the biggest advancement for ClickHouse testing in this year so far.
+* Beta version of the ClickHouse Cloud service is released: [https://clickhouse.cloud/](https://clickhouse.cloud/). It provides the easiest way to use ClickHouse (even slightly easier than the single-command installation).
+* Added support of WHERE clause generation to AST Fuzzer and possibility to add or remove ORDER BY and WHERE clause. [#38519](https://github.com/ClickHouse/ClickHouse/pull/38519) ([Ilya Yatsishin](https://github.com/qoega)).
+* Aarch64 binaries now require at least ARMv8.2, released in 2016. Most notably, this enables use of ARM LSE, i.e. native atomic operations. Also, CMake build option "NO_ARMV81_OR_HIGHER" has been added to allow compilation of binaries for older ARMv8.0 hardware, e.g. Raspberry Pi 4. [#41610](https://github.com/ClickHouse/ClickHouse/pull/41610) ([Robert Schulze](https://github.com/rschu1ze)).
+* Allow building ClickHouse with Musl (small changes after it was already supported but broken). [#41987](https://github.com/ClickHouse/ClickHouse/pull/41987) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add the `$CLICKHOUSE_CRONFILE` file checking to avoid running the `sed` command to get the file not found error on install. [#42081](https://github.com/ClickHouse/ClickHouse/pull/42081) ([Chun-Sheng, Li](https://github.com/peter279k)).
+* Update cctz to `2022e` to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)).
+* Add Rust code support into ClickHouse with BLAKE3 hash-function library as an example. [#33435](https://github.com/ClickHouse/ClickHouse/pull/33435) ([BoloniniD](https://github.com/BoloniniD)).
+
+#### Bug Fix (user-visible misbehavior in official stable or prestable release)
+
+* Choose correct aggregation method for `LowCardinality` with big integer types. [#42342](https://github.com/ClickHouse/ClickHouse/pull/42342) ([Duc Canh Le](https://github.com/canhld94)).
+* Several fixes for `web` disk. [#41652](https://github.com/ClickHouse/ClickHouse/pull/41652) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fixes an issue that causes docker run to fail if `https_port` is not present in config. [#41693](https://github.com/ClickHouse/ClickHouse/pull/41693) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Mutations were not cancelled properly on server shutdown or `SYSTEM STOP MERGES` query and cancellation might take long time, it's fixed. [#41699](https://github.com/ClickHouse/ClickHouse/pull/41699) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix wrong result of queries with `ORDER BY` or `GROUP BY` by columns from prefix of sorting key, wrapped into monotonic functions, with enable "read in order" optimization (settings `optimize_read_in_order` and `optimize_aggregation_in_order`). [#41701](https://github.com/ClickHouse/ClickHouse/pull/41701) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed "Part ... intersects part ..." error that might happen in extremely rare cases if replica was restarted just after detaching some part as broken. [#41741](https://github.com/ClickHouse/ClickHouse/pull/41741) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Don't allow to create or alter merge tree tables with column name `_row_exists`, which is reserved for lightweight delete. Fixed [#41716](https://github.com/ClickHouse/ClickHouse/issues/41716). [#41763](https://github.com/ClickHouse/ClickHouse/pull/41763) ([Jianmei Zhang](https://github.com/zhangjmruc)).
+* Fix a bug that CORS headers are missing in some HTTP responses. [#41792](https://github.com/ClickHouse/ClickHouse/pull/41792) ([Frank Chen](https://github.com/FrankChen021)).
+* 22.9 might fail to startup `ReplicatedMergeTree` table if that table was created by 20.3 or older version and was never altered, it's fixed. Fixes [#41742](https://github.com/ClickHouse/ClickHouse/issues/41742). [#41796](https://github.com/ClickHouse/ClickHouse/pull/41796) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* When the batch sending fails for some reason, it cannot be automatically recovered, and if it is not processed in time, it will lead to accumulation, and the printed error message will become longer and longer, which will cause the http thread to block. [#41813](https://github.com/ClickHouse/ClickHouse/pull/41813) ([zhongyuankai](https://github.com/zhongyuankai)).
+* Fix compact parts with compressed marks setting. Fixes [#41783](https://github.com/ClickHouse/ClickHouse/issues/41783) and [#41746](https://github.com/ClickHouse/ClickHouse/issues/41746). [#41823](https://github.com/ClickHouse/ClickHouse/pull/41823) ([alesapin](https://github.com/alesapin)).
+* Old versions of Replicated database don't have a special marker in [Zoo]Keeper. We need to check only whether the node contains some obscure data instead of a special mark. [#41875](https://github.com/ClickHouse/ClickHouse/pull/41875) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix possible exception in fs cache. [#41884](https://github.com/ClickHouse/ClickHouse/pull/41884) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix `use_environment_credentials` for s3 table function. [#41970](https://github.com/ClickHouse/ClickHouse/pull/41970) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fixed "Directory already exists and is not empty" error on detaching broken part that might prevent `ReplicatedMergeTree` table from starting replication. Fixes [#40957](https://github.com/ClickHouse/ClickHouse/issues/40957). [#41981](https://github.com/ClickHouse/ClickHouse/pull/41981) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* `toDateTime64` now returns the same output with negative integer and float arguments. [#42025](https://github.com/ClickHouse/ClickHouse/pull/42025) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix write into `azure_blob_storage`. Partially closes [#41754](https://github.com/ClickHouse/ClickHouse/issues/41754). [#42034](https://github.com/ClickHouse/ClickHouse/pull/42034) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix the `bzip2` decoding issue for specific `bzip2` files. [#42046](https://github.com/ClickHouse/ClickHouse/pull/42046) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix SQL function `toLastDayOfMonth` with setting "enable_extended_results_for_datetime_functions = 1" at the beginning of the extended range (January 1900). - Fix SQL function "toRelativeWeekNum()" with setting "enable_extended_results_for_datetime_functions = 1" at the end of extended range (December 2299). - Improve the performance of SQL functions "toISOYear()", "toFirstDayNumOfISOYearIndex()" and "toYearWeekOfNewyearMode()" by avoiding unnecessary index arithmetics. [#42084](https://github.com/ClickHouse/ClickHouse/pull/42084) ([Roman Vasin](https://github.com/rvasin)).
+* The maximum size of fetches for each table accidentally was set to 8 while the pool size could be bigger. Now the maximum size of fetches for table is equal to the pool size. [#42090](https://github.com/ClickHouse/ClickHouse/pull/42090) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* A table might be shut down and a dictionary might be detached before checking if can be dropped without breaking dependencies between table, it's fixed. Fixes [#41982](https://github.com/ClickHouse/ClickHouse/issues/41982). [#42106](https://github.com/ClickHouse/ClickHouse/pull/42106) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix bad inefficiency of `remote_filesystem_read_method=read` with filesystem cache. Closes [#42125](https://github.com/ClickHouse/ClickHouse/issues/42125). [#42129](https://github.com/ClickHouse/ClickHouse/pull/42129) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix possible timeout exception for distributed queries with use_hedged_requests = 0. [#42130](https://github.com/ClickHouse/ClickHouse/pull/42130) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed a minor bug inside function `runningDifference` in case of using it with `Date32` type. Previously `Date` was used and it may cause some logical errors like `Bad cast from type DB::ColumnVector to DB::ColumnVector'`. [#42143](https://github.com/ClickHouse/ClickHouse/pull/42143) ([Alfred Xu](https://github.com/sperlingxx)).
+* Fix reusing of files > 4GB from base backup. [#42146](https://github.com/ClickHouse/ClickHouse/pull/42146) ([Azat Khuzhin](https://github.com/azat)).
+* DISTINCT in order fails with LOGICAL_ERROR if first column in sorting key contains function. [#42186](https://github.com/ClickHouse/ClickHouse/pull/42186) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix read from `Buffer` tables with read in order desc. [#42236](https://github.com/ClickHouse/ClickHouse/pull/42236) ([Duc Canh Le](https://github.com/canhld94)).
+* Fix a bug which prevents ClickHouse from starting when the `background_pool_size` setting is set on default profile but `background_merges_mutations_concurrency_ratio` is not. [#42315](https://github.com/ClickHouse/ClickHouse/pull/42315) ([nvartolomei](https://github.com/nvartolomei)).
+* `ALTER UPDATE` of attached part (with columns different from table schema) could create an invalid `columns.txt` metadata on disk. Reading from such part could fail with errors or return invalid data. Fixes [#42161](https://github.com/ClickHouse/ClickHouse/issues/42161). [#42319](https://github.com/ClickHouse/ClickHouse/pull/42319) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Setting `additional_table_filters` was not applied to `Distributed` storage. Fixes [#41692](https://github.com/ClickHouse/ClickHouse/issues/41692). [#42322](https://github.com/ClickHouse/ClickHouse/pull/42322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix a data race in query finish/cancel. This closes [#42346](https://github.com/ClickHouse/ClickHouse/issues/42346). [#42362](https://github.com/ClickHouse/ClickHouse/pull/42362) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* This reverts [#40217](https://github.com/ClickHouse/ClickHouse/issues/40217) which introduced a regression in date/time functions. [#42367](https://github.com/ClickHouse/ClickHouse/pull/42367) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix assert cast in join on falsy condition, Close [#42380](https://github.com/ClickHouse/ClickHouse/issues/42380). [#42407](https://github.com/ClickHouse/ClickHouse/pull/42407) ([Vladimir C](https://github.com/vdimir)).
+* Fix buffer overflow in the processing of Decimal data types. This closes [#42451](https://github.com/ClickHouse/ClickHouse/issues/42451). [#42465](https://github.com/ClickHouse/ClickHouse/pull/42465) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* `AggregateFunctionQuantile` now correctly works with UInt128 columns. Previously, the quantile state interpreted `UInt128` columns as `Int128` which could have led to incorrect results. [#42473](https://github.com/ClickHouse/ClickHouse/pull/42473) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix bad_cast assert during INSERT into `Annoy` indexes over non-Float32 columns. `Annoy` indices is an experimental feature. [#42485](https://github.com/ClickHouse/ClickHouse/pull/42485) ([Robert Schulze](https://github.com/rschu1ze)).
+* Arithmetic operator with Date or DateTime and 128 or 256-bit integer was referencing uninitialized memory. [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix unexpected table loading error when partition key contains alias function names during server upgrade. [#36379](https://github.com/ClickHouse/ClickHouse/pull/36379) ([Amos Bird](https://github.com/amosbird)).
+
+
### ClickHouse release 22.9, 2022-09-22
#### Backward Incompatible Change
diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake
index 7cba7c7548d..a554992caf3 100644
--- a/cmake/cpu_features.cmake
+++ b/cmake/cpu_features.cmake
@@ -81,6 +81,7 @@ elseif (ARCH_AMD64)
option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 0)
option (ENABLE_AVX512_VBMI "Use AVX512_VBMI instruction on x86_64 (depends on ENABLE_AVX512)" 0)
option (ENABLE_BMI "Use BMI instructions on x86_64" 0)
+ option (ENABLE_BMI2 "Use BMI2 instructions on x86_64 (depends on ENABLE_AVX2)" 0)
option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for specific operations on x86_64" 0)
option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0)
@@ -96,6 +97,7 @@ elseif (ARCH_AMD64)
SET(ENABLE_AVX512 0)
SET(ENABLE_AVX512_VBMI 0)
SET(ENABLE_BMI 0)
+ SET(ENABLE_BMI2 0)
SET(ENABLE_AVX2_FOR_SPEC_OP 0)
SET(ENABLE_AVX512_FOR_SPEC_OP 0)
endif()
@@ -243,6 +245,20 @@ elseif (ARCH_AMD64)
set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
endif ()
+ set (TEST_FLAG "-mbmi2") # Flag under test: enables BMI2 instructions (e.g. PDEP/PEXT).
+ set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") # Compile the probe with the flag and without optimizations.
+ check_cxx_source_compiles("
+ #include <immintrin.h>
+ int main() {
+ auto a = _pdep_u64(0, 0);
+ (void)a;
+ return 0;
+ }
+ " HAVE_BMI2)
+ if (HAVE_BMI2 AND HAVE_AVX2 AND ENABLE_AVX2 AND ENABLE_BMI2) # BMI2 is enabled only together with AVX2, both requested and supported by the compiler.
+ set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
+ endif ()
+
# Limit avx2/avx512 flag for specific source build
set (X86_INTRINSICS_FLAGS "")
if (ENABLE_AVX2_FOR_SPEC_OP)
diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake
index 73610545009..f0cef54b0b8 100644
--- a/cmake/sanitize.cmake
+++ b/cmake/sanitize.cmake
@@ -85,7 +85,7 @@ if (SANITIZE)
# and they have a bunch of flags not halt the program if UIO happend and even to silence that warnings.
# But for unknown reason that flags don't work with ClickHouse or we don't understand how to properly use them,
# that's why we often receive reports about UIO. The simplest way to avoid this is just set this flag here.
- set(UBSAN_FLAGS "${SAN_FLAGS} -fno-sanitize=unsigned-integer-overflow")
+ set(UBSAN_FLAGS "${UBSAN_FLAGS} -fno-sanitize=unsigned-integer-overflow")
endif()
if (COMPILER_CLANG)
set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index c2b16ae6dd6..8ebd4ab55d3 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -165,6 +165,7 @@ add_contrib (sqlite-cmake sqlite-amalgamation)
add_contrib (s2geometry-cmake s2geometry)
add_contrib (c-ares-cmake c-ares)
add_contrib (qpl-cmake qpl)
+add_contrib (morton-nd-cmake morton-nd)
add_contrib(annoy-cmake annoy)
diff --git a/contrib/morton-nd b/contrib/morton-nd
new file mode 160000
index 00000000000..3795491a4aa
--- /dev/null
+++ b/contrib/morton-nd
@@ -0,0 +1 @@
+Subproject commit 3795491a4aa3cdc916c8583094683f0d68df5bc0
diff --git a/contrib/morton-nd-cmake/CMakeLists.txt b/contrib/morton-nd-cmake/CMakeLists.txt
new file mode 100644
index 00000000000..4842781503f
--- /dev/null
+++ b/contrib/morton-nd-cmake/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_library(_morton_nd INTERFACE)
+target_include_directories(_morton_nd SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/morton-nd/include/")
+add_library(ch_contrib::morton_nd ALIAS _morton_nd)
diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile
index 2954cd574d0..77afc3e924b 100644
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@@ -73,7 +73,7 @@ RUN apt-get install binutils-riscv64-linux-gnu
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
-ARG NFPM_VERSION=2.18.1
+ARG NFPM_VERSION=2.20.0
RUN arch=${TARGETARCH:-amd64} \
&& curl -Lo /tmp/nfpm.deb "https://github.com/goreleaser/nfpm/releases/download/v${NFPM_VERSION}/nfpm_${arch}.deb" \
diff --git a/docker/packager/packager b/docker/packager/packager
index b4aa4ebdd91..83629dc7408 100755
--- a/docker/packager/packager
+++ b/docker/packager/packager
@@ -208,6 +208,7 @@ def parse_env_variables(
cxx = cc.replace("gcc", "g++").replace("clang", "clang++")
if package_type == "deb":
+ # NOTE: These are the env variables for the packages/build script
result.append("MAKE_DEB=true")
cmake_flags.append("-DENABLE_TESTS=0")
cmake_flags.append("-DENABLE_UTILS=0")
@@ -268,6 +269,7 @@ def parse_env_variables(
result.append('DISTCC_HOSTS="localhost/`nproc`"')
if additional_pkgs:
+ # NOTE: These are the env variables for the packages/build script
result.append("MAKE_APK=true")
result.append("MAKE_RPM=true")
result.append("MAKE_TGZ=true")
diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index 9d6cf22c817..de9125d565b 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -136,6 +136,7 @@ function clone_submodules
contrib/wyhash
contrib/hashidsxx
contrib/c-ares
+ contrib/morton-nd
)
git submodule sync
diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh
index 27c96acbae1..7058853b43e 100644
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@@ -271,10 +271,6 @@ clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_
|| (echo -e 'Server failed to start (see application_errors.txt and clickhouse-server.clean.log)\tFAIL' >> /test_output/test_results.tsv \
&& grep -a ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt)
-echo "Get previous release tag"
-previous_release_tag=$(clickhouse-client --query="SELECT version()" | get_previous_release_tag)
-echo $previous_release_tag
-
stop
[ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL"
@@ -332,6 +328,10 @@ zgrep -Fa " received signal " /test_output/gdb.log > /dev/null \
echo -e "Backward compatibility check\n"
+echo "Get previous release tag"
+previous_release_tag=$(clickhouse-client --version | grep -o "[0-9]*\.[0-9]*\.[0-9]*\.[0-9]*" | get_previous_release_tag)
+echo $previous_release_tag
+
echo "Clone previous release repository"
git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository
@@ -480,6 +480,7 @@ else
-e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \
-e "The set of parts restored in place of" \
-e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
+ -e "Code: 269. DB::Exception: Destination table is myself" \
/var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \
&& echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
diff --git a/docker/test/stress/stress b/docker/test/stress/stress
index 7f3f38bd8f5..a0ec86f7fbe 100755
--- a/docker/test/stress/stress
+++ b/docker/test/stress/stress
@@ -286,9 +286,7 @@ if __name__ == "__main__":
# But right now it should work, since neither hung check, nor 00001_select_1 has GROUP BY.
"--client-option",
"max_untracked_memory=1Gi",
- "--client-option",
"max_memory_usage_for_user=0",
- "--client-option",
"memory_profiler_step=1Gi",
# Use system database to avoid CREATE/DROP DATABASE queries
"--database=system",
diff --git a/docs/en/getting-started/example-datasets/uk-price-paid.md b/docs/en/getting-started/example-datasets/uk-price-paid.md
index ef20c03883f..2a89bfda2e7 100644
--- a/docs/en/getting-started/example-datasets/uk-price-paid.md
+++ b/docs/en/getting-started/example-datasets/uk-price-paid.md
@@ -101,7 +101,7 @@ SELECT count()
FROM uk_price_paid
```
-At the time this query was executed, the dataset had 27,450,499 rows. Let's see what the storage size is of the table in ClickHouse:
+At the time this query was run, the dataset had 27,450,499 rows. Let's see what the storage size is of the table in ClickHouse:
```sql
SELECT formatReadableSize(total_bytes)
@@ -342,7 +342,7 @@ The result looks like:
## Let's Speed Up Queries Using Projections {#speedup-with-projections}
-[Projections](../../sql-reference/statements/alter/projection.md) allow you to improve query speeds by storing pre-aggregated data in whatever format you want. In this example, we create a projection that keeps track of the average price, total price, and count of properties grouped by the year, district and town. At execution time, ClickHouse will use your projection if it thinks the projection can improve the performance fo the query (you don't have to do anything special to use the projection - ClickHouse decides for you when the projection will be useful).
+[Projections](../../sql-reference/statements/alter/projection.md) allow you to improve query speeds by storing pre-aggregated data in whatever format you want. In this example, we create a projection that keeps track of the average price, total price, and count of properties grouped by the year, district and town. At query time, ClickHouse will use your projection if it thinks the projection can improve the performance of the query (you don't have to do anything special to use the projection - ClickHouse decides for you when the projection will be useful).
### Build a Projection {#build-projection}
diff --git a/docs/en/operations/troubleshooting.md b/docs/en/operations/troubleshooting.md
index 93bd56087a2..6a1ca3176ad 100644
--- a/docs/en/operations/troubleshooting.md
+++ b/docs/en/operations/troubleshooting.md
@@ -17,6 +17,33 @@ title: Troubleshooting
- Check firewall settings.
- If you cannot access the repository for any reason, download packages as described in the [install guide](../getting-started/install.md) article and install them manually using the `sudo dpkg -i ` command. You will also need the `tzdata` package.
+### You Cannot Update Deb Packages from ClickHouse Repository with Apt-get {#you-cannot-update-deb-packages-from-clickhouse-repository-with-apt-get}
+
+- The issue may happen when the GPG key is changed.
+
+Please use the following scripts to resolve the issue:
+
+```bash
+sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754
+sudo apt-get update
+```
+
+### You Get the Unsupported Architecture Warning with Apt-get {#you-get-the-unsupported-architecture-warning-with-apt-get}
+
+- The complete warning message is as follows:
+
+```
+N: Skipping acquire of configured file 'main/binary-i386/Packages' as repository 'https://packages.clickhouse.com/deb stable InRelease' doesn't support architecture 'i386'
+```
+
+To resolve the above issue, please use the following script:
+
+```bash
+sudo rm /var/lib/apt/lists/packages.clickhouse.com_* /var/lib/dpkg/arch
+sudo apt-get clean
+sudo apt-get autoclean
+```
+
## Connecting to the Server {#troubleshooting-accepts-no-connections}
Possible issues:
diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md
index eb357df19db..4a6e46e1759 100644
--- a/docs/en/sql-reference/functions/encoding-functions.md
+++ b/docs/en/sql-reference/functions/encoding-functions.md
@@ -376,14 +376,6 @@ Result:
└─────┘
```
-## UUIDStringToNum(str)
-
-Accepts a string containing 36 characters in the format `123e4567-e89b-12d3-a456-426655440000`, and returns it as a set of bytes in a FixedString(16).
-
-## UUIDNumToString(str)
-
-Accepts a FixedString(16) value. Returns a string containing 36 characters in text format.
-
## bitmaskToList(num)
Accepts an integer. Returns a string containing the list of powers of two that total the source number when summed. They are comma-separated without spaces in text format, in ascending order.
diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md
index b8f222c2e4e..43542367cd5 100644
--- a/docs/en/sql-reference/functions/uuid-functions.md
+++ b/docs/en/sql-reference/functions/uuid-functions.md
@@ -211,12 +211,19 @@ SELECT toUUIDOrZero('61f0c404-5cb3-11e7-907b-a6006ad3dba0T') AS uuid
## UUIDStringToNum
-Accepts a string containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns it as a set of bytes in a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).
+Accepts `string` containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns a [FixedString(16)](../../sql-reference/data-types/fixedstring.md) as its binary representation, with its format optionally specified by `variant` (`Big-endian` by default).
+
+**Syntax**
``` sql
-UUIDStringToNum(String)
+UUIDStringToNum(string[, variant = 1])
```
+**Arguments**
+
+- `string` — String of 36 characters or FixedString(36). [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`.
+
**Returned value**
FixedString(16)
@@ -235,14 +242,33 @@ SELECT
└──────────────────────────────────────┴──────────────────┘
```
+``` sql
+SELECT
+ '612f3c40-5d3b-217e-707b-6a546a3d7b29' AS uuid,
+ UUIDStringToNum(uuid, 2) AS bytes
+```
+
+``` text
+┌─uuid─────────────────────────────────┬─bytes────────────┐
+│ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ @&1
- PKG_PATH=$(nfpm package --target "$OUTPUT_DIR" --config "$config" --packager deb | tee /dev/fd/9)
- PKG_PATH=${PKG_PATH##*created package: }
- exec 9>&-
+ # Preserve package path
+ exec 9>&1
+ PKG_PATH=$(nfpm package --target "$OUTPUT_DIR" --config "$config" --packager deb | tee /dev/fd/9)
+ PKG_PATH=${PKG_PATH##*created package: }
+ exec 9>&-
+ fi
if [ -n "$MAKE_APK" ]; then
echo "Building apk package for $config"
nfpm package --target "$OUTPUT_DIR" --config "$config" --packager apk
fi
+ if [ -n "$MAKE_ARCHLINUX" ]; then
+ echo "Building archlinux package for $config"
+ nfpm package --target "$OUTPUT_DIR" --config "$config" --packager archlinux
+ fi
if [ -n "$MAKE_RPM" ]; then
echo "Building rpm package for $config"
nfpm package --target "$OUTPUT_DIR" --config "$config" --packager rpm
diff --git a/packages/clickhouse-keeper.yaml b/packages/clickhouse-keeper.yaml
index 7803729c469..8f319c97b65 100644
--- a/packages/clickhouse-keeper.yaml
+++ b/packages/clickhouse-keeper.yaml
@@ -27,8 +27,8 @@ deb:
Source: clickhouse
contents:
-- src: root/etc/clickhouse-keeper
- dst: /etc/clickhouse-keeper
+- src: root/etc/clickhouse-keeper/keeper_config.xml
+ dst: /etc/clickhouse-keeper/keeper_config.xml
type: config
- src: root/usr/bin/clickhouse-keeper
dst: /usr/bin/clickhouse-keeper
diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml
index a94ad1e9169..b0778e6bf72 100644
--- a/packages/clickhouse-server.yaml
+++ b/packages/clickhouse-server.yaml
@@ -42,8 +42,11 @@ deb:
Source: clickhouse
contents:
-- src: root/etc/clickhouse-server
- dst: /etc/clickhouse-server
+- src: root/etc/clickhouse-server/config.xml
+ dst: /etc/clickhouse-server/config.xml
+ type: config
+- src: root/etc/clickhouse-server/users.xml
+ dst: /etc/clickhouse-server/users.xml
type: config
- src: clickhouse-server.init
dst: /etc/init.d/clickhouse-server
diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index 070f86aaad2..e5fd4d6bf8d 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -8,9 +8,10 @@
#include
#include
#include
+#include
+#include
#include
#include
-#include
#include
#include
#include
@@ -591,6 +592,18 @@ void LocalServer::processConfig()
if (mmap_cache_size)
global_context->setMMappedFileCache(mmap_cache_size);
+#if USE_EMBEDDED_COMPILER
+ /// 128 MB
+ constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128;
+ size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default);
+
+ constexpr size_t compiled_expression_cache_elements_size_default = 10000;
+ size_t compiled_expression_cache_elements_size
+ = config().getUInt64("compiled_expression_cache_elements_size", compiled_expression_cache_elements_size_default);
+
+ CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size);
+#endif
+
/// Load global settings from default_profile and system_profile.
global_context->setDefaultProfiles(config());
diff --git a/src/Analyzer/AggregationUtils.cpp b/src/Analyzer/AggregationUtils.cpp
new file mode 100644
index 00000000000..a73df87f9c2
--- /dev/null
+++ b/src/Analyzer/AggregationUtils.cpp
@@ -0,0 +1,114 @@
+#include
+
+#include
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int ILLEGAL_AGGREGATION;
+}
+
+namespace
+{
+
+class CollectAggregateFunctionNodesVisitor : public ConstInDepthQueryTreeVisitor /// Visitor with two modes: collect aggregate function nodes into a list, or throw on the first one found.
+{
+public:
+ explicit CollectAggregateFunctionNodesVisitor(QueryTreeNodes * aggregate_function_nodes_) /// Collect mode: found nodes are appended to the given list.
+ : aggregate_function_nodes(aggregate_function_nodes_)
+ {}
+
+ explicit CollectAggregateFunctionNodesVisitor(String assert_no_aggregates_place_message_) /// Assert mode: the message is inserted into the exception text.
+ : assert_no_aggregates_place_message(std::move(assert_no_aggregates_place_message_))
+ {}
+
+ void visitImpl(const QueryTreeNodePtr & node) /// Called for a visited node; reacts only to aggregate function nodes.
+ {
+ auto * function_node = node->as();
+ if (!function_node || !function_node->isAggregateFunction()) /// Not a function node or not an aggregate function: nothing to do.
+ return;
+
+ if (!assert_no_aggregates_place_message.empty()) /// Non-empty message means assert mode. NOTE: an empty message makes assert mode a no-op.
+ throw Exception(ErrorCodes::ILLEGAL_AGGREGATION,
+ "Aggregate function {} is found {} in query",
+ function_node->formatASTForErrorMessage(),
+ assert_no_aggregates_place_message);
+
+ if (aggregate_function_nodes) /// The list pointer is set only in collect mode.
+ aggregate_function_nodes->push_back(node);
+ }
+
+ static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node) /// Returns false for QUERY and UNION children, true for any other child.
+ {
+ return !(child_node->getNodeType() == QueryTreeNodeType::QUERY || child_node->getNodeType() == QueryTreeNodeType::UNION);
+ }
+
+private:
+ String assert_no_aggregates_place_message; /// Empty in collect mode.
+ QueryTreeNodes * aggregate_function_nodes = nullptr; /// Null in assert mode.
+};
+
+}
+
+QueryTreeNodes collectAggregateFunctionNodes(const QueryTreeNodePtr & node) /// Runs the visitor in collect mode and returns the found aggregate function nodes as a new list.
+{
+ QueryTreeNodes result;
+ CollectAggregateFunctionNodesVisitor visitor(&result); /// The visitor appends directly into `result`.
+ visitor.visit(node);
+
+ return result;
+}
+
+void collectAggregateFunctionNodes(const QueryTreeNodePtr & node, QueryTreeNodes & result) /// Same as the one-argument overload, but appends to the caller's list; existing elements of `result` are kept.
+{
+ CollectAggregateFunctionNodesVisitor visitor(&result);
+ visitor.visit(node);
+}
+
+void assertNoAggregateFunctionNodes(const QueryTreeNodePtr & node, const String & assert_no_aggregates_place_message) /// Throws ILLEGAL_AGGREGATION on the first aggregate function node the visitor reaches; the message describes the place in the query.
+{
+ CollectAggregateFunctionNodesVisitor visitor(assert_no_aggregates_place_message); /// String argument selects the assert mode constructor.
+ visitor.visit(node);
+}
+
+namespace
+{
+
+class ValidateGroupingFunctionNodesVisitor : public ConstInDepthQueryTreeVisitor /// Visitor that throws when it meets a function node named "grouping".
+{
+public:
+ explicit ValidateGroupingFunctionNodesVisitor(String assert_no_grouping_function_place_message_) /// The message is inserted into the exception text.
+ : assert_no_grouping_function_place_message(std::move(assert_no_grouping_function_place_message_))
+ {}
+
+ void visitImpl(const QueryTreeNodePtr & node) /// Called for a visited node; throws ILLEGAL_AGGREGATION for a "grouping" function.
+ {
+ auto * function_node = node->as();
+ if (function_node && function_node->getFunctionName() == "grouping") /// The function is matched by exact name.
+ throw Exception(ErrorCodes::ILLEGAL_AGGREGATION,
+ "GROUPING function {} is found {} in query",
+ function_node->formatASTForErrorMessage(),
+ assert_no_grouping_function_place_message);
+ }
+
+ static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node) /// Returns false for QUERY and UNION children, true for any other child.
+ {
+ return !(child_node->getNodeType() == QueryTreeNodeType::QUERY || child_node->getNodeType() == QueryTreeNodeType::UNION);
+ }
+
+private:
+ String assert_no_grouping_function_place_message; /// Unlike the aggregate visitor, an empty message does not disable the check.
+};
+
+}
+
+void assertNoGroupingFunction(const QueryTreeNodePtr & node, const String & assert_no_grouping_function_place_message) /// Throws ILLEGAL_AGGREGATION on the first "grouping" function node the visitor reaches.
+{
+ ValidateGroupingFunctionNodesVisitor visitor(assert_no_grouping_function_place_message);
+ visitor.visit(node);
+}
+
+}
diff --git a/src/Analyzer/AggregationUtils.h b/src/Analyzer/AggregationUtils.h
new file mode 100644
index 00000000000..c2e53e55c04
--- /dev/null
+++ b/src/Analyzer/AggregationUtils.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include
+
+namespace DB
+{
+
+/** Collect aggregate function nodes in node children and return them as a new list.
+ * Children that are query or union nodes (subqueries) are not visited.
+ */
+QueryTreeNodes collectAggregateFunctionNodes(const QueryTreeNodePtr & node);
+
+/** Collect aggregate function nodes in node children and append them to result.
+ * Children that are query or union nodes (subqueries) are not visited.
+ */
+void collectAggregateFunctionNodes(const QueryTreeNodePtr & node, QueryTreeNodes & result);
+
+/** Assert that there are no aggregate function nodes in node children, otherwise throw ILLEGAL_AGGREGATION.
+ * Children that are query or union nodes (subqueries) are not visited. The message describes the place in the query for the exception text.
+ */
+void assertNoAggregateFunctionNodes(const QueryTreeNodePtr & node, const String & assert_no_aggregates_place_message);
+
+/** Assert that there are no GROUPING functions in node children, otherwise throw ILLEGAL_AGGREGATION.
+ * Children that are query or union nodes (subqueries) are not visited. The message describes the place in the query for the exception text.
+ */
+void assertNoGroupingFunction(const QueryTreeNodePtr & node, const String & assert_no_grouping_function_place_message);
+
+}
diff --git a/src/Analyzer/ArrayJoinNode.cpp b/src/Analyzer/ArrayJoinNode.cpp
new file mode 100644
index 00000000000..2157b5edf6f
--- /dev/null
+++ b/src/Analyzer/ArrayJoinNode.cpp
@@ -0,0 +1,71 @@
+#include
+
+#include
+#include
+#include
+
+#include
+
+#include
+
+namespace DB
+{
+
+ArrayJoinNode::ArrayJoinNode(QueryTreeNodePtr table_expression_, QueryTreeNodePtr join_expressions_, bool is_left_) /// Stores both nodes as children and remembers whether the array join is left.
+ : IQueryTreeNode(children_size)
+ , is_left(is_left_)
+{
+ children[table_expression_child_index] = std::move(table_expression_); /// Child 0: table expression.
+ children[join_expressions_child_index] = std::move(join_expressions_); /// Child 1: join expressions.
+}
+
+void ArrayJoinNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const /// Writes a text dump of this node and both of its children into buffer.
+{
+ buffer << std::string(indent, ' ') << "ARRAY_JOIN id: " << format_state.getNodeId(this); /// Header line: node kind and node id.
+ buffer << ", is_left: " << is_left;
+
+ buffer << '\n' << std::string(indent + 2, ' ') << "TABLE EXPRESSION\n"; /// Section titles are indented by 2, subtrees by 4.
+ getTableExpression()->dumpTreeImpl(buffer, format_state, indent + 4);
+
+ buffer << '\n' << std::string(indent + 2, ' ') << "JOIN EXPRESSIONS\n";
+ getJoinExpressionsNode()->dumpTreeImpl(buffer, format_state, indent + 4);
+}
+
+bool ArrayJoinNode::isEqualImpl(const IQueryTreeNode & rhs) const /// Compares only the is_left flag; children are not compared here (presumably done by the caller - confirm in IQueryTreeNode).
+{
+ const auto & rhs_typed = assert_cast(rhs);
+ return is_left == rhs_typed.is_left;
+}
+
+void ArrayJoinNode::updateTreeHashImpl(HashState & state) const /// Adds only the is_left flag to the hash state; children are not hashed here.
+{
+ state.update(is_left);
+}
+
+QueryTreeNodePtr ArrayJoinNode::cloneImpl() const /// Creates a new node with the same child pointers and the same is_left flag; children are not copied here.
+{
+ return std::make_shared(getTableExpression(), getJoinExpressionsNode(), is_left);
+}
+
+ASTPtr ArrayJoinNode::toASTImpl() const /// Builds the AST: the table expression is added first, then an element holding the array join.
+{
+ auto array_join_ast = std::make_shared();
+ array_join_ast->kind = is_left ? ASTArrayJoin::Kind::Left : ASTArrayJoin::Kind::Inner; /// Non-left array join is represented as Inner kind.
+
+ const auto & join_expression_list_node = getJoinExpressionsNode();
+ array_join_ast->children.push_back(join_expression_list_node->toAST());
+ array_join_ast->expression_list = array_join_ast->children.back(); /// expression_list points to the same AST that was just added to children.
+
+ ASTPtr tables_in_select_query_ast = std::make_shared();
+ addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[table_expression_child_index]); /// Table expression goes before the array join element.
+
+ auto array_join_query_element_ast = std::make_shared();
+ array_join_query_element_ast->children.push_back(std::move(array_join_ast));
+ array_join_query_element_ast->array_join = array_join_query_element_ast->children.back(); /// array_join_ast was moved from, so take the pointer from children.
+
+ tables_in_select_query_ast->children.push_back(std::move(array_join_query_element_ast));
+
+ return tables_in_select_query_ast;
+}
+
+}
diff --git a/src/Analyzer/ArrayJoinNode.h b/src/Analyzer/ArrayJoinNode.h
new file mode 100644
index 00000000000..50d53df465a
--- /dev/null
+++ b/src/Analyzer/ArrayJoinNode.h
@@ -0,0 +1,113 @@
+#pragma once
+
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+
+namespace DB
+{
+
+/** Array join node represents array join in query tree.
+ *
+ * In query tree array join expressions are represented by list query tree node.
+ *
+ * Example: SELECT id FROM test_table ARRAY JOIN [1, 2, 3] as a.
+ *
+ * Multiple expressions can be inside single array join.
+ * Example: SELECT id FROM test_table ARRAY JOIN [1, 2, 3] as a, [4, 5, 6] as b.
+ * Example: SELECT id FROM test_table ARRAY JOIN array_column_1 AS value_1, array_column_2 AS value_2.
+ *
+ * Multiple array joins can be inside JOIN TREE.
+ * Example: SELECT id FROM test_table ARRAY JOIN array_column_1 ARRAY JOIN array_column_2.
+ *
+ * Array join can be used inside JOIN TREE with ordinary JOINS.
+ * Example: SELECT t1.id FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 ON t1.id = t2.id ARRAY JOIN [1,2,3];
+ * Example: SELECT t1.id FROM test_table_1 AS t1 ARRAY JOIN [1,2,3] INNER JOIN test_table_2 AS t2 ON t1.id = t2.id;
+ */
+class ArrayJoinNode;
+using ArrayJoinNodePtr = std::shared_ptr;
+
+class ArrayJoinNode final : public IQueryTreeNode
+{
+public:
+ /** Construct array join node with table expression, join expressions and left flag.
+ * Example: SELECT id FROM test_table ARRAY JOIN [1, 2, 3] as a.
+ * test_table - table expression.
+ * join_expressions - list of array join expressions. is_left - true if array join is left.
+ */
+ ArrayJoinNode(QueryTreeNodePtr table_expression_, QueryTreeNodePtr join_expressions_, bool is_left_);
+
+ /// Get table expression (const access)
+ const QueryTreeNodePtr & getTableExpression() const
+ {
+ return children[table_expression_child_index];
+ }
+
+ /// Get table expression (mutable access, the child pointer can be replaced)
+ QueryTreeNodePtr & getTableExpression()
+ {
+ return children[table_expression_child_index];
+ }
+
+ /// Get join expressions as typed list node (const access)
+ const ListNode & getJoinExpressions() const
+ {
+ return children[join_expressions_child_index]->as();
+ }
+
+ /// Get join expressions as typed list node (mutable access)
+ ListNode & getJoinExpressions()
+ {
+ return children[join_expressions_child_index]->as();
+ }
+
+ /// Get join expressions node as generic query tree node pointer (const access)
+ const QueryTreeNodePtr & getJoinExpressionsNode() const
+ {
+ return children[join_expressions_child_index];
+ }
+
+ /// Get join expressions node as generic query tree node pointer (mutable access, the child pointer can be replaced)
+ QueryTreeNodePtr & getJoinExpressionsNode()
+ {
+ return children[join_expressions_child_index];
+ }
+
+ /// Returns true if array join is left, false otherwise
+ bool isLeft() const
+ {
+ return is_left;
+ }
+
+ QueryTreeNodeType getNodeType() const override /// Always ARRAY_JOIN.
+ {
+ return QueryTreeNodeType::ARRAY_JOIN;
+ }
+
+ void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; /// Writes a text dump of this node and its children.
+
+protected:
+ bool isEqualImpl(const IQueryTreeNode & rhs) const override; /// Compares is_left only.
+
+ void updateTreeHashImpl(HashState & state) const override; /// Hashes is_left only.
+
+ QueryTreeNodePtr cloneImpl() const override; /// New node with the same child pointers and is_left.
+
+ ASTPtr toASTImpl() const override; /// Table expression followed by the array join element.
+
+private:
+ bool is_left = false;
+
+ static constexpr size_t table_expression_child_index = 0; /// Position of table expression in children.
+ static constexpr size_t join_expressions_child_index = 1; /// Position of join expressions in children.
+ static constexpr size_t children_size = join_expressions_child_index + 1; /// Number of children passed to IQueryTreeNode.
+};
+
+}
+
diff --git a/src/Analyzer/CMakeLists.txt b/src/Analyzer/CMakeLists.txt
new file mode 100644
index 00000000000..766767b5c13
--- /dev/null
+++ b/src/Analyzer/CMakeLists.txt
@@ -0,0 +1,7 @@
+if (ENABLE_TESTS)
+ add_subdirectory(tests)
+endif()
+
+if (ENABLE_EXAMPLES)
+ add_subdirectory(examples)
+endif()
diff --git a/src/Analyzer/ColumnNode.cpp b/src/Analyzer/ColumnNode.cpp
new file mode 100644
index 00000000000..4d0d349dabb
--- /dev/null
+++ b/src/Analyzer/ColumnNode.cpp
@@ -0,0 +1,97 @@
+#include
+
+#include
+
+#include
+#include
+#include
+
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+ColumnNode::ColumnNode(NameAndTypePair column_, QueryTreeNodePtr expression_node_, QueryTreeNodeWeakPtr column_source_) /// Stores the column, the expression as the only child and the source as the only weak pointer.
+ : IQueryTreeNode(children_size, weak_pointers_size)
+ , column(std::move(column_))
+{
+ children[expression_child_index] = std::move(expression_node_); /// May be null: the two-argument constructor passes nullptr here.
+ getSourceWeakPointer() = std::move(column_source_);
+}
+
+ColumnNode::ColumnNode(NameAndTypePair column_, QueryTreeNodeWeakPtr column_source_) /// Column without expression: delegates to the main constructor with a null expression node.
+ : ColumnNode(std::move(column_), nullptr /*expression_node*/, std::move(column_source_))
+{
+}
+
+QueryTreeNodePtr ColumnNode::getColumnSource() const /// Returns the column source as a strong pointer, throws LOGICAL_ERROR if the weak pointer cannot be locked.
+{
+ auto lock = getSourceWeakPointer().lock(); /// `lock` is the locked shared pointer, null if the source is gone or was never set.
+ if (!lock)
+ throw Exception(ErrorCodes::LOGICAL_ERROR,
+ "Column {} {} query tree node does not have valid source node",
+ column.name,
+ column.type->getName());
+
+ return lock;
+}
+
+QueryTreeNodePtr ColumnNode::getColumnSourceOrNull() const /// Non-throwing variant: returns null if the weak pointer cannot be locked.
+{
+ return getSourceWeakPointer().lock();
+}
+
+void ColumnNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & state, size_t indent) const /// Writes a text dump of the column and, if present, of its expression subtree.
+{
+ buffer << std::string(indent, ' ') << "COLUMN id: " << state.getNodeId(this);
+
+ if (hasAlias()) /// Alias is printed only if the node has one.
+ buffer << ", alias: " << getAlias();
+
+ buffer << ", column_name: " << column.name << ", result_type: " << column.type->getName();
+
+ auto column_source_ptr = getSourceWeakPointer().lock(); /// Locked directly, so a missing source does not throw here.
+ if (column_source_ptr)
+ buffer << ", source_id: " << state.getNodeId(column_source_ptr.get());
+
+ const auto & expression = getExpression();
+
+ if (expression) /// Expression section is printed only if the expression child is not null.
+ {
+ buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION\n";
+ expression->dumpTreeImpl(buffer, state, indent + 4);
+ }
+}
+
+bool ColumnNode::isEqualImpl(const IQueryTreeNode & rhs) const /// Compares only the `column` members; the expression child and the source are not compared here.
+{
+ const auto & rhs_typed = assert_cast(rhs);
+ return column == rhs_typed.column;
+}
+
+void ColumnNode::updateTreeHashImpl(HashState & hash_state) const /// Adds column name and column type name to the hash state.
+{
+ hash_state.update(column.name.size()); /// Each string is preceded by its size (presumably to keep adjacent strings unambiguous).
+ hash_state.update(column.name);
+
+ const auto & column_type_name = column.type->getName();
+ hash_state.update(column_type_name.size());
+ hash_state.update(column_type_name);
+}
+
+QueryTreeNodePtr ColumnNode::cloneImpl() const /// Creates a new column node with the same column and the same source weak pointer; the expression child is not copied here.
+{
+ return std::make_shared<ColumnNode>(column, getSourceWeakPointer()); /// The weak pointer is passed as is: getColumnSource() throws LOGICAL_ERROR if the source cannot be locked, and clone must not fail because of that.
+}
+
+ASTPtr ColumnNode::toASTImpl() const /// Builds the AST from the column name only; type, expression and source are not used.
+{
+ return std::make_shared(column.name);
+}
+
+}
diff --git a/src/Analyzer/ColumnNode.h b/src/Analyzer/ColumnNode.h
new file mode 100644
index 00000000000..fbd788ae6fd
--- /dev/null
+++ b/src/Analyzer/ColumnNode.h
@@ -0,0 +1,156 @@
+#pragma once
+
+#include
+
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+/** Column node represents column in query tree.
+ * Column node can have weak pointer to its column source.
+ * Column source can be table expression, lambda, subquery.
+ *
+ * For table ALIAS columns. Column node must contain expression.
+ * For ARRAY JOIN join expression column. Column node must contain expression.
+ *
+ * During query analysis pass identifier node is resolved into column. See IdentifierNode.h.
+ *
+ * Examples:
+ * SELECT id FROM test_table. id is identifier that must be resolved to column node during query analysis pass.
+ * SELECT lambda(x -> x + 1, [1,2,3]). x is identifier inside lambda that must be resolved to column node during query analysis pass.
+ *
+ * Column node is initialized with column name, type and column source weak pointer.
+ * In case of ALIAS column node is initialized with column name, type, alias expression and column source weak pointer.
+ */
+class ColumnNode;
+using ColumnNodePtr = std::shared_ptr;
+
+class ColumnNode final : public IQueryTreeNode
+{
+public:
+ /// Construct column node with column name, type, column expression and column source weak pointer
+ ColumnNode(NameAndTypePair column_, QueryTreeNodePtr expression_node_, QueryTreeNodeWeakPtr column_source_);
+
+ /// Construct column node with column name, type and column source weak pointer. Expression node is null
+ ColumnNode(NameAndTypePair column_, QueryTreeNodeWeakPtr column_source_);
+
+ /// Get column (name and type pair)
+ const NameAndTypePair & getColumn() const
+ {
+ return column;
+ }
+
+ /// Get column name
+ const String & getColumnName() const
+ {
+ return column.name;
+ }
+
+ /// Get column type
+ const DataTypePtr & getColumnType() const
+ {
+ return column.type;
+ }
+
+ /// Set column type. Column name is not changed
+ void setColumnType(DataTypePtr column_type)
+ {
+ column.type = std::move(column_type);
+ }
+
+ /// Returns true if column node has expression, false otherwise
+ bool hasExpression() const
+ {
+ return children[expression_child_index] != nullptr;
+ }
+
+ /// Get column node expression node (const access), can be null
+ const QueryTreeNodePtr & getExpression() const
+ {
+ return children[expression_child_index];
+ }
+
+ /// Get column node expression node (mutable access), can be null
+ QueryTreeNodePtr & getExpression()
+ {
+ return children[expression_child_index];
+ }
+
+ /// Get column node expression node. If there is no expression node, LOGICAL_ERROR exception is thrown
+ QueryTreeNodePtr & getExpressionOrThrow()
+ {
+ if (!children[expression_child_index])
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Column expression is not initialized");
+
+ return children[expression_child_index];
+ }
+
+ /// Set column node expression node, previous expression node is replaced
+ void setExpression(QueryTreeNodePtr expression_value)
+ {
+ children[expression_child_index] = std::move(expression_value);
+ }
+
+ /** Get column source.
+ * If column source is not valid logical exception is thrown.
+ */
+ QueryTreeNodePtr getColumnSource() const;
+
+ /** Get column source.
+ * If column source is not valid null is returned.
+ */
+ QueryTreeNodePtr getColumnSourceOrNull() const;
+
+ QueryTreeNodeType getNodeType() const override /// Always COLUMN.
+ {
+ return QueryTreeNodeType::COLUMN;
+ }
+
+ String getName() const override /// Node name is the column name.
+ {
+ return column.name;
+ }
+
+ DataTypePtr getResultType() const override /// Result type is the column type.
+ {
+ return column.type;
+ }
+
+ void dumpTreeImpl(WriteBuffer & buffer, FormatState & state, size_t indent) const override; /// Writes a text dump of the column and its expression, if any.
+
+protected:
+ bool isEqualImpl(const IQueryTreeNode & rhs) const override; /// Compares the `column` members only.
+
+ void updateTreeHashImpl(HashState & hash_state) const override; /// Hashes column name and column type name.
+
+ QueryTreeNodePtr cloneImpl() const override; /// New column node with the same column and source.
+
+ ASTPtr toASTImpl() const override; /// AST is built from the column name.
+
+private:
+ const QueryTreeNodeWeakPtr & getSourceWeakPointer() const /// Source weak pointer slot (const access).
+ {
+ return weak_pointers[source_weak_pointer_index];
+ }
+
+ QueryTreeNodeWeakPtr & getSourceWeakPointer() /// Source weak pointer slot (mutable access), used by the constructor to set the source.
+ {
+ return weak_pointers[source_weak_pointer_index];
+ }
+
+ NameAndTypePair column;
+
+ static constexpr size_t expression_child_index = 0; /// Position of expression in children.
+ static constexpr size_t children_size = expression_child_index + 1; /// Number of children passed to IQueryTreeNode.
+
+ static constexpr size_t source_weak_pointer_index = 0; /// Position of column source in weak_pointers.
+ static constexpr size_t weak_pointers_size = source_weak_pointer_index + 1; /// Number of weak pointers passed to IQueryTreeNode.
+};
+
+}
diff --git a/src/Analyzer/ColumnTransformers.cpp b/src/Analyzer/ColumnTransformers.cpp
new file mode 100644
index 00000000000..27466ce5c27
--- /dev/null
+++ b/src/Analyzer/ColumnTransformers.cpp
@@ -0,0 +1,357 @@
+#include
+
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+
+/// IColumnTransformerNode implementation
+
+const char * toString(ColumnTransfomerType type)
+{
+ switch (type)
+ {
+ case ColumnTransfomerType::APPLY: return "APPLY";
+ case ColumnTransfomerType::EXCEPT: return "EXCEPT";
+ case ColumnTransfomerType::REPLACE: return "REPLACE";
+ }
+}
+
+IColumnTransformerNode::IColumnTransformerNode(size_t children_size)
+ : IQueryTreeNode(children_size)
+{}
+
+/// ApplyColumnTransformerNode implementation
+
+const char * toString(ApplyColumnTransformerType type)
+{
+ switch (type)
+ {
+ case ApplyColumnTransformerType::LAMBDA: return "LAMBDA";
+ case ApplyColumnTransformerType::FUNCTION: return "FUNCTION";
+ }
+}
+
+ApplyColumnTransformerNode::ApplyColumnTransformerNode(QueryTreeNodePtr expression_node_)
+    : IColumnTransformerNode(children_size)
+{
+    /// Classify the expression once; anything other than a lambda or a function is rejected
+    const auto expression_node_type = expression_node_->getNodeType();
+    if (expression_node_type == QueryTreeNodeType::LAMBDA)
+        apply_transformer_type = ApplyColumnTransformerType::LAMBDA;
+    else if (expression_node_type == QueryTreeNodeType::FUNCTION)
+        apply_transformer_type = ApplyColumnTransformerType::FUNCTION;
+    else
+        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+            "Apply column transformer expression must be lambda or function. Actual {}", expression_node_->getNodeTypeName());
+    children[expression_child_index] = std::move(expression_node_);
+}
+
+void ApplyColumnTransformerNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const
+{
+ buffer << std::string(indent, ' ') << "APPLY COLUMN TRANSFORMER id: " << format_state.getNodeId(this);
+ buffer << ", apply_transformer_type: " << toString(apply_transformer_type);
+
+ buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION" << '\n';
+
+ const auto & expression_node = getExpressionNode();
+ expression_node->dumpTreeImpl(buffer, format_state, indent + 4);
+}
+
+bool ApplyColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs) const
+{
+ const auto & rhs_typed = assert_cast(rhs);
+ return apply_transformer_type == rhs_typed.apply_transformer_type;
+}
+
+void ApplyColumnTransformerNode::updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const
+{
+ hash_state.update(static_cast(getTransformerType()));
+ hash_state.update(static_cast(getApplyTransformerType()));
+}
+
+QueryTreeNodePtr ApplyColumnTransformerNode::cloneImpl() const
+{
+ return std::make_shared(getExpressionNode());
+}
+
+ASTPtr ApplyColumnTransformerNode::toASTImpl() const
+{
+ auto ast_apply_transformer = std::make_shared();
+ const auto & expression_node = getExpressionNode();
+
+ if (apply_transformer_type == ApplyColumnTransformerType::FUNCTION)
+ {
+ auto & function_expression = expression_node->as();
+ ast_apply_transformer->func_name = function_expression.getFunctionName();
+ ast_apply_transformer->parameters = function_expression.getParametersNode()->toAST();
+ }
+ else
+ {
+ auto & lambda_expression = expression_node->as();
+ if (!lambda_expression.getArgumentNames().empty())
+ ast_apply_transformer->lambda_arg = lambda_expression.getArgumentNames()[0];
+ ast_apply_transformer->lambda = lambda_expression.toAST();
+ }
+
+ return ast_apply_transformer;
+}
+
+/// ExceptColumnTransformerNode implementation
+
+ExceptColumnTransformerNode::ExceptColumnTransformerNode(Names except_column_names_, bool is_strict_)
+ : IColumnTransformerNode(children_size)
+ , except_transformer_type(ExceptColumnTransformerType::COLUMN_LIST)
+ , except_column_names(std::move(except_column_names_))
+ , is_strict(is_strict_)
+{
+}
+
+ExceptColumnTransformerNode::ExceptColumnTransformerNode(std::shared_ptr column_matcher_)
+ : IColumnTransformerNode(children_size)
+ , except_transformer_type(ExceptColumnTransformerType::REGEXP)
+ , column_matcher(std::move(column_matcher_))
+{
+}
+
+bool ExceptColumnTransformerNode::isColumnMatching(const std::string & column_name) const
+{
+    /// Regexp variant: delegate to a partial match of the pattern against the name
+    if (column_matcher)
+        return RE2::PartialMatch(column_name, *column_matcher);
+
+    /// Column list variant: the name matches if it is listed verbatim
+    const auto names_end = except_column_names.end();
+    const auto name_it = std::find(except_column_names.begin(), names_end, column_name);
+    return name_it != names_end;
+}
+
+const char * toString(ExceptColumnTransformerType type)
+{
+ switch (type)
+ {
+ case ExceptColumnTransformerType::REGEXP:
+ return "REGEXP";
+ case ExceptColumnTransformerType::COLUMN_LIST:
+ return "COLUMN_LIST";
+ }
+}
+
+void ExceptColumnTransformerNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const
+{
+    /// Header: node kind, node id and which EXCEPT variant this is
+    buffer << std::string(indent, ' ') << "EXCEPT COLUMN TRANSFORMER id: " << format_state.getNodeId(this);
+    buffer << ", except_transformer_type: " << toString(except_transformer_type);
+
+    /// Regexp variant prints only its pattern
+    if (column_matcher)
+    {
+        buffer << ", pattern: " << column_matcher->pattern();
+        return;
+    }
+
+    /// Column list variant prints the names separated by ", "
+    buffer << ", identifiers: ";
+    bool is_first_name = true;
+    for (const auto & except_column_name : except_column_names)
+    {
+        if (!is_first_name)
+            buffer << ", ";
+        buffer << except_column_name;
+        is_first_name = false;
+    }
+}
+
+bool ExceptColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs) const
+{
+    /// NOTE(review): assumes rhs has the same node type as this node — confirm against the caller in IQueryTreeNode
+    const auto & rhs_typed = assert_cast<const ExceptColumnTransformerNode &>(rhs);
+
+    /// Plain fields first: variant, strictness and the explicit column list
+    if (except_transformer_type != rhs_typed.except_transformer_type
+        || is_strict != rhs_typed.is_strict
+        || except_column_names != rhs_typed.except_column_names)
+        return false;
+
+    /// Regexp matchers are compared by pattern text; a missing matcher equals only another missing matcher
+    const auto & rhs_column_matcher = rhs_typed.column_matcher;
+
+    if (!column_matcher || !rhs_column_matcher)
+        return !column_matcher && !rhs_column_matcher;
+
+    return column_matcher->pattern() == rhs_column_matcher->pattern();
+}
+
+void ExceptColumnTransformerNode::updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const
+{
+ hash_state.update(static_cast(getTransformerType()));
+ hash_state.update(static_cast(getExceptTransformerType()));
+
+ hash_state.update(except_column_names.size());
+
+ for (const auto & column_name : except_column_names)
+ {
+ hash_state.update(column_name.size());
+ hash_state.update(column_name);
+ }
+
+ if (column_matcher)
+ {
+ const auto & pattern = column_matcher->pattern();
+ hash_state.update(pattern.size());
+ hash_state.update(pattern);
+ }
+}
+
+QueryTreeNodePtr ExceptColumnTransformerNode::cloneImpl() const
+{
+ if (except_transformer_type == ExceptColumnTransformerType::REGEXP)
+ return std::make_shared(column_matcher);
+
+ return std::make_shared(except_column_names, is_strict);
+}
+
+ASTPtr ExceptColumnTransformerNode::toASTImpl() const
+{
+ auto ast_except_transformer = std::make_shared();
+
+ if (column_matcher)
+ {
+ ast_except_transformer->setPattern(column_matcher->pattern());
+ return ast_except_transformer;
+ }
+
+ ast_except_transformer->children.reserve(except_column_names.size());
+ for (const auto & name : except_column_names)
+ ast_except_transformer->children.push_back(std::make_shared(name));
+
+ return ast_except_transformer;
+}
+
+/// ReplaceColumnTransformerNode implementation
+
+ReplaceColumnTransformerNode::ReplaceColumnTransformerNode(const std::vector & replacements_, bool is_strict_)
+ : IColumnTransformerNode(children_size)
+ , is_strict(is_strict_)
+{
+ children[replacements_child_index] = std::make_shared();
+
+ auto & replacement_expressions_nodes = getReplacements().getNodes();
+
+ std::unordered_set replacement_names_set;
+
+ for (const auto & replacement : replacements_)
+ {
+ auto [_, inserted] = replacement_names_set.emplace(replacement.column_name);
+
+ if (!inserted)
+ throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+ "Expressions in column transformer replace should not contain same replacement {} more than once",
+ replacement.column_name);
+
+ replacements_names.push_back(replacement.column_name);
+ replacement_expressions_nodes.push_back(replacement.expression_node);
+ }
+}
+
+QueryTreeNodePtr ReplaceColumnTransformerNode::findReplacementExpression(const std::string & expression_name)
+{
+    /// Names and expressions are stored in the same order, so the position of the name selects its expression
+    const auto names_begin = replacements_names.begin();
+    const auto name_it = std::find(names_begin, replacements_names.end(), expression_name);
+    if (name_it == replacements_names.end())
+        return {};
+    const size_t replacement_index = name_it - names_begin;
+    return getReplacements().getNodes()[replacement_index];
+}
+
+void ReplaceColumnTransformerNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const
+{
+ buffer << std::string(indent, ' ') << "REPLACE COLUMN TRANSFORMER id: " << format_state.getNodeId(this);
+
+ const auto & replacements_nodes = getReplacements().getNodes();
+ size_t replacements_size = replacements_nodes.size();
+ buffer << '\n' << std::string(indent + 2, ' ') << "REPLACEMENTS " << replacements_size << '\n';
+
+ for (size_t i = 0; i < replacements_size; ++i)
+ {
+ const auto & replacement_name = replacements_names[i];
+ buffer << std::string(indent + 4, ' ') << "REPLACEMENT NAME " << replacement_name;
+ buffer << " EXPRESSION" << '\n';
+ const auto & expression_node = replacements_nodes[i];
+ expression_node->dumpTreeImpl(buffer, format_state, indent + 6);
+
+ if (i + 1 != replacements_size)
+ buffer << '\n';
+ }
+}
+
+bool ReplaceColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs) const
+{
+ const auto & rhs_typed = assert_cast(rhs);
+ return is_strict == rhs_typed.is_strict && replacements_names == rhs_typed.replacements_names;
+}
+
+void ReplaceColumnTransformerNode::updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const
+{
+ hash_state.update(static_cast(getTransformerType()));
+
+ const auto & replacement_expressions_nodes = getReplacements().getNodes();
+ size_t replacements_size = replacement_expressions_nodes.size();
+ hash_state.update(replacements_size);
+
+ for (size_t i = 0; i < replacements_size; ++i)
+ {
+ const auto & replacement_name = replacements_names[i];
+ hash_state.update(replacement_name.size());
+ hash_state.update(replacement_name);
+ }
+}
+
+QueryTreeNodePtr ReplaceColumnTransformerNode::cloneImpl() const
+{
+ auto result_replace_transformer = std::make_shared(std::vector{}, false);
+
+ result_replace_transformer->is_strict = is_strict;
+ result_replace_transformer->replacements_names = replacements_names;
+
+ return result_replace_transformer;
+}
+
+ASTPtr ReplaceColumnTransformerNode::toASTImpl() const
+{
+ auto ast_replace_transformer = std::make_shared();
+
+ const auto & replacement_expressions_nodes = getReplacements().getNodes();
+ size_t replacements_size = replacement_expressions_nodes.size();
+
+ ast_replace_transformer->children.reserve(replacements_size);
+
+ for (size_t i = 0; i < replacements_size; ++i)
+ {
+ auto replacement_ast = std::make_shared();
+ replacement_ast->name = replacements_names[i];
+ replacement_ast->expr = replacement_expressions_nodes[i]->toAST();
+ ast_replace_transformer->children.push_back(replacement_ast);
+ }
+
+ return ast_replace_transformer;
+}
+
+}
diff --git a/src/Analyzer/ColumnTransformers.h b/src/Analyzer/ColumnTransformers.h
new file mode 100644
index 00000000000..e96e606d923
--- /dev/null
+++ b/src/Analyzer/ColumnTransformers.h
@@ -0,0 +1,316 @@
+#pragma once
+
+#include
+
+#include
+#include
+#include
+
+namespace DB
+{
+
+/** Transformers are query tree nodes that handle additional logic that you can apply after MatcherQueryTreeNode is resolved.
+ * Check MatcherQueryTreeNode.h before reading this documentation.
+ *
+ * Their main purpose is to apply some logic to expressions after the matcher is resolved.
+ * There are 3 types of transformers:
+ *
+ * 1. APPLY transformer:
+ * APPLY transformer transforms a matched expression into another expression using a lambda or a function.
+ * It has 2 syntax variants:
+ * 1. lambda variant: SELECT matcher APPLY (x -> expr(x)).
+ * 2. function variant: SELECT matcher APPLY function_name(optional_parameters).
+ *
+ * 2. EXCEPT transformer:
+ * EXCEPT transformer discards some columns.
+ * It has 2 syntax variants:
+ * 1. regexp variant: SELECT matcher EXCEPT ('regexp').
+ * 2. column names list variant: SELECT matcher EXCEPT (column_name_1, ...).
+ *
+ * 3. REPLACE transformer:
+ * REPLACE transformer applies similar transformation as APPLY transformer, but only for expressions
+ * that match replacement expression name.
+ *
+ * Example:
+ * CREATE TABLE test_table (id UInt64) ENGINE=TinyLog;
+ * SELECT * REPLACE (id + 1 AS id) FROM test_table.
+ * This query is transformed into SELECT id + 1 FROM test_table.
+ * It is important that AS id is not alias, it is replacement name. id + 1 is replacement expression.
+ *
+ * REPLACE transformer cannot contain multiple replacements with same name.
+ *
+ * REPLACE transformer expression does not necessarily include the replacement column name.
+ * Example:
+ * SELECT * REPLACE (1 AS id) FROM test_table.
+ *
+ * REPLACE transformer expression does not throw exception if there are no columns to apply replacement.
+ * Example:
+ * SELECT * REPLACE (1 AS unknown_column) FROM test_table;
+ *
+ * REPLACE transformer can contain multiple replacements.
+ * Example:
+ * SELECT * REPLACE (1 AS id, 2 AS value).
+ *
+ * Transformers can be combined together and chained.
+ * Example:
+ * SELECT * EXCEPT (id) APPLY (x -> toString(x)) APPLY (x -> length(x)) FROM test_table.
+ */
+
+/// Column transformer type
+enum class ColumnTransfomerType
+{
+ APPLY,
+ EXCEPT,
+ REPLACE
+};
+
+/// Get column transformer type name
+const char * toString(ColumnTransfomerType type);
+
+class IColumnTransformerNode;
+using ColumnTransformerNodePtr = std::shared_ptr;
+using ColumnTransformersNodes = std::vector;
+
+/// IColumnTransformerNode base interface.
+class IColumnTransformerNode : public IQueryTreeNode
+{
+public:
+ /// Get transformer type
+ virtual ColumnTransfomerType getTransformerType() const = 0;
+
+ /// Get transformer type name
+ const char * getTransformerTypeName() const
+ {
+ return toString(getTransformerType());
+ }
+
+ QueryTreeNodeType getNodeType() const final
+ {
+ return QueryTreeNodeType::TRANSFORMER;
+ }
+
+protected:
+ /// Construct column transformer node and resize children to children size
+ explicit IColumnTransformerNode(size_t children_size);
+};
+
+enum class ApplyColumnTransformerType
+{
+ LAMBDA,
+ FUNCTION
+};
+
+/// Get apply column transformer type name
+const char * toString(ApplyColumnTransformerType type);
+
+class ApplyColumnTransformerNode;
+using ApplyColumnTransformerNodePtr = std::shared_ptr;
+
+/// Apply column transformer
+class ApplyColumnTransformerNode final : public IColumnTransformerNode
+{
+public:
+ /** Initialize apply column transformer with expression node.
+ * Expression node must be lambda or function otherwise exception is thrown.
+ */
+ explicit ApplyColumnTransformerNode(QueryTreeNodePtr expression_node_);
+
+ /// Get apply transformer type
+ ApplyColumnTransformerType getApplyTransformerType() const
+ {
+ return apply_transformer_type;
+ }
+
+ /// Get apply transformer expression node
+ const QueryTreeNodePtr & getExpressionNode() const
+ {
+ return children[expression_child_index];
+ }
+
+ ColumnTransfomerType getTransformerType() const override
+ {
+ return ColumnTransfomerType::APPLY;
+ }
+
+ void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
+
+protected:
+ bool isEqualImpl(const IQueryTreeNode & rhs) const override;
+
+ void updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const override;
+
+ QueryTreeNodePtr cloneImpl() const override;
+
+ ASTPtr toASTImpl() const override;
+
+private:
+ ApplyColumnTransformerType apply_transformer_type = ApplyColumnTransformerType::LAMBDA;
+
+ static constexpr size_t expression_child_index = 0;
+ static constexpr size_t children_size = expression_child_index + 1;
+};
+
+/// Except column transformer type
+enum class ExceptColumnTransformerType
+{
+ REGEXP,
+ COLUMN_LIST,
+};
+
+const char * toString(ExceptColumnTransformerType type);
+
+class ExceptColumnTransformerNode;
+using ExceptColumnTransformerNodePtr = std::shared_ptr;
+
+/** Except column transformer.
+ * Strict EXCEPT column transformer must use all column names during matched nodes transformation.
+ *
+ * Example:
+ * CREATE TABLE test_table (id UInt64, value String) ENGINE=TinyLog;
+ * SELECT * EXCEPT STRICT (id, value1) FROM test_table;
+ * Such query will throw exception because column with name `value1` was not matched by strict EXCEPT transformer.
+ *
+ * Strict is valid only for EXCEPT COLUMN_LIST transformer.
+ */
+class ExceptColumnTransformerNode final : public IColumnTransformerNode
+{
+public:
+ /// Initialize except column transformer with column names
+ explicit ExceptColumnTransformerNode(Names except_column_names_, bool is_strict_);
+
+ /// Initialize except column transformer with regexp column matcher
+ explicit ExceptColumnTransformerNode(std::shared_ptr column_matcher_);
+
+ /// Get except transformer type
+ ExceptColumnTransformerType getExceptTransformerType() const
+ {
+ return except_transformer_type;
+ }
+
+ /** Returns true if except column transformer is strict, false otherwise.
+ * Valid only for EXCEPT COLUMN_LIST transformer.
+ */
+ bool isStrict() const
+ {
+ return is_strict;
+ }
+
+ /// Returns true if except transformer match column name, false otherwise.
+ bool isColumnMatching(const std::string & column_name) const;
+
+ /** Get except column names.
+ * Valid only for column list except transformer.
+ */
+ const Names & getExceptColumnNames() const
+ {
+ return except_column_names;
+ }
+
+ ColumnTransfomerType getTransformerType() const override
+ {
+ return ColumnTransfomerType::EXCEPT;
+ }
+
+ void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
+
+protected:
+ bool isEqualImpl(const IQueryTreeNode & rhs) const override;
+
+ void updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const override;
+
+ QueryTreeNodePtr cloneImpl() const override;
+
+ ASTPtr toASTImpl() const override;
+
+private:
+ ExceptColumnTransformerType except_transformer_type;
+ Names except_column_names;
+ std::shared_ptr column_matcher;
+ bool is_strict = false;
+
+ static constexpr size_t children_size = 0;
+};
+
+class ReplaceColumnTransformerNode;
+using ReplaceColumnTransformerNodePtr = std::shared_ptr;
+
+/** Replace column transformer.
+ * Strict replace column transformer must use all replacements during matched nodes transformation.
+ *
+ * Example:
+ * CREATE TABLE test_table (id UInt64, value String) ENGINE=TinyLog;
+ * SELECT * REPLACE STRICT (1 AS id, 2 AS value_1) FROM test_table;
+ * Such query will throw exception because column with name `value_1` was not matched by strict REPLACE transformer.
+ */
+class ReplaceColumnTransformerNode final : public IColumnTransformerNode
+{
+public:
+ /// Replacement is column name and replace expression
+ struct Replacement
+ {
+ std::string column_name;
+ QueryTreeNodePtr expression_node;
+ };
+
+ /// Initialize replace column transformer with replacements
+ explicit ReplaceColumnTransformerNode(const std::vector & replacements_, bool is_strict);
+
+ ColumnTransfomerType getTransformerType() const override
+ {
+ return ColumnTransfomerType::REPLACE;
+ }
+
+ /// Get replacements
+ const ListNode & getReplacements() const
+ {
+ return children[replacements_child_index]->as();
+ }
+
+ /// Get replacements node
+ const QueryTreeNodePtr & getReplacementsNode() const
+ {
+ return children[replacements_child_index];
+ }
+
+ /// Get replacements names
+ const Names & getReplacementsNames() const
+ {
+ return replacements_names;
+ }
+
+ /// Returns true if replace column transformer is strict, false otherwise
+ bool isStrict() const
+ {
+ return is_strict;
+ }
+
+ /** Returns replacement expression if replacement is registered for expression name, null otherwise.
+ * Returned replacement expression must be cloned by caller.
+ */
+ QueryTreeNodePtr findReplacementExpression(const std::string & expression_name);
+
+ void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
+
+protected:
+ bool isEqualImpl(const IQueryTreeNode & rhs) const override;
+
+ void updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const override;
+
+ QueryTreeNodePtr cloneImpl() const override;
+
+ ASTPtr toASTImpl() const override;
+
+private:
+ ListNode & getReplacements()
+ {
+ return children[replacements_child_index]->as();
+ }
+
+ Names replacements_names;
+ bool is_strict = false;
+
+ static constexpr size_t replacements_child_index = 0;
+ static constexpr size_t children_size = replacements_child_index + 1;
+};
+
+}
diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp
new file mode 100644
index 00000000000..b7de2acb5a4
--- /dev/null
+++ b/src/Analyzer/ConstantNode.cpp
@@ -0,0 +1,71 @@
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+
+#include
+
+#include
+
+namespace DB
+{
+
+ConstantNode::ConstantNode(ConstantValuePtr constant_value_)
+    : IQueryTreeNode(children_size)
+    , constant_value(std::move(constant_value_))
+    /// Reads the member, not the moved-from parameter; valid because constant_value is declared before value_string
+    , value_string(applyVisitor(FieldVisitorToString(), constant_value->getValue()))
+{}
+
+ConstantNode::ConstantNode(Field value_, DataTypePtr value_data_type_)
+ : ConstantNode(std::make_shared(convertFieldToTypeOrThrow(value_, *value_data_type_), value_data_type_))
+{}
+
+ConstantNode::ConstantNode(Field value_)
+ : ConstantNode(value_, applyVisitor(FieldToDataType(), value_))
+{}
+
+void ConstantNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const
+{
+ buffer << std::string(indent, ' ') << "CONSTANT id: " << format_state.getNodeId(this);
+
+ if (hasAlias())
+ buffer << ", alias: " << getAlias();
+
+ buffer << ", constant_value: " << constant_value->getValue().dump();
+ buffer << ", constant_value_type: " << constant_value->getType()->getName();
+}
+
+bool ConstantNode::isEqualImpl(const IQueryTreeNode & rhs) const
+{
+ const auto & rhs_typed = assert_cast(rhs);
+ return *constant_value == *rhs_typed.constant_value && value_string == rhs_typed.value_string;
+}
+
+void ConstantNode::updateTreeHashImpl(HashState & hash_state) const
+{
+ auto type_name = constant_value->getType()->getName();
+ hash_state.update(type_name.size());
+ hash_state.update(type_name);
+
+ hash_state.update(value_string.size());
+ hash_state.update(value_string);
+}
+
+QueryTreeNodePtr ConstantNode::cloneImpl() const
+{
+ return std::make_shared(constant_value);
+}
+
+ASTPtr ConstantNode::toASTImpl() const
+{
+ return std::make_shared(constant_value->getValue());
+}
+
+}
diff --git a/src/Analyzer/ConstantNode.h b/src/Analyzer/ConstantNode.h
new file mode 100644
index 00000000000..29e8cd25532
--- /dev/null
+++ b/src/Analyzer/ConstantNode.h
@@ -0,0 +1,82 @@
+#pragma once
+
+#include
+
+#include
+
+namespace DB
+{
+
+/** Constant node represents constant value in query tree.
+ * Constant value must be representable by Field.
+ * Examples: 1, 'constant_string', [1,2,3].
+ */
+class ConstantNode;
+using ConstantNodePtr = std::shared_ptr;
+
+class ConstantNode final : public IQueryTreeNode
+{
+public:
+ /// Construct constant query tree node from constant value
+ explicit ConstantNode(ConstantValuePtr constant_value_);
+
+ /** Construct constant query tree node from field and data type.
+ *
+ * Throws exception if value cannot be converted to value data type.
+ */
+ explicit ConstantNode(Field value_, DataTypePtr value_data_type_);
+
+ /// Construct constant query tree node from field, data type will be derived from field value
+ explicit ConstantNode(Field value_);
+
+ /// Get constant value
+ const Field & getValue() const
+ {
+ return constant_value->getValue();
+ }
+
+ /// Get constant value string representation
+ const String & getValueStringRepresentation() const
+ {
+ return value_string;
+ }
+
+ ConstantValuePtr getConstantValueOrNull() const override
+ {
+ return constant_value;
+ }
+
+ QueryTreeNodeType getNodeType() const override
+ {
+ return QueryTreeNodeType::CONSTANT;
+ }
+
+ String getName() const override
+ {
+ return value_string;
+ }
+
+ DataTypePtr getResultType() const override
+ {
+ return constant_value->getType();
+ }
+
+ void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
+
+protected:
+ bool isEqualImpl(const IQueryTreeNode & rhs) const override;
+
+ void updateTreeHashImpl(HashState & hash_state) const override;
+
+ QueryTreeNodePtr cloneImpl() const override;
+
+ ASTPtr toASTImpl() const override;
+
+private:
+ ConstantValuePtr constant_value;
+ String value_string;
+
+ static constexpr size_t children_size = 0;
+};
+
+}
diff --git a/src/Analyzer/ConstantValue.h b/src/Analyzer/ConstantValue.h
new file mode 100644
index 00000000000..a9e2ffd9e65
--- /dev/null
+++ b/src/Analyzer/ConstantValue.h
@@ -0,0 +1,47 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+/** Immutable constant value representation during analysis stage.
+ * Some query nodes can be represented by constant (scalar subqueries, functions with constant arguments).
+ */
+class ConstantValue;
+using ConstantValuePtr = std::shared_ptr;
+
+class ConstantValue
+{
+public:
+ ConstantValue(Field value_, DataTypePtr data_type_)
+ : value(std::move(value_))
+ , data_type(std::move(data_type_))
+ {}
+
+ const Field & getValue() const
+ {
+ return value;
+ }
+
+ const DataTypePtr & getType() const
+ {
+ return data_type;
+ }
+private:
+ Field value;
+ DataTypePtr data_type;
+};
+
+inline bool operator==(const ConstantValue & lhs, const ConstantValue & rhs)
+{
+ return lhs.getValue() == rhs.getValue() && lhs.getType()->equals(*rhs.getType());
+}
+
+inline bool operator!=(const ConstantValue & lhs, const ConstantValue & rhs)
+{
+ return !(lhs == rhs);
+}
+
+}
diff --git a/src/Analyzer/FunctionNode.cpp b/src/Analyzer/FunctionNode.cpp
new file mode 100644
index 00000000000..7468141b3d5
--- /dev/null
+++ b/src/Analyzer/FunctionNode.cpp
@@ -0,0 +1,215 @@
+#include
+
+#include
+#include
+
+#include
+#include
+
+#include
+
+#include
+
+#include
+
+#include
+
+namespace DB
+{
+
+FunctionNode::FunctionNode(String function_name_)
+    : IQueryTreeNode(children_size)
+    , function_name(std::move(function_name_)) /// by-value sink parameter: move it instead of copying the string
+{
+    children[parameters_child_index] = std::make_shared<ListNode>();
+    children[arguments_child_index] = std::make_shared<ListNode>();
+}
+
+void FunctionNode::resolveAsFunction(FunctionOverloadResolverPtr function_value, DataTypePtr result_type_value)
+{
+ aggregate_function = nullptr;
+ function = std::move(function_value);
+ result_type = std::move(result_type_value);
+ function_name = function->getName();
+}
+
+void FunctionNode::resolveAsAggregateFunction(AggregateFunctionPtr aggregate_function_value, DataTypePtr result_type_value)
+{
+ function = nullptr;
+ aggregate_function = std::move(aggregate_function_value);
+ result_type = std::move(result_type_value);
+ function_name = aggregate_function->getName();
+}
+
+void FunctionNode::resolveAsWindowFunction(AggregateFunctionPtr window_function_value, DataTypePtr result_type_value)
+{
+    resolveAsAggregateFunction(std::move(window_function_value), std::move(result_type_value)); /// forward ownership, no extra pointer copies
+}
+
+void FunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const
+{
+    /// Header line: node kind and id followed by the scalar properties of the function
+    buffer << std::string(indent, ' ') << "FUNCTION id: " << format_state.getNodeId(this);
+    if (hasAlias())
+        buffer << ", alias: " << getAlias();
+
+    buffer << ", function_name: " << function_name;
+
+    const char * function_type = "ordinary";
+    if (isAggregateFunction())
+        function_type = "aggregate";
+    else if (isWindowFunction())
+        function_type = "window";
+    buffer << ", function_type: " << function_type;
+
+    /// Stream the type name directly instead of concatenating it into a temporary string first
+    if (result_type)
+        buffer << ", result_type: " << result_type->getName();
+
+    if (constant_value)
+    {
+        buffer << ", constant_value: " << constant_value->getValue().dump();
+        buffer << ", constant_value_type: " << constant_value->getType()->getName();
+    }
+
+    const auto & parameters = getParameters();
+    if (!parameters.getNodes().empty())
+    {
+        buffer << '\n' << std::string(indent + 2, ' ') << "PARAMETERS\n";
+        parameters.dumpTreeImpl(buffer, format_state, indent + 4);
+    }
+
+    const auto & arguments = getArguments();
+    if (!arguments.getNodes().empty())
+    {
+        buffer << '\n' << std::string(indent + 2, ' ') << "ARGUMENTS\n";
+        arguments.dumpTreeImpl(buffer, format_state, indent + 4);
+    }
+
+    if (hasWindow())
+    {
+        buffer << '\n' << std::string(indent + 2, ' ') << "WINDOW\n";
+        getWindowNode()->dumpTreeImpl(buffer, format_state, indent + 4);
+    }
+}
+
+String FunctionNode::getName() const
+{
+    /// Result has the form function_name[(parameters)](arguments)
+    String result = function_name;
+    /// The parameters section is added only when there is at least one parameter
+    const auto & function_parameters = getParameters();
+    if (!function_parameters.getNodes().empty())
+    {
+        result += '(';
+        result += function_parameters.getName();
+        result += ')';
+    }
+
+    /// The arguments section is always added, even when the argument list is empty
+    result += '(';
+    result += getArguments().getName();
+    result += ')';
+
+    return result;
+}
+
+bool FunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const
+{
+    const auto & rhs_typed = assert_cast<const FunctionNode &>(rhs);
+
+    /// Name and function kind must agree
+    if (function_name != rhs_typed.function_name
+        || isAggregateFunction() != rhs_typed.isAggregateFunction()
+        || isOrdinaryFunction() != rhs_typed.isOrdinaryFunction()
+        || isWindowFunction() != rhs_typed.isWindowFunction())
+        return false;
+
+    /// Result types: either both are unset, or both are set and equal
+    if (static_cast<bool>(result_type) != static_cast<bool>(rhs_typed.result_type))
+        return false;
+    if (result_type && !result_type->equals(*rhs_typed.getResultType()))
+        return false;
+
+    /// Constant values follow the same rule as result types
+    if (static_cast<bool>(constant_value) != static_cast<bool>(rhs_typed.constant_value))
+        return false;
+    if (constant_value && *constant_value != *rhs_typed.constant_value)
+        return false;
+
+    return true;
+}
+
+void FunctionNode::updateTreeHashImpl(HashState & hash_state) const
+{
+ hash_state.update(function_name.size());
+ hash_state.update(function_name);
+ hash_state.update(isOrdinaryFunction());
+ hash_state.update(isAggregateFunction());
+ hash_state.update(isWindowFunction());
+
+ if (result_type)
+ {
+ auto result_type_name = result_type->getName();
+ hash_state.update(result_type_name.size());
+ hash_state.update(result_type_name);
+ }
+
+ if (constant_value)
+ {
+ auto constant_dump = applyVisitor(FieldVisitorToString(), constant_value->getValue());
+ hash_state.update(constant_dump.size());
+ hash_state.update(constant_dump);
+
+ auto constant_value_type_name = constant_value->getType()->getName();
+ hash_state.update(constant_value_type_name.size());
+ hash_state.update(constant_value_type_name);
+ }
+}
+
+QueryTreeNodePtr FunctionNode::cloneImpl() const
+{
+ auto result_function = std::make_shared(function_name);
+
+ /** This is valid for clone method to reuse same function pointers
+ * because ordinary functions or aggregate functions must be stateless.
+ */
+ result_function->function = function;
+ result_function->aggregate_function = aggregate_function;
+ result_function->result_type = result_type;
+ result_function->constant_value = constant_value;
+
+ return result_function;
+}
+
+ASTPtr FunctionNode::toASTImpl() const
+{
+ auto function_ast = std::make_shared();
+
+ function_ast->name = function_name;
+ function_ast->is_window_function = isWindowFunction();
+
+ const auto & parameters = getParameters();
+ if (!parameters.getNodes().empty())
+ {
+ function_ast->children.push_back(parameters.toAST());
+ function_ast->parameters = function_ast->children.back();
+ }
+
+ const auto & arguments = getArguments();
+ function_ast->children.push_back(arguments.toAST());
+ function_ast->arguments = function_ast->children.back();
+
+ auto window_node = getWindowNode();
+ if (window_node)
+ {
+ if (auto * identifier_node = window_node->as())
+ function_ast->window_name = identifier_node->getIdentifier().getFullName();
+ else
+ function_ast->window_definition = window_node->toAST();
+ }
+
+ return function_ast;
+}
+
+}
diff --git a/src/Analyzer/FunctionNode.h b/src/Analyzer/FunctionNode.h
new file mode 100644
index 00000000000..18b4c6d445c
--- /dev/null
+++ b/src/Analyzer/FunctionNode.h
@@ -0,0 +1,232 @@
+#pragma once
+
+#include
+#include
+#include
+
+namespace DB
+{
+
+class IFunctionOverloadResolver;
+using FunctionOverloadResolverPtr = std::shared_ptr;
+
+class IAggregateFunction;
+using AggregateFunctionPtr = std::shared_ptr;
+
+/** Function node represents function in query tree.
+ * Function syntax: function_name(parameter_1, ...)(argument_1, ...).
+ * If function does not have parameters its syntax is function_name(argument_1, ...).
+ * If function does not have arguments its syntax is function_name().
+ *
+ * In query tree function parameters and arguments are represented by ListNode.
+ *
+ * Function can be:
+ * 1. Aggregate function. Example: quantile(0.5)(x), sum(x).
+ * 2. Non aggregate function. Example: plus(x, x).
+ * 3. Window function. Example: sum(x) OVER (PARTITION BY expr ORDER BY expr).
+ *
+ * Initially function node is initialized with function name.
+ * For window function client must initialize function window node.
+ *
+ * During query analysis pass function must be resolved using `resolveAsFunction`, `resolveAsAggregateFunction`, `resolveAsWindowFunction` methods.
+ * Resolved function is function that has result type and is initialized with concrete aggregate or non aggregate function.
+ */
+class FunctionNode;
+using FunctionNodePtr = std::shared_ptr;
+
+class FunctionNode final : public IQueryTreeNode
+{
+public:
+ /** Initialize function node with function name.
+ * Later during query analysis pass function must be resolved.
+ */
+ explicit FunctionNode(String function_name_);
+
+ /// Get function name
+ const String & getFunctionName() const
+ {
+ return function_name;
+ }
+
+ /// Get parameters
+ const ListNode & getParameters() const
+ {
+ return children[parameters_child_index]->as();
+ }
+
+ /// Get parameters
+ ListNode & getParameters()
+ {
+ return children[parameters_child_index]->as();
+ }
+
+ /// Get parameters node
+ const QueryTreeNodePtr & getParametersNode() const
+ {
+ return children[parameters_child_index];
+ }
+
+ /// Get parameters node
+ QueryTreeNodePtr & getParametersNode()
+ {
+ return children[parameters_child_index];
+ }
+
+ /// Get arguments
+ const ListNode & getArguments() const
+ {
+ return children[arguments_child_index]->as();
+ }
+
+ /// Get arguments
+ ListNode & getArguments()
+ {
+ return children[arguments_child_index]->as();
+ }
+
+ /// Get arguments node
+ const QueryTreeNodePtr & getArgumentsNode() const
+ {
+ return children[arguments_child_index];
+ }
+
+ /// Get arguments node
+ QueryTreeNodePtr & getArgumentsNode()
+ {
+ return children[arguments_child_index];
+ }
+
+ /// Returns true if function node has window, false otherwise
+ bool hasWindow() const
+ {
+ return children[window_child_index] != nullptr;
+ }
+
+ /** Get window node.
+ * Valid only for window function node.
+ * Result window node can be identifier node or window node.
+ * 1. It can be identifier node if window function is defined as expr OVER window_name.
+ * 2. It can be window node if window function is defined as expr OVER (window_name ...).
+ */
+ const QueryTreeNodePtr & getWindowNode() const
+ {
+ return children[window_child_index];
+ }
+
+ /** Get window node.
+ * Valid only for window function node.
+ */
+ QueryTreeNodePtr & getWindowNode()
+ {
+ return children[window_child_index];
+ }
+
+ /** Get non aggregate function.
+ * If function is not resolved nullptr returned.
+ */
+ const FunctionOverloadResolverPtr & getFunction() const
+ {
+ return function;
+ }
+
+ /** Get aggregate function.
+ * If function is not resolved nullptr returned.
+ * If function is resolved as non aggregate function nullptr returned.
+ */
+ const AggregateFunctionPtr & getAggregateFunction() const
+ {
+ return aggregate_function;
+ }
+
+ /// Is function node resolved
+ bool isResolved() const
+ {
+ return result_type != nullptr && (function != nullptr || aggregate_function != nullptr);
+ }
+
+ /// Returns true if function node has window node, which makes it a window function, false otherwise
+ bool isWindowFunction() const
+ {
+     return hasWindow();
+ }
+
+ /// Is function node aggregate function
+ bool isAggregateFunction() const
+ {
+ return aggregate_function != nullptr && !isWindowFunction();
+ }
+
+ /// Is function node ordinary function
+ bool isOrdinaryFunction() const
+ {
+ return function != nullptr;
+ }
+
+ /** Resolve function node as non aggregate function.
+ * It is important that function name is updated with resolved function name.
+ * Main motivation for this is query tree optimizations.
+ * Assume we have `multiIf` function with single condition, it can be converted to `if` function.
+ * Function name must be updated accordingly.
+ */
+ void resolveAsFunction(FunctionOverloadResolverPtr function_value, DataTypePtr result_type_value);
+
+ /** Resolve function node as aggregate function.
+ * It is important that function name is updated with resolved function name.
+ * Main motivation for this is query tree optimizations.
+ */
+ void resolveAsAggregateFunction(AggregateFunctionPtr aggregate_function_value, DataTypePtr result_type_value);
+
+ /** Resolve function node as window function.
+ * It is important that function name is updated with resolved function name.
+ * Main motivation for this is query tree optimizations.
+ */
+ void resolveAsWindowFunction(AggregateFunctionPtr window_function_value, DataTypePtr result_type_value);
+
+ /// Perform constant folding for function node
+ void performConstantFolding(ConstantValuePtr constant_folded_value)
+ {
+ constant_value = std::move(constant_folded_value);
+ }
+
+ ConstantValuePtr getConstantValueOrNull() const override
+ {
+ return constant_value;
+ }
+
+ QueryTreeNodeType getNodeType() const override
+ {
+ return QueryTreeNodeType::FUNCTION;
+ }
+
+ DataTypePtr getResultType() const override
+ {
+ return result_type;
+ }
+
+ String getName() const override;
+
+ void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
+
+protected:
+ bool isEqualImpl(const IQueryTreeNode & rhs) const override;
+
+ void updateTreeHashImpl(HashState & hash_state) const override;
+
+ QueryTreeNodePtr cloneImpl() const override;
+
+ ASTPtr toASTImpl() const override;
+
+private:
+ String function_name;
+ FunctionOverloadResolverPtr function;
+ AggregateFunctionPtr aggregate_function;
+ DataTypePtr result_type;
+ ConstantValuePtr constant_value;
+
+ static constexpr size_t parameters_child_index = 0;
+ static constexpr size_t arguments_child_index = 1;
+ static constexpr size_t window_child_index = 2;
+ static constexpr size_t children_size = window_child_index + 1;
+};
+
+}
diff --git a/src/Analyzer/IQueryTreeNode.cpp b/src/Analyzer/IQueryTreeNode.cpp
new file mode 100644
index 00000000000..ea2412eadb2
--- /dev/null
+++ b/src/Analyzer/IQueryTreeNode.cpp
@@ -0,0 +1,332 @@
+#include
+
+#include
+
+#include
+
+#include
+#include
+#include
+
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int UNSUPPORTED_METHOD;
+}
+
+/** Convert query tree node type to string.
+  * Returned pointer refers to a string literal, caller must not free it.
+  * For a value that does not match any enumerator "UNKNOWN" is returned.
+  */
+const char * toString(QueryTreeNodeType type)
+{
+    switch (type)
+    {
+        case QueryTreeNodeType::IDENTIFIER: return "IDENTIFIER";
+        case QueryTreeNodeType::MATCHER: return "MATCHER";
+        case QueryTreeNodeType::TRANSFORMER: return "TRANSFORMER";
+        case QueryTreeNodeType::LIST: return "LIST";
+        case QueryTreeNodeType::CONSTANT: return "CONSTANT";
+        case QueryTreeNodeType::FUNCTION: return "FUNCTION";
+        case QueryTreeNodeType::COLUMN: return "COLUMN";
+        case QueryTreeNodeType::LAMBDA: return "LAMBDA";
+        case QueryTreeNodeType::SORT: return "SORT";
+        case QueryTreeNodeType::INTERPOLATE: return "INTERPOLATE";
+        case QueryTreeNodeType::WINDOW: return "WINDOW";
+        case QueryTreeNodeType::TABLE: return "TABLE";
+        case QueryTreeNodeType::TABLE_FUNCTION: return "TABLE_FUNCTION";
+        case QueryTreeNodeType::QUERY: return "QUERY";
+        case QueryTreeNodeType::ARRAY_JOIN: return "ARRAY_JOIN";
+        case QueryTreeNodeType::JOIN: return "JOIN";
+        case QueryTreeNodeType::UNION: return "UNION";
+    }
+
+    /// No default case in switch above, so compiler can still warn about enumerator that is not handled.
+    /// This return is reached only for a value outside of enumerators list (for example after static_cast),
+    /// without it control would flow off the end of the function.
+    return "UNKNOWN";
+}
+
+/// Construct node with `children_size` empty children slots and `weak_pointers_size` empty weak pointers slots
+IQueryTreeNode::IQueryTreeNode(size_t children_size, size_t weak_pointers_size)
+    : children(children_size)
+    , weak_pointers(weak_pointers_size)
+{
+}
+
+/// Construct node with `children_size` empty children slots and without weak pointers
+IQueryTreeNode::IQueryTreeNode(size_t children_size)
+    : children(children_size)
+{
+}
+
+namespace
+{
+
+using NodePair = std::pair;
+
+struct NodePairHash
+{
+ size_t operator()(const NodePair & node_pair) const
+ {
+ auto hash = std::hash();
+
+ size_t result = 0;
+ boost::hash_combine(result, hash(node_pair.first));
+ boost::hash_combine(result, hash(node_pair.second));
+
+ return result;
+ }
+};
+
+}
+
+/** Compare tree with this node as root with tree with rhs node as root.
+ *
+ * Trees are traversed iteratively using explicit stack of node pairs.
+ * For each pair node type, alias and node internal state (isEqualImpl) are compared,
+ * after that children and weak pointers are compared position by position.
+ * Returns false on first difference, true if all reachable pairs are equal.
+ */
+bool IQueryTreeNode::isEqual(const IQueryTreeNode & rhs) const
+{
+ std::vector nodes_to_process;
+ /// Pairs of nodes that were already compared, such pair is skipped if it is reached again
+ std::unordered_set equals_pairs;
+
+ nodes_to_process.emplace_back(this, &rhs);
+
+ while (!nodes_to_process.empty())
+ {
+ auto nodes_to_compare = nodes_to_process.back();
+ nodes_to_process.pop_back();
+
+ const auto * lhs_node_to_compare = nodes_to_compare.first;
+ const auto * rhs_node_to_compare = nodes_to_compare.second;
+
+ if (equals_pairs.contains(std::make_pair(lhs_node_to_compare, rhs_node_to_compare)))
+ continue;
+
+ /// Null children and expired weak pointers are filtered out before pair is added into stack
+ assert(lhs_node_to_compare);
+ assert(rhs_node_to_compare);
+
+ if (lhs_node_to_compare->getNodeType() != rhs_node_to_compare->getNodeType() ||
+ lhs_node_to_compare->alias != rhs_node_to_compare->alias ||
+ !lhs_node_to_compare->isEqualImpl(*rhs_node_to_compare))
+ {
+ return false;
+ }
+
+ const auto & lhs_children = lhs_node_to_compare->children;
+ const auto & rhs_children = rhs_node_to_compare->children;
+
+ size_t lhs_children_size = lhs_children.size();
+ if (lhs_children_size != rhs_children.size())
+ return false;
+
+ for (size_t i = 0; i < lhs_children_size; ++i)
+ {
+ const auto & lhs_child = lhs_children[i];
+ const auto & rhs_child = rhs_children[i];
+
+ /// Children at the same position must be both null or both not null
+ if (!lhs_child && !rhs_child)
+ continue;
+ else if (lhs_child && !rhs_child)
+ return false;
+ else if (!lhs_child && rhs_child)
+ return false;
+
+ nodes_to_process.emplace_back(lhs_child.get(), rhs_child.get());
+ }
+
+ const auto & lhs_weak_pointers = lhs_node_to_compare->weak_pointers;
+ const auto & rhs_weak_pointers = rhs_node_to_compare->weak_pointers;
+
+ size_t lhs_weak_pointers_size = lhs_weak_pointers.size();
+
+ if (lhs_weak_pointers_size != rhs_weak_pointers.size())
+ return false;
+
+ for (size_t i = 0; i < lhs_weak_pointers_size; ++i)
+ {
+ /// Weak pointers are locked, so nodes they point to are compared, not pointer values
+ auto lhs_strong_pointer = lhs_weak_pointers[i].lock();
+ auto rhs_strong_pointer = rhs_weak_pointers[i].lock();
+
+ /// Weak pointers at the same position must be both expired or both alive
+ if (!lhs_strong_pointer && !rhs_strong_pointer)
+ continue;
+ else if (lhs_strong_pointer && !rhs_strong_pointer)
+ return false;
+ else if (!lhs_strong_pointer && rhs_strong_pointer)
+ return false;
+
+ nodes_to_process.emplace_back(lhs_strong_pointer.get(), rhs_strong_pointer.get());
+ }
+
+ /// Pair is remembered after its children and weak pointers are added into stack and before any of them is processed
+ equals_pairs.emplace(lhs_node_to_compare, rhs_node_to_compare);
+ }
+
+ return true;
+}
+
+/** Compute hash of tree with this node as root.
+ *
+ * Tree is traversed iteratively using explicit stack.
+ * Node that is visited for the first time updates hash with its node type, alias (if not empty),
+ * internal state (updateTreeHashImpl), children size and weak pointers size.
+ * Node that is reached again updates hash only with identifier that was assigned to it on first visit.
+ */
+IQueryTreeNode::Hash IQueryTreeNode::getTreeHash() const
+{
+ HashState hash_state;
+
+ /// Visited node to its sequential identifier, identifier is the map size at the moment of first visit
+ std::unordered_map node_to_identifier;
+
+ std::vector nodes_to_process;
+ nodes_to_process.push_back(this);
+
+ while (!nodes_to_process.empty())
+ {
+ const auto * node_to_process = nodes_to_process.back();
+ nodes_to_process.pop_back();
+
+ auto node_identifier_it = node_to_identifier.find(node_to_process);
+ if (node_identifier_it != node_to_identifier.end())
+ {
+ hash_state.update(node_identifier_it->second);
+ continue;
+ }
+
+ node_to_identifier.emplace(node_to_process, node_to_identifier.size());
+
+ hash_state.update(static_cast(node_to_process->getNodeType()));
+ /// Size is added before string data, empty alias does not update hash at all
+ if (!node_to_process->alias.empty())
+ {
+ hash_state.update(node_to_process->alias.size());
+ hash_state.update(node_to_process->alias);
+ }
+
+ node_to_process->updateTreeHashImpl(hash_state);
+
+ hash_state.update(node_to_process->children.size());
+
+ /// Null children are counted in children size above, but are not added into stack
+ for (const auto & node_to_process_child : node_to_process->children)
+ {
+ if (!node_to_process_child)
+ continue;
+
+ nodes_to_process.push_back(node_to_process_child.get());
+ }
+
+ hash_state.update(node_to_process->weak_pointers.size());
+
+ /// Expired weak pointers are counted in weak pointers size above, but are not added into stack
+ for (const auto & weak_pointer : node_to_process->weak_pointers)
+ {
+ auto strong_pointer = weak_pointer.lock();
+ if (!strong_pointer)
+ continue;
+
+ nodes_to_process.push_back(strong_pointer.get());
+ }
+ }
+
+ Hash result;
+ hash_state.get128(result);
+
+ return result;
+}
+
+/// Clone tree with this node as root, returns root of the cloned tree
+QueryTreeNodePtr IQueryTreeNode::clone() const
+{
+ /** Clone tree with this node as root.
+ *
+ * Algorithm
+ * For each node we clone state and also create mapping old pointer to new pointer.
+ * For each cloned node we update weak pointers array.
+ *
+ * After that we can update pointer in weak pointers array using old pointer to new pointer mapping.
+ */
+ std::unordered_map old_pointer_to_new_pointer;
+ std::vector weak_pointers_to_update_after_clone;
+
+ QueryTreeNodePtr result_cloned_node_place;
+
+ /// Each element is node to clone and place where pointer to its clone must be written
+ std::vector> nodes_to_clone;
+ nodes_to_clone.emplace_back(this, &result_cloned_node_place);
+
+ while (!nodes_to_clone.empty())
+ {
+ const auto [node_to_clone, place_for_cloned_node] = nodes_to_clone.back();
+ nodes_to_clone.pop_back();
+
+ auto node_clone = node_to_clone->cloneImpl();
+ *place_for_cloned_node = node_clone;
+
+ node_clone->setAlias(node_to_clone->alias);
+ node_clone->setOriginalAST(node_to_clone->original_ast);
+ /// Children and weak pointers are copied as is at first, children are replaced with clones when they are processed below
+ node_clone->children = node_to_clone->children;
+ node_clone->weak_pointers = node_to_clone->weak_pointers;
+
+ old_pointer_to_new_pointer.emplace(node_to_clone, node_clone);
+
+ for (auto & child : node_clone->children)
+ {
+ if (!child)
+ continue;
+
+ /// Child slot still holds pointer to old child, its address is the place for the cloned child
+ nodes_to_clone.emplace_back(child.get(), &child);
+ }
+
+ for (auto & weak_pointer : node_clone->weak_pointers)
+ {
+ weak_pointers_to_update_after_clone.push_back(&weak_pointer);
+ }
+ }
+
+ /** Update weak pointers to new pointers if they were changed during clone.
+ * To do this we check old pointer to new pointer map, if weak pointer
+ * strong pointer exists as old pointer in map, reinitialize weak pointer with new pointer.
+ */
+ for (auto & weak_pointer_ptr : weak_pointers_to_update_after_clone)
+ {
+ assert(weak_pointer_ptr);
+ /// For expired weak pointer lookup is done with null pointer
+ auto strong_pointer = weak_pointer_ptr->lock();
+ auto it = old_pointer_to_new_pointer.find(strong_pointer.get());
+
+ /** If node had weak pointer to some other node and this node is not part of cloned subtree do not update weak pointer.
+ * It will continue to point to previous location and it is expected.
+ *
+ * Example: SELECT id FROM test_table;
+ * During analysis `id` is resolved as column node and `test_table` is column source.
+ * If we clone `id` column, result column node weak source pointer will point to the same `test_table` column source.
+ */
+ if (it == old_pointer_to_new_pointer.end())
+ continue;
+
+ *weak_pointer_ptr = it->second;
+ }
+
+ return result_cloned_node_place;
+}
+
+/** Convert query tree node to AST using toASTImpl.
+ * Node alias is set for converted AST only if dynamic cast of converted AST below succeeds.
+ */
+ASTPtr IQueryTreeNode::toAST() const
+{
+ auto converted_node = toASTImpl();
+
+ /// Cast result is used only as condition, alias is set through converted_node pointer
+ if (auto * ast_with_alias = dynamic_cast(converted_node.get()))
+ converted_node->setAlias(alias);
+
+ return converted_node;
+}
+
+String IQueryTreeNode::formatOriginalASTForErrorMessage() const
+{
+ if (!original_ast)
+ throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Original AST was not set");
+
+ return original_ast->formatForErrorMessage();
+}
+
+String IQueryTreeNode::formatConvertedASTForErrorMessage() const
+{
+ return toAST()->formatForErrorMessage();
+}
+
+String IQueryTreeNode::dumpTree() const
+{
+ WriteBufferFromOwnString buffer;
+ dumpTree(buffer);
+
+ return buffer.str();
+}
+
+/** Get identifier of node.
+  * Node that is seen for the first time gets next sequential identifier (current count of known nodes),
+  * for already known node its existing identifier is returned.
+  */
+size_t IQueryTreeNode::FormatState::getNodeId(const IQueryTreeNode * node)
+{
+    const size_t next_node_id = node_to_id.size();
+    return node_to_id.emplace(node, next_node_id).first->second;
+}
+
+void IQueryTreeNode::dumpTree(WriteBuffer & buffer) const
+{
+ FormatState state;
+ dumpTreeImpl(buffer, state, 0);
+}
+
+}
diff --git a/src/Analyzer/IQueryTreeNode.h b/src/Analyzer/IQueryTreeNode.h
new file mode 100644
index 00000000000..2ba96d27575
--- /dev/null
+++ b/src/Analyzer/IQueryTreeNode.h
@@ -0,0 +1,282 @@
+#pragma once
+
+#include
+#include
+#include
+
+#include
+
+#include
+
+#include
+
+#include
+#include
+
+class SipHash;
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int UNSUPPORTED_METHOD;
+ extern const int LOGICAL_ERROR;
+}
+
+class WriteBuffer;
+
+/// Query tree node type
+enum class QueryTreeNodeType
+{
+ IDENTIFIER,
+ MATCHER,
+ TRANSFORMER,
+ LIST,
+ CONSTANT,
+ FUNCTION,
+ COLUMN,
+ LAMBDA,
+ SORT,
+ INTERPOLATE,
+ WINDOW,
+ TABLE,
+ TABLE_FUNCTION,
+ QUERY,
+ ARRAY_JOIN,
+ JOIN,
+ UNION
+};
+
+/// Convert query tree node type to string
+const char * toString(QueryTreeNodeType type);
+
+/** Query tree is semantical representation of query.
+ * Query tree node represent node in query tree.
+ * IQueryTreeNode is base class for all query tree nodes.
+ *
+ * Important property of query tree is that each query tree node can contain weak pointers to other
+ * query tree nodes. Keeping weak pointer to other query tree nodes can be useful for example for column
+ * to keep weak pointer to column source, column source can be table, lambda, subquery and preserving of
+ * such information can significantly simplify query planning.
+ *
+ * Another important property of query tree is that it must be convertible to AST without losing information.
+ */
+class IQueryTreeNode;
+using QueryTreeNodePtr = std::shared_ptr;
+using QueryTreeNodes = std::vector;
+using QueryTreeNodeWeakPtr = std::weak_ptr;
+using QueryTreeWeakNodes = std::vector;
+
+class IQueryTreeNode : public TypePromotion
+{
+public:
+ virtual ~IQueryTreeNode() = default;
+
+ /// Get query tree node type
+ virtual QueryTreeNodeType getNodeType() const = 0;
+
+ /// Get query tree node type name
+ const char * getNodeTypeName() const
+ {
+ return toString(getNodeType());
+ }
+
+ /** Get name of query tree node that can be used as part of expression.
+ * TODO: Projection name, expression name must be refactored in better interface.
+ */
+ virtual String getName() const
+ {
+ throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Method getName is not supported for {} query node", getNodeTypeName());
+ }
+
+ /** Get result type of query tree node that can be used as part of expression.
+ * If node does not support this method exception is thrown.
+ * TODO: Maybe this can be a part of ExpressionQueryTreeNode.
+ */
+ virtual DataTypePtr getResultType() const
+ {
+ throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Method getResultType is not supported for {} query node", getNodeTypeName());
+ }
+
+ /// Returns true if node has constant value
+ bool hasConstantValue() const
+ {
+ return getConstantValueOrNull() != nullptr;
+ }
+
+ /** Returns constant value with type if node has constant value, and can be replaced with it.
+ * Examples: scalar subquery, function with constant arguments.
+ */
+ virtual const ConstantValue & getConstantValue() const
+ {
+ auto constant_value = getConstantValueOrNull();
+ if (!constant_value)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Node does not have constant value");
+
+ return *constant_value;
+ }
+
+ /// Returns constant value with type if node has constant value or null otherwise
+ virtual ConstantValuePtr getConstantValueOrNull() const
+ {
+ return {};
+ }
+
+ /** Is tree equal to other tree with node root.
+ *
+ * Aliases of query tree nodes are compared during isEqual call.
+ * Original ASTs of query tree nodes are not compared during isEqual call.
+ */
+ bool isEqual(const IQueryTreeNode & rhs) const;
+
+ using Hash = std::pair;
+ using HashState = SipHash;
+
+ /** Get tree hash identifying current tree
+ *
+ * Alias of query tree node is part of query tree hash.
+ * Original AST is not part of query tree hash.
+ */
+ Hash getTreeHash() const;
+
+ /// Get a deep copy of the query tree
+ QueryTreeNodePtr clone() const;
+
+ /// Returns true if node has alias, false otherwise
+ bool hasAlias() const
+ {
+ return !alias.empty();
+ }
+
+ /// Get node alias
+ const String & getAlias() const
+ {
+ return alias;
+ }
+
+ /// Set node alias
+ void setAlias(String alias_value)
+ {
+ alias = std::move(alias_value);
+ }
+
+ /// Remove node alias
+ void removeAlias()
+ {
+ alias = {};
+ }
+
+ /// Returns true if query tree node has original AST, false otherwise
+ bool hasOriginalAST() const
+ {
+ return original_ast != nullptr;
+ }
+
+ /// Get query tree node original AST
+ const ASTPtr & getOriginalAST() const
+ {
+ return original_ast;
+ }
+
+ /** Set query tree node original AST.
+ * This AST will not be modified later.
+ */
+ void setOriginalAST(ASTPtr original_ast_value)
+ {
+ original_ast = std::move(original_ast_value);
+ }
+
+ /** If query tree has original AST format it for error message.
+ * Otherwise exception is thrown.
+ */
+ String formatOriginalASTForErrorMessage() const;
+
+ /// Convert query tree to AST
+ ASTPtr toAST() const;
+
+ /// Convert query tree to AST and then format it for error message.
+ String formatConvertedASTForErrorMessage() const;
+
+ /** Format AST for error message.
+ * If original AST exists use `formatOriginalASTForErrorMessage`.
+ * Otherwise use `formatConvertedASTForErrorMessage`.
+ */
+ String formatASTForErrorMessage() const
+ {
+ if (original_ast)
+ return formatOriginalASTForErrorMessage();
+
+ return formatConvertedASTForErrorMessage();
+ }
+
+ /// Dump query tree to string
+ String dumpTree() const;
+
+ /// Dump query tree to buffer
+ void dumpTree(WriteBuffer & buffer) const;
+
+ class FormatState
+ {
+ public:
+ size_t getNodeId(const IQueryTreeNode * node);
+
+ private:
+ std::unordered_map node_to_id;
+ };
+
+ /** Dump query tree to buffer starting with indent.
+ *
+ * Node must also dump its children.
+ */
+ virtual void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const = 0;
+
+ /// Get query tree node children
+ QueryTreeNodes & getChildren()
+ {
+ return children;
+ }
+
+ /// Get query tree node children
+ const QueryTreeNodes & getChildren() const
+ {
+ return children;
+ }
+
+protected:
+ /** Construct query tree node.
+ * Resize children to children size.
+ * Resize weak pointers to weak pointers size.
+ */
+ explicit IQueryTreeNode(size_t children_size, size_t weak_pointers_size);
+
+ /// Construct query tree node and resize children to children size
+ explicit IQueryTreeNode(size_t children_size);
+
+ /** Subclass must compare its internal state with rhs node internal state and do not compare children or weak pointers to other
+ * query tree nodes.
+ */
+ virtual bool isEqualImpl(const IQueryTreeNode & rhs) const = 0;
+
+ /** Subclass must update tree hash with its internal state and do not update tree hash for children or weak pointers to other
+ * query tree nodes.
+ */
+ virtual void updateTreeHashImpl(HashState & hash_state) const = 0;
+
+ /** Subclass must clone its internal state and do not clone children or weak pointers to other
+ * query tree nodes.
+ */
+ virtual QueryTreeNodePtr cloneImpl() const = 0;
+
+ /// Subclass must convert its internal state and its children to AST
+ virtual ASTPtr toASTImpl() const = 0;
+
+ QueryTreeNodes children;
+ QueryTreeWeakNodes weak_pointers;
+
+private:
+ String alias;
+ ASTPtr original_ast;
+};
+
+}
diff --git a/src/Analyzer/IQueryTreePass.h b/src/Analyzer/IQueryTreePass.h
new file mode 100644
index 00000000000..39b3d743ed3
--- /dev/null
+++ b/src/Analyzer/IQueryTreePass.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include
+
+#include
+
+
+namespace DB
+{
+
+/** After query tree is built it can be later processed by query tree passes.
+ * This is abstract base class for all query tree passes.
+ *
+ * Query tree pass can make query tree modifications, after each pass query tree must be valid.
+ * Query tree pass must be isolated and perform only necessary query tree modifications for doing its job.
+ * Dependencies between passes must be avoided.
+ */
+class IQueryTreePass;
+using QueryTreePassPtr = std::shared_ptr;
+using QueryTreePasses = std::vector;
+
+class IQueryTreePass
+{
+public:
+ virtual ~IQueryTreePass() = default;
+
+ /// Get query tree pass name
+ virtual String getName() = 0;
+
+ /// Get query tree pass description
+ virtual String getDescription() = 0;
+
+ /// Run pass over query tree
+ virtual void run(QueryTreeNodePtr query_tree_node, ContextPtr context) = 0;
+
+};
+
+}
diff --git a/src/Analyzer/Identifier.h b/src/Analyzer/Identifier.h
new file mode 100644
index 00000000000..2252ce2854f
--- /dev/null
+++ b/src/Analyzer/Identifier.h
@@ -0,0 +1,412 @@
+#pragma once
+
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+
+
+namespace DB
+{
+
+/** Identifier consists of identifier parts.
+ * Each identifier part is an arbitrarily long sequence of digits, underscores, lowercase and uppercase letters.
+ * Example: a, a.b, a.b.c.
+ */
+class Identifier
+{
+public:
+ Identifier() = default;
+
+ /// Create Identifier from parts
+ explicit Identifier(const std::vector & parts_)
+ : parts(parts_)
+ , full_name(boost::algorithm::join(parts, "."))
+ {
+ }
+
+ /// Create Identifier from parts
+ explicit Identifier(std::vector && parts_)
+ : parts(std::move(parts_))
+ , full_name(boost::algorithm::join(parts, "."))
+ {
+ }
+
+ /// Create Identifier from full name, full name is split with '.' as separator.
+ explicit Identifier(const std::string & full_name_)
+ : full_name(full_name_)
+ {
+ boost::split(parts, full_name, [](char c) { return c == '.'; });
+ }
+
+ /// Create Identifier from full name, full name is split with '.' as separator.
+ explicit Identifier(std::string && full_name_)
+ : full_name(std::move(full_name_))
+ {
+ boost::split(parts, full_name, [](char c) { return c == '.'; });
+ }
+
+ const std::string & getFullName() const
+ {
+ return full_name;
+ }
+
+ const std::vector & getParts() const
+ {
+ return parts;
+ }
+
+ size_t getPartsSize() const
+ {
+ return parts.size();
+ }
+
+ bool empty() const
+ {
+ return parts.empty();
+ }
+
+ bool isEmpty() const
+ {
+ return parts.empty();
+ }
+
+ bool isShort() const
+ {
+ return parts.size() == 1;
+ }
+
+ bool isCompound() const
+ {
+ return parts.size() > 1;
+ }
+
+ const std::string & at(size_t index) const
+ {
+ if (index >= parts.size())
+ throw std::out_of_range("identifier access part is out of range");
+
+ return parts[index];
+ }
+
+ const std::string & operator[](size_t index) const
+ {
+ return parts[index];
+ }
+
+ const std::string & front() const
+ {
+ return parts.front();
+ }
+
+ const std::string & back() const
+ {
+ return parts.back();
+ }
+
+ /** Returns true, if first part of identifier is equal to part, false otherwise.
+   * For identifier without parts false is returned.
+   * Method does not modify identifier, so it is const and can be called for const Identifier.
+   */
+ bool startsWith(const std::string_view & part) const
+ {
+     return !parts.empty() && parts.front() == part;
+ }
+
+ /** Returns true, if last part of identifier is equal to part, false otherwise.
+   * For identifier without parts false is returned.
+   * Method does not modify identifier, so it is const and can be called for const Identifier.
+   */
+ bool endsWith(const std::string_view & part) const
+ {
+     return !parts.empty() && parts.back() == part;
+ }
+
+ using const_iterator = std::vector::const_iterator;
+
+ const_iterator begin() const
+ {
+ return parts.begin();
+ }
+
+ const_iterator end() const
+ {
+ return parts.end();
+ }
+
+ void popFirst(size_t parts_to_remove_size)
+ {
+ assert(parts_to_remove_size <= parts.size());
+
+ size_t parts_size = parts.size();
+ std::vector result_parts;
+ result_parts.reserve(parts_size - parts_to_remove_size);
+
+ for (size_t i = parts_to_remove_size; i < parts_size; ++i)
+ result_parts.push_back(std::move(parts[i]));
+
+ parts = std::move(result_parts);
+ full_name = boost::algorithm::join(parts, ".");
+ }
+
+ void popFirst()
+ {
+ return popFirst(1);
+ }
+
+ void popLast(size_t parts_to_remove_size)
+ {
+ assert(parts_to_remove_size <= parts.size());
+
+ for (size_t i = 0; i < parts_to_remove_size; ++i)
+ {
+ size_t last_part_size = parts.back().size();
+ parts.pop_back();
+ bool is_not_last = !parts.empty();
+ full_name.resize(full_name.size() - (last_part_size + static_cast(is_not_last)));
+ }
+ }
+
+ void popLast()
+ {
+ return popLast(1);
+ }
+
+ void pop_back() /// NOLINT
+ {
+ popLast();
+ }
+
+ /// Move part to the end of identifier, full name is updated accordingly
+ void push_back(std::string && part) /// NOLINT
+ {
+     emplace_back(std::move(part));
+ }
+
+ /// Copy part to the end of identifier, full name is updated accordingly
+ void push_back(const std::string & part) /// NOLINT
+ {
+     emplace_back(part);
+ }
+
+ /** Construct part from args at the end of identifier and append it to full name.
+   *
+   * Separator '.' is added only if identifier already had parts before the call.
+   * Otherwise full name of identifier with single part `a` would become `.a`
+   * and would not match the result of joining parts with '.'.
+   */
+ template <typename... Args>
+ void emplace_back(Args &&... args) /// NOLINT
+ {
+     const bool had_parts = !parts.empty();
+     parts.emplace_back(std::forward<Args>(args)...);
+
+     if (had_parts)
+         full_name += '.';
+
+     full_name += parts.back();
+ }
+private:
+ std::vector parts;
+ std::string full_name;
+};
+
+inline bool operator==(const Identifier & lhs, const Identifier & rhs)
+{
+ return lhs.getFullName() == rhs.getFullName();
+}
+
+inline bool operator!=(const Identifier & lhs, const Identifier & rhs)
+{
+ return !(lhs == rhs);
+}
+
+inline std::ostream & operator<<(std::ostream & stream, const Identifier & identifier)
+{
+ stream << identifier.getFullName();
+ return stream;
+}
+
+using Identifiers = std::vector;
+
+/// View for Identifier
+class IdentifierView
+{
+public:
+ IdentifierView() = default;
+
+ IdentifierView(const Identifier & identifier) /// NOLINT
+ : full_name_view(identifier.getFullName())
+ , parts_start_it(identifier.begin())
+ , parts_end_it(identifier.end())
+ {}
+
+ std::string_view getFullName() const
+ {
+ return full_name_view;
+ }
+
+ size_t getPartsSize() const
+ {
+ return parts_end_it - parts_start_it;
+ }
+
+ bool empty() const
+ {
+ return parts_start_it == parts_end_it;
+ }
+
+ bool isEmpty() const
+ {
+ return parts_start_it == parts_end_it;
+ }
+
+ bool isShort() const
+ {
+ return getPartsSize() == 1;
+ }
+
+ bool isCompound() const
+ {
+ return getPartsSize() > 1;
+ }
+
+ std::string_view at(size_t index) const
+ {
+ if (index >= getPartsSize())
+ throw std::out_of_range("identifier access part is out of range");
+
+ return *(parts_start_it + index);
+ }
+
+ std::string_view operator[](size_t index) const
+ {
+ return *(parts_start_it + index);
+ }
+
+ std::string_view front() const
+ {
+ return *parts_start_it;
+ }
+
+ std::string_view back() const
+ {
+ return *(parts_end_it - 1);
+ }
+
+ bool startsWith(std::string_view part) const
+ {
+ return !isEmpty() && *parts_start_it == part;
+ }
+
+ bool endsWith(std::string_view part) const
+ {
+ return !isEmpty() && *(parts_end_it - 1) == part;
+ }
+
+ void popFirst(size_t parts_to_remove_size)
+ {
+ assert(parts_to_remove_size <= getPartsSize());
+
+ for (size_t i = 0; i < parts_to_remove_size; ++i)
+ {
+ size_t part_size = parts_start_it->size();
+ ++parts_start_it;
+ bool is_not_last = parts_start_it != parts_end_it;
+ full_name_view.remove_prefix(part_size + is_not_last);
+ }
+ }
+
+ void popFirst()
+ {
+ popFirst(1);
+ }
+
+ void popLast(size_t parts_to_remove_size)
+ {
+ assert(parts_to_remove_size <= getPartsSize());
+
+ for (size_t i = 0; i < parts_to_remove_size; ++i)
+ {
+ size_t last_part_size = (parts_end_it - 1)->size();
+ --parts_end_it;
+ bool is_not_last = parts_start_it != parts_end_it;
+ full_name_view.remove_suffix(last_part_size + is_not_last);
+ }
+ }
+
+ void popLast()
+ {
+ popLast(1);
+ }
+
+ using const_iterator = Identifier::const_iterator;
+
+ const_iterator begin() const
+ {
+ return parts_start_it;
+ }
+
+ const_iterator end() const
+ {
+ return parts_end_it;
+ }
+private:
+ std::string_view full_name_view;
+ const_iterator parts_start_it;
+ const_iterator parts_end_it;
+};
+
+inline bool operator==(const IdentifierView & lhs, const IdentifierView & rhs)
+{
+ return lhs.getFullName() == rhs.getFullName();
+}
+
+inline bool operator!=(const IdentifierView & lhs, const IdentifierView & rhs)
+{
+ return !(lhs == rhs);
+}
+
+inline std::ostream & operator<<(std::ostream & stream, const IdentifierView & identifier_view)
+{
+ stream << identifier_view.getFullName();
+ return stream;
+}
+
+}
+
+/// See https://fmt.dev/latest/api.html#formatting-user-defined-types
+
+template <>
+struct fmt::formatter
+{
+ constexpr static auto parse(format_parse_context & ctx)
+ {
+ const auto * it = ctx.begin();
+ const auto * end = ctx.end();
+
+ /// Only support {}.
+ if (it != end && *it != '}')
+ throw format_error("invalid format");
+
+ return it;
+ }
+
+ template
+ auto format(const DB::Identifier & identifier, FormatContext & ctx)
+ {
+ return format_to(ctx.out(), "{}", identifier.getFullName());
+ }
+};
+
+template <>
+struct fmt::formatter
+{
+ constexpr static auto parse(format_parse_context & ctx)
+ {
+ const auto * it = ctx.begin();
+ const auto * end = ctx.end();
+
+ /// Only support {}.
+ if (it != end && *it != '}')
+ throw format_error("invalid format");
+
+ return it;
+ }
+
+ template
+ auto format(const DB::IdentifierView & identifier_view, FormatContext & ctx)
+ {
+ return format_to(ctx.out(), "{}", identifier_view.getFullName());
+ }
+};
diff --git a/src/Analyzer/IdentifierNode.cpp b/src/Analyzer/IdentifierNode.cpp
new file mode 100644
index 00000000000..4efc7f515ea
--- /dev/null
+++ b/src/Analyzer/IdentifierNode.cpp
@@ -0,0 +1,75 @@
+#include
+
+#include
+
+#include
+#include
+
+#include
+
+namespace DB
+{
+
+IdentifierNode::IdentifierNode(Identifier identifier_)
+ : IQueryTreeNode(children_size) /// children_size is 0 for identifier node
+ , identifier(std::move(identifier_))
+{}
+
+IdentifierNode::IdentifierNode(Identifier identifier_, TableExpressionModifiers table_expression_modifiers_)
+ : IQueryTreeNode(children_size) /// children_size is 0 for identifier node
+ , identifier(std::move(identifier_))
+ , table_expression_modifiers(std::move(table_expression_modifiers_)) /// Stored in optional member, it is empty for the other constructor
+{}
+
+void IdentifierNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const
+{
+ buffer << std::string(indent, ' ') << "IDENTIFIER id: " << format_state.getNodeId(this);
+ /// Alias is written only if node has alias
+ if (hasAlias())
+ buffer << ", alias: " << getAlias();
+
+ buffer << ", identifier: " << identifier.getFullName();
+ /// Table expression modifiers are optional, they are dumped after identifier if present
+ if (table_expression_modifiers)
+ {
+ buffer << ", ";
+ table_expression_modifiers->dump(buffer);
+ }
+}
+
+bool IdentifierNode::isEqualImpl(const IQueryTreeNode & rhs) const
+{
+    const auto & rhs_typed = assert_cast<const IdentifierNode &>(rhs);
+
+    /** Nodes are equal if both table expression modifiers and identifiers are equal.
+      * Comparison of std::optional handles all cases at once: two empty optionals are equal,
+      * empty and non-empty optionals are not equal, two non-empty optionals are compared by value.
+      */
+    if (table_expression_modifiers != rhs_typed.table_expression_modifiers)
+        return false;
+
+    return identifier == rhs_typed.identifier;
+}
+
+void IdentifierNode::updateTreeHashImpl(HashState & state) const
+{
+    /// Length of full name is added into hash state before the name itself
+    const auto & full_name = identifier.getFullName();
+    state.update(full_name.size());
+    state.update(full_name);
+    if (table_expression_modifiers.has_value())
+        table_expression_modifiers->updateTreeHash(state);
+}
+
+QueryTreeNodePtr IdentifierNode::cloneImpl() const /// Clone keeps table expression modifiers, isEqualImpl and updateTreeHashImpl take them into account
+{
+    return table_expression_modifiers ? std::make_shared<IdentifierNode>(identifier, *table_expression_modifiers) : std::make_shared<IdentifierNode>(identifier);
+}
+
+ASTPtr IdentifierNode::toASTImpl() const
+{
+ auto identifier_parts = identifier.getParts(); /// Local value is needed to pass parts with std::move below
+ return std::make_shared(std::move(identifier_parts));
+}
+
+}
diff --git a/src/Analyzer/IdentifierNode.h b/src/Analyzer/IdentifierNode.h
new file mode 100644
index 00000000000..7a2351f0ece
--- /dev/null
+++ b/src/Analyzer/IdentifierNode.h
@@ -0,0 +1,76 @@
+#pragma once
+
+#include
+#include
+#include
+
+namespace DB
+{
+
+/** Identifier node represents identifier in query tree.
+ * Example: SELECT a FROM test_table.
+ * a - is identifier.
+ * test_table - is identifier.
+ *
+ * Identifier resolution must be done during query analysis pass.
+ */
+class IdentifierNode final : public IQueryTreeNode
+{
+public:
+ /// Construct identifier node with identifier
+ explicit IdentifierNode(Identifier identifier_);
+
+ /** Construct identifier node with identifier and table expression modifiers
+ * when identifier node is part of JOIN TREE.
+ *
+ * Example: SELECT * FROM test_table SAMPLE 0.1 OFFSET 0.1 FINAL
+ */
+ explicit IdentifierNode(Identifier identifier_, TableExpressionModifiers table_expression_modifiers_);
+
+ /// Get identifier
+ const Identifier & getIdentifier() const
+ {
+ return identifier;
+ }
+
+ /// Return true if identifier node has table expression modifiers, false otherwise
+ bool hasTableExpressionModifiers() const
+ {
+ return table_expression_modifiers.has_value();
+ }
+
+ /// Get table expression modifiers, returned optional is empty if node has no modifiers
+ const std::optional & getTableExpressionModifiers() const
+ {
+ return table_expression_modifiers;
+ }
+
+ QueryTreeNodeType getNodeType() const override
+ {
+ return QueryTreeNodeType::IDENTIFIER;
+ }
+
+ String getName() const override /// Name of identifier node is full name of identifier
+ {
+ return identifier.getFullName();
+ }
+
+ void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
+
+protected:
+ bool isEqualImpl(const IQueryTreeNode & rhs) const override;
+
+ void updateTreeHashImpl(HashState & state) const override;
+
+ QueryTreeNodePtr cloneImpl() const override;
+
+ ASTPtr toASTImpl() const override;
+
+private:
+ Identifier identifier;
+ std::optional table_expression_modifiers; /// Has value only if node was constructed with table expression modifiers
+
+ static constexpr size_t children_size = 0; /// Passed to IQueryTreeNode constructor, identifier node has no children
+};
+
+}
diff --git a/src/Analyzer/InDepthQueryTreeVisitor.h b/src/Analyzer/InDepthQueryTreeVisitor.h
new file mode 100644
index 00000000000..96972024d87
--- /dev/null
+++ b/src/Analyzer/InDepthQueryTreeVisitor.h
@@ -0,0 +1,87 @@
+#pragma once
+
+#include
+
+#include
+
+
+namespace DB
+{
+
+/** Visitor that traverses query tree in depth.
+ * Derived class must implement `visitImpl` method.
+ * Additionally subclass can control if child needs to be visited using `needChildVisit` method, by
+ * default all node children are visited.
+ * By default visitor traverses tree from top to bottom, if bottom to top traversal is required subclass
+ * can override `shouldTraverseTopToBottom` method.
+ *
+ * Usage example:
+ * class FunctionsVisitor : public InDepthQueryTreeVisitor<FunctionsVisitor>
+ * {
+ * void visitImpl(VisitQueryTreeNodeType & query_tree_node)
+ * {
+ * if (query_tree_node->getNodeType() == QueryTreeNodeType::FUNCTION)
+ * processFunctionNode(query_tree_node);
+ * }
+ * }
+ */
+template
+class InDepthQueryTreeVisitor
+{
+public:
+ using VisitQueryTreeNodeType = std::conditional_t;
+
+ /// Return true if visitor should traverse tree top to bottom, false otherwise
+ bool shouldTraverseTopToBottom() const
+ {
+ return true;
+ }
+
+ /// Return true if visitor should visit child, false otherwise
+ bool needChildVisit(VisitQueryTreeNodeType & parent [[maybe_unused]], VisitQueryTreeNodeType & child [[maybe_unused]])
+ {
+ return true;
+ }
+
+ void visit(VisitQueryTreeNodeType & query_tree_node) /// Entry point, visits node and its children recursively
+ {
+ bool traverse_top_to_bottom = getDerived().shouldTraverseTopToBottom();
+ if (!traverse_top_to_bottom)
+ visitChildren(query_tree_node);
+ /// Node is visited after its children in bottom to top mode and before them in top to bottom mode
+ getDerived().visitImpl(query_tree_node);
+
+ if (traverse_top_to_bottom)
+ visitChildren(query_tree_node);
+ }
+
+private:
+ Derived & getDerived() /// Methods of derived class are called through this cast, without virtual functions
+ {
+ return *static_cast(this);
+ }
+
+ const Derived & getDerived() const
+ {
+ return *static_cast(this);
+ }
+
+ void visitChildren(VisitQueryTreeNodeType & expression)
+ {
+ for (auto & child : expression->getChildren())
+ {
+ if (!child) /// Empty child slots are skipped
+ continue;
+
+ bool need_visit_child = getDerived().needChildVisit(expression, child);
+
+ if (need_visit_child)
+ visit(child);
+ }
+ }
+};
+
+template
+using ConstInDepthQueryTreeVisitor = InDepthQueryTreeVisitor; /// NOTE(review): presumably visitor variant for const query tree, confirm template arguments
+
+}
diff --git a/src/Analyzer/InterpolateNode.cpp b/src/Analyzer/InterpolateNode.cpp
new file mode 100644
index 00000000000..dcc14d6b6d5
--- /dev/null
+++ b/src/Analyzer/InterpolateNode.cpp
@@ -0,0 +1,66 @@
+#include
+
+#include
+
+#include
+#include
+
+#include
+
+namespace DB
+{
+
+InterpolateNode::InterpolateNode(QueryTreeNodePtr expression_, QueryTreeNodePtr interpolate_expression_)
+ : IQueryTreeNode(children_size) /// children_size is 2: expression and interpolate expression
+{
+ children[expression_child_index] = std::move(expression_);
+ children[interpolate_expression_child_index] = std::move(interpolate_expression_);
+}
+
+String InterpolateNode::getName() const
+{
+    /// Name has the following form: expression name, then " AS ", then interpolate expression name
+    String name = getExpression()->getName();
+    name.append(" AS ");
+    name.append(getInterpolateExpression()->getName());
+    return name;
+}
+
+void InterpolateNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const
+{
+    const std::string section_indent(indent + 2, ' ');
+    buffer << std::string(indent, ' ') << "INTERPOLATE id: " << format_state.getNodeId(this);
+    /// Each child is dumped in its own section: section title is indented by 2, child itself by 4
+    buffer << '\n' << section_indent << "EXPRESSION\n";
+    getExpression()->dumpTreeImpl(buffer, format_state, indent + 4);
+    buffer << '\n' << section_indent << "INTERPOLATE_EXPRESSION\n";
+    getInterpolateExpression()->dumpTreeImpl(buffer, format_state, indent + 4);
+}
+
+bool InterpolateNode::isEqualImpl(const IQueryTreeNode &) const
+{
+ /// No state in interpolate node, expression and interpolate expression are stored as children
+ return true;
+}
+
+void InterpolateNode::updateTreeHashImpl(HashState &) const
+{
+ /// No state in interpolate node, so nothing is added into hash state here
+}
+
+QueryTreeNodePtr InterpolateNode::cloneImpl() const
+{
+ return std::make_shared(nullptr /*expression*/, nullptr /*interpolate_expression*/); /// Children are left empty here, presumably they are set by caller of cloneImpl - TODO confirm
+}
+
+ASTPtr InterpolateNode::toASTImpl() const
+{
+ auto result = std::make_shared();
+ result->column = getExpression()->toAST()->getColumnName(); /// Column is taken as column name of expression AST
+ result->children.push_back(getInterpolateExpression()->toAST());
+ result->expr = result->children.back(); /// expr refers to the same AST that was just added into children
+
+ return result;
+}
+
+}
diff --git a/src/Analyzer/InterpolateNode.h b/src/Analyzer/InterpolateNode.h
new file mode 100644
index 00000000000..5bc8eded0bb
--- /dev/null
+++ b/src/Analyzer/InterpolateNode.h
@@ -0,0 +1,72 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+/** Interpolate node represents expression interpolation in INTERPOLATE section that is part of ORDER BY section in query tree.
+ *
+ * Example: SELECT * FROM test_table ORDER BY id WITH FILL INTERPOLATE (value AS value + 1);
+ * value - expression to interpolate.
+ * value + 1 - interpolate expression.
+ */
+class InterpolateNode;
+using InterpolateNodePtr = std::shared_ptr;
+
+class InterpolateNode final : public IQueryTreeNode
+{
+public:
+ /// Initialize interpolate node with expression and interpolate expression
+ explicit InterpolateNode(QueryTreeNodePtr expression_, QueryTreeNodePtr interpolate_expression_);
+
+ /// Get expression to interpolate (const version)
+ const QueryTreeNodePtr & getExpression() const
+ {
+ return children[expression_child_index];
+ }
+
+ /// Get expression to interpolate, returned reference allows to replace the child
+ QueryTreeNodePtr & getExpression()
+ {
+ return children[expression_child_index];
+ }
+
+ /// Get interpolate expression (const version)
+ const QueryTreeNodePtr & getInterpolateExpression() const
+ {
+ return children[interpolate_expression_child_index];
+ }
+
+ /// Get interpolate expression, returned reference allows to replace the child
+ QueryTreeNodePtr & getInterpolateExpression()
+ {
+ return children[interpolate_expression_child_index];
+ }
+
+ QueryTreeNodeType getNodeType() const override
+ {
+ return QueryTreeNodeType::INTERPOLATE;
+ }
+
+ String getName() const override;
+
+ void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
+
+protected:
+ bool isEqualImpl(const IQueryTreeNode & rhs) const override;
+
+ void updateTreeHashImpl(HashState & hash_state) const override;
+
+ QueryTreeNodePtr cloneImpl() const override;
+
+ ASTPtr toASTImpl() const override;
+
+private:
+ static constexpr size_t expression_child_index = 0; /// Index of expression to interpolate in children
+ static constexpr size_t interpolate_expression_child_index = 1; /// Index of interpolate expression in children
+ static constexpr size_t children_size = interpolate_expression_child_index + 1; /// Total number of children, equals 2
+};
+
+}
diff --git a/src/Analyzer/JoinNode.cpp b/src/Analyzer/JoinNode.cpp
new file mode 100644
index 00000000000..28a0c4ad7e0
--- /dev/null
+++ b/src/Analyzer/JoinNode.cpp
@@ -0,0 +1,116 @@
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+namespace DB
+{
+
+JoinNode::JoinNode(QueryTreeNodePtr left_table_expression_,
+ QueryTreeNodePtr right_table_expression_,
+ QueryTreeNodePtr join_expression_,
+ JoinLocality locality_,
+ JoinStrictness strictness_,
+ JoinKind kind_)
+ : IQueryTreeNode(children_size)
+ , locality(locality_)
+ , strictness(strictness_)
+ , kind(kind_)
+{
+ children[left_table_expression_child_index] = std::move(left_table_expression_);
+ children[right_table_expression_child_index] = std::move(right_table_expression_);
+ children[join_expression_child_index] = std::move(join_expression_); /// Can be empty, toASTTableJoin and dumpTreeImpl check it before use
+}
+
+ASTPtr JoinNode::toASTTableJoin() const
+{
+ auto join_ast = std::make_shared();
+ join_ast->locality = locality;
+ join_ast->strictness = strictness;
+ join_ast->kind = kind;
+ /// Join expression is optional. LIST node becomes USING expression list, any other node becomes ON expression.
+ if (children[join_expression_child_index])
+ {
+ auto join_expression_ast = children[join_expression_child_index]->toAST();
+
+ if (children[join_expression_child_index]->getNodeType() == QueryTreeNodeType::LIST)
+ join_ast->using_expression_list = std::move(join_expression_ast);
+ else
+ join_ast->on_expression = std::move(join_expression_ast);
+ }
+
+ return join_ast;
+}
+
+void JoinNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const
+{
+ buffer << std::string(indent, ' ') << "JOIN id: " << format_state.getNodeId(this);
+ /// Locality and strictness are written only if they are specified, kind is always written
+ if (locality != JoinLocality::Unspecified)
+ buffer << ", locality: " << toString(locality);
+
+ if (strictness != JoinStrictness::Unspecified)
+ buffer << ", strictness: " << toString(strictness);
+
+ buffer << ", kind: " << toString(kind);
+ /// Each child is dumped in its own section: section title is indented by 2, child itself by 4
+ buffer << '\n' << std::string(indent + 2, ' ') << "LEFT TABLE EXPRESSION\n";
+ getLeftTableExpression()->dumpTreeImpl(buffer, format_state, indent + 4);
+
+ buffer << '\n' << std::string(indent + 2, ' ') << "RIGHT TABLE EXPRESSION\n";
+ getRightTableExpression()->dumpTreeImpl(buffer, format_state, indent + 4);
+ /// Join expression section is written only if join expression exists
+ if (getJoinExpression())
+ {
+ buffer << '\n' << std::string(indent + 2, ' ') << "JOIN EXPRESSION\n";
+ getJoinExpression()->dumpTreeImpl(buffer, format_state, indent + 4);
+ }
+}
+
+bool JoinNode::isEqualImpl(const IQueryTreeNode & rhs) const /// Only locality, strictness and kind are compared in this method
+{
+ const auto & rhs_typed = assert_cast(rhs);
+ return locality == rhs_typed.locality && strictness == rhs_typed.strictness && kind == rhs_typed.kind;
+}
+
+void JoinNode::updateTreeHashImpl(HashState & state) const /// Hashes the same fields that isEqualImpl compares
+{
+ state.update(locality);
+ state.update(strictness);
+ state.update(kind);
+}
+
+QueryTreeNodePtr JoinNode::cloneImpl() const
+{
+ return std::make_shared(getLeftTableExpression(), getRightTableExpression(), getJoinExpression(), locality, strictness, kind); /// Child pointers are passed as is, children are not cloned in this method
+}
+
+ASTPtr JoinNode::toASTImpl() const
+{
+ ASTPtr tables_in_select_query_ast = std::make_shared();
+
+ addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[left_table_expression_child_index]);
+ /// Remember index of the first element that will be added for right table expression
+ size_t join_table_index = tables_in_select_query_ast->children.size();
+
+ auto join_ast = toASTTableJoin();
+
+ addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[right_table_expression_child_index]);
+ /// Table join AST is attached to the element at remembered index
+ auto & table_element = tables_in_select_query_ast->children.at(join_table_index)->as();
+ table_element.children.push_back(std::move(join_ast));
+ table_element.table_join = table_element.children.back();
+
+ return tables_in_select_query_ast;
+}
+
+}
diff --git a/src/Analyzer/JoinNode.h b/src/Analyzer/JoinNode.h
new file mode 100644
index 00000000000..15ba11a0122
--- /dev/null
+++ b/src/Analyzer/JoinNode.h
@@ -0,0 +1,152 @@
+#pragma once
+
+#include
+
+#include
+#include