diff --git a/.gitmodules b/.gitmodules index 73f6062fa96..af69300473d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -280,3 +280,6 @@ [submodule "contrib/base-x"] path = contrib/base-x url = https://github.com/ClickHouse/base-x.git +[submodule "contrib/c-ares"] + path = contrib/c-ares + url = https://github.com/ClickHouse/c-ares diff --git a/CHANGELOG.md b/CHANGELOG.md index dfc51952250..56a6e27d8f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v22.7, 2022-07-21](#227)**
**[ClickHouse release v22.6, 2022-06-16](#226)**
**[ClickHouse release v22.5, 2022-05-19](#225)**
**[ClickHouse release v22.4, 2022-04-20](#224)**
@@ -7,6 +8,172 @@ **[ClickHouse release v22.1, 2022-01-18](#221)**
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**
+### ClickHouse release 22.7, 2022-07-21 + +#### Upgrade Notes +* Enable setting `enable_positional_arguments` by default. It allows queries like `SELECT ... ORDER BY 1, 2` where 1 and 2 are references to the SELECT clause. If you need to return to the old behavior, disable this setting. [#38204](https://github.com/ClickHouse/ClickHouse/pull/38204) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `Ordinary` database engine and old storage definition syntax for `*MergeTree` tables are deprecated. By default it's not possible to create new databases with `Ordinary` engine. If `system` database has `Ordinary` engine it will be automatically converted to `Atomic` on server startup. There are settings to keep old behavior (`allow_deprecated_database_ordinary` and `allow_deprecated_syntax_for_merge_tree`), but these settings may be removed in future releases. [#38335](https://github.com/ClickHouse/ClickHouse/pull/38335) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Force rewriting comma join to inner by default (set default value `cross_to_inner_join_rewrite = 2`). To have the old behavior, set `cross_to_inner_join_rewrite = 1`. [#39326](https://github.com/ClickHouse/ClickHouse/pull/39326) ([Vladimir C](https://github.com/vdimir)). If you face any incompatibilities, you can set this setting back to the old value. + +#### New Feature +* Support expressions with window functions. Closes [#19857](https://github.com/ClickHouse/ClickHouse/issues/19857). [#37848](https://github.com/ClickHouse/ClickHouse/pull/37848) ([Dmitry Novik](https://github.com/novikd)). +* Add new `direct` join algorithm for `EmbeddedRocksDB` tables, see [#33582](https://github.com/ClickHouse/ClickHouse/issues/33582). [#35363](https://github.com/ClickHouse/ClickHouse/pull/35363) ([Vladimir C](https://github.com/vdimir)). +* Added full sorting merge join algorithm. [#35796](https://github.com/ClickHouse/ClickHouse/pull/35796) ([Vladimir C](https://github.com/vdimir)). +* Implement NATS table engine, which allows publishing to and subscribing to NATS. Closes [#32388](https://github.com/ClickHouse/ClickHouse/issues/32388). [#37171](https://github.com/ClickHouse/ClickHouse/pull/37171) ([tchepavel](https://github.com/tchepavel)). ([Kseniia Sumarokova](https://github.com/kssenii)) +* Implement table function `mongodb`. Allow writes into `MongoDB` storage / table function. [#37213](https://github.com/ClickHouse/ClickHouse/pull/37213) ([aaapetrenko](https://github.com/aaapetrenko)). ([Kseniia Sumarokova](https://github.com/kssenii)) +* Add `SQLInsert` output format. Closes [#38441](https://github.com/ClickHouse/ClickHouse/issues/38441). [#38477](https://github.com/ClickHouse/ClickHouse/pull/38477) ([Kruglov Pavel](https://github.com/Avogar)). +* Introduced the setting `additional_table_filters`. Using this setting, you can specify an additional filtering condition for a table which will be applied directly after reading. Example: `select number, x, y from (select number from system.numbers limit 5) f any left join (select x, y from table_1) s on f.number = s.x settings additional_table_filters={'system.numbers' : 'number != 3', 'table_1' : 'x != 2'}`. Introduced the setting `additional_result_filter` which specifies an additional filtering condition for the query result. Closes [#37918](https://github.com/ClickHouse/ClickHouse/issues/37918). [#38475](https://github.com/ClickHouse/ClickHouse/pull/38475) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Add `compatibility` setting and `system.settings_changes` system table that contains information about changes in settings through ClickHouse versions. Closes [#35972](https://github.com/ClickHouse/ClickHouse/issues/35972). [#38957](https://github.com/ClickHouse/ClickHouse/pull/38957) ([Kruglov Pavel](https://github.com/Avogar)). +* Add functions `translate(string, from_string, to_string)` and `translateUTF8(string, from_string, to_string)`. They replace characters in a string according to a one-to-one mapping from `from_string` to `to_string`. [#38935](https://github.com/ClickHouse/ClickHouse/pull/38935) ([Nikolay Degterinsky](https://github.com/evillique)). +* Support `parseTimeDelta` function. The characters ` ;-+,:` can be used as separators, e.g. `1yr-2mo` or `2m:6s`: `SELECT parseTimeDelta('1yr-2mo-4w + 12 days, 3 hours : 1 minute ; 33 seconds')`. [#39071](https://github.com/ClickHouse/ClickHouse/pull/39071) ([jiahui-97](https://github.com/jiahui-97)). +* Added `CREATE TABLE ... EMPTY AS SELECT` query. It automatically deduces table structure from the SELECT query, but does not fill the table after creation. Resolves [#38049](https://github.com/ClickHouse/ClickHouse/issues/38049). [#38272](https://github.com/ClickHouse/ClickHouse/pull/38272) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Added options to limit IO operations with remote storage: `max_remote_read_network_bandwidth_for_server` and `max_remote_write_network_bandwidth_for_server`. [#39095](https://github.com/ClickHouse/ClickHouse/pull/39095) ([Sergei Trifonov](https://github.com/serxa)). +* Add `group_by_use_nulls` setting to make aggregation key columns nullable in the case of ROLLUP, CUBE and GROUPING SETS. Closes [#37359](https://github.com/ClickHouse/ClickHouse/issues/37359). [#38642](https://github.com/ClickHouse/ClickHouse/pull/38642) ([Dmitry Novik](https://github.com/novikd)). +* Add the ability to specify compression level during data export. [#38907](https://github.com/ClickHouse/ClickHouse/pull/38907) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add an option to require explicit grants to SELECT from the `system` database. Details: [#38970](https://github.com/ClickHouse/ClickHouse/pull/38970) ([Vitaly Baranov](https://github.com/vitlibar)). +* Functions `multiMatchAny`, `multiMatchAnyIndex`, `multiMatchAllIndices` and their fuzzy variants now accept non-const pattern array argument. [#38485](https://github.com/ClickHouse/ClickHouse/pull/38485) ([Robert Schulze](https://github.com/rschu1ze)). SQL function `multiSearchAllPositions` now accepts non-const needle arguments. [#39167](https://github.com/ClickHouse/ClickHouse/pull/39167) ([Robert Schulze](https://github.com/rschu1ze)). +* Add a setting `zstd_window_log_max` to configure max memory usage on zstd decoding when importing external files. Closes [#35693](https://github.com/ClickHouse/ClickHouse/issues/35693). [#37015](https://github.com/ClickHouse/ClickHouse/pull/37015) ([wuxiaobai24](https://github.com/wuxiaobai24)). +* Add `send_logs_source_regexp` setting. Send server text logs whose source name matches the specified regexp. An empty value means all sources. [#39161](https://github.com/ClickHouse/ClickHouse/pull/39161) ([Amos Bird](https://github.com/amosbird)). +* Support `ALTER` for `Hive` tables. [#38214](https://github.com/ClickHouse/ClickHouse/pull/38214) ([lgbo](https://github.com/lgbo-ustc)). +* Support `isNullable` function. This function checks whether its argument is nullable and returns 1 or 0. Closes [#38611](https://github.com/ClickHouse/ClickHouse/issues/38611). 
[#38841](https://github.com/ClickHouse/ClickHouse/pull/38841) ([lokax](https://github.com/lokax)). +* Added functions for base58 encoding/decoding. [#38159](https://github.com/ClickHouse/ClickHouse/pull/38159) ([Andrey Zvonov](https://github.com/zvonand)). +* Add chart visualization to Play UI. [#38197](https://github.com/ClickHouse/ClickHouse/pull/38197) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added L2 Squared distance and norm functions for both arrays and tuples. [#38545](https://github.com/ClickHouse/ClickHouse/pull/38545) ([Julian Gilyadov](https://github.com/israelg99)). +* Add ability to pass HTTP headers to the `url` table function / storage via SQL. Closes [#37897](https://github.com/ClickHouse/ClickHouse/issues/37897). [#38176](https://github.com/ClickHouse/ClickHouse/pull/38176) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add `clickhouse-diagnostics` binary to the packages. [#38647](https://github.com/ClickHouse/ClickHouse/pull/38647) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Experimental Feature +* Add new setting `implicit_transaction` to run standalone queries inside a transaction. It handles both creation and closing (via COMMIT if the query succeeded or ROLLBACK if it didn't) of the transaction automatically. [#38344](https://github.com/ClickHouse/ClickHouse/pull/38344) ([Raúl Marín](https://github.com/Algunenano)). + +#### Performance Improvement +* Distinct optimization for sorted columns. Use a specialized distinct transformation in case the input stream is sorted by the column(s) in DISTINCT. The optimization can be applied to pre-distinct, final distinct, or both. Initial implementation by @dimarub2000. [#37803](https://github.com/ClickHouse/ClickHouse/pull/37803) ([Igor Nikonov](https://github.com/devcrafter)). +* Improve performance of `ORDER BY`, `MergeTree` merges, window functions using batch version of `BinaryHeap`. [#38022](https://github.com/ClickHouse/ClickHouse/pull/38022) ([Maksim Kita](https://github.com/kitaisreal)). +* More parallel execution for queries with `FINAL`. [#36396](https://github.com/ClickHouse/ClickHouse/pull/36396) ([Nikita Taranov](https://github.com/nickitat)). +* Fix significant join performance regression which was introduced in [#35616](https://github.com/ClickHouse/ClickHouse/pull/35616). It's interesting that common join queries such as SSB queries have been 10 times slower for almost 3 months while no one complained. [#38052](https://github.com/ClickHouse/ClickHouse/pull/38052) ([Amos Bird](https://github.com/amosbird)). +* Migrate from the Intel hyperscan library to vectorscan; this speeds up string matching on non-x86 platforms. [#38171](https://github.com/ClickHouse/ClickHouse/pull/38171) ([Robert Schulze](https://github.com/rschu1ze)). +* Increased parallelism of query plan steps executed after aggregation. [#38295](https://github.com/ClickHouse/ClickHouse/pull/38295) ([Nikita Taranov](https://github.com/nickitat)). +* Improve performance of insertion to columns of type `JSON`. [#38320](https://github.com/ClickHouse/ClickHouse/pull/38320) ([Anton Popov](https://github.com/CurtizJ)). +* Optimized insertion and lookups in the HashTable. [#38413](https://github.com/ClickHouse/ClickHouse/pull/38413) ([Nikita Taranov](https://github.com/nickitat)). +* Fix performance degradation from [#32493](https://github.com/ClickHouse/ClickHouse/issues/32493). [#38417](https://github.com/ClickHouse/ClickHouse/pull/38417) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Improve performance of joining with numeric columns using SIMD instructions. [#37235](https://github.com/ClickHouse/ClickHouse/pull/37235) ([zzachimed](https://github.com/zzachimed)). [#38565](https://github.com/ClickHouse/ClickHouse/pull/38565) ([Maksim Kita](https://github.com/kitaisreal)). +* Norm and Distance functions for arrays are sped up by 1.2-2 times. [#38740](https://github.com/ClickHouse/ClickHouse/pull/38740) ([Alexander Gololobov](https://github.com/davenger)). +* Add AVX-512 VBMI optimized `copyOverlap32Shuffle` for LZ4 decompression. In other words, LZ4 decompression performance is improved. [#37891](https://github.com/ClickHouse/ClickHouse/pull/37891) ([Guo Wangyang](https://github.com/guowangy)). +* `ORDER BY (a, b)` now gets all the same benefits as `ORDER BY a, b`. [#38873](https://github.com/ClickHouse/ClickHouse/pull/38873) ([Igor Nikonov](https://github.com/devcrafter)). +* Align branches within a 32B boundary to make benchmarks more stable. [#38988](https://github.com/ClickHouse/ClickHouse/pull/38988) ([Guo Wangyang](https://github.com/guowangy)). It improves performance by 1-2% on average on Intel CPUs. +* Executable UDF, executable dictionaries, and Executable tables no longer waste one second waiting for subprocess termination. [#38929](https://github.com/ClickHouse/ClickHouse/pull/38929) ([Constantine Peresypkin](https://github.com/pkit)). +* Optimize accesses to `system.stack_trace` table if not all columns are selected. [#39177](https://github.com/ClickHouse/ClickHouse/pull/39177) ([Azat Khuzhin](https://github.com/azat)). +* Improve isNullable/isConstant/isNull/isNotNull performance for LowCardinality argument. [#39192](https://github.com/ClickHouse/ClickHouse/pull/39192) ([Kruglov Pavel](https://github.com/Avogar)). +* Optimized processing of ORDER BY in window functions. [#34632](https://github.com/ClickHouse/ClickHouse/pull/34632) ([Vladimir Chebotarev](https://github.com/excitoon)). +* The table `system.asynchronous_metric_log` is further optimized for storage space. This closes [#38134](https://github.com/ClickHouse/ClickHouse/issues/38134). See the [YouTube video](https://www.youtube.com/watch?v=0fSp9SF8N8A). [#38428](https://github.com/ClickHouse/ClickHouse/pull/38428) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Improvement +* Support SQL standard CREATE INDEX and DROP INDEX syntax. [#35166](https://github.com/ClickHouse/ClickHouse/pull/35166) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Send profile events for INSERT queries (previously only SELECT was supported). [#37391](https://github.com/ClickHouse/ClickHouse/pull/37391) ([Azat Khuzhin](https://github.com/azat)). +* Implement in-order aggregation (`optimize_aggregation_in_order`) for fully materialized projections. [#37469](https://github.com/ClickHouse/ClickHouse/pull/37469) ([Azat Khuzhin](https://github.com/azat)). +* Remove subprocess run for kerberos initialization. Added new integration test. Closes [#27651](https://github.com/ClickHouse/ClickHouse/issues/27651). [#38105](https://github.com/ClickHouse/ClickHouse/pull/38105) ([Roman Vasin](https://github.com/rvasin)). +* Add setting `multiple_joins_try_to_keep_original_names` to not rewrite identifier names on multiple JOINs rewrite, closes [#34697](https://github.com/ClickHouse/ClickHouse/issues/34697). [#38149](https://github.com/ClickHouse/ClickHouse/pull/38149) ([Vladimir C](https://github.com/vdimir)). +* Improved trace-visualizer UX. 
[#38169](https://github.com/ClickHouse/ClickHouse/pull/38169) ([Sergei Trifonov](https://github.com/serxa)). +* Enable stack trace collection and query profiler for AArch64. [#38181](https://github.com/ClickHouse/ClickHouse/pull/38181) ([Maksim Kita](https://github.com/kitaisreal)). +* Do not skip symlinks in `user_defined` directory during SQL user defined functions loading. Closes [#38042](https://github.com/ClickHouse/ClickHouse/issues/38042). [#38184](https://github.com/ClickHouse/ClickHouse/pull/38184) ([Maksim Kita](https://github.com/kitaisreal)). +* Added background cleanup of subdirectories in `store/`. In some cases clickhouse-server might leave garbage subdirectories in `store/` (for example, on unsuccessful table creation) and those directories were never removed. Fixes [#33710](https://github.com/ClickHouse/ClickHouse/issues/33710). [#38265](https://github.com/ClickHouse/ClickHouse/pull/38265) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add `DESCRIBE CACHE` query to show cache settings from config. Add `SHOW CACHES` query to show the list of available filesystem caches. [#38279](https://github.com/ClickHouse/ClickHouse/pull/38279) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add access check for `system drop filesystem cache`. Support ON CLUSTER. [#38319](https://github.com/ClickHouse/ClickHouse/pull/38319) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix PostgreSQL database engine incompatibility on upgrade from 21.3 to 22.3. Closes [#36659](https://github.com/ClickHouse/ClickHouse/issues/36659). [#38369](https://github.com/ClickHouse/ClickHouse/pull/38369) ([Kseniia Sumarokova](https://github.com/kssenii)). +* `filesystemAvailable` and similar functions now work in `clickhouse-local`. This closes [#38423](https://github.com/ClickHouse/ClickHouse/issues/38423). [#38424](https://github.com/ClickHouse/ClickHouse/pull/38424) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `revision` function. [#38555](https://github.com/ClickHouse/ClickHouse/pull/38555) ([Azat Khuzhin](https://github.com/azat)). +* Fix GCS via proxy tunnel usage. [#38726](https://github.com/ClickHouse/ClickHouse/pull/38726) ([Azat Khuzhin](https://github.com/azat)). +* Support `\i file` in clickhouse client / local (similar to psql \i). [#38813](https://github.com/ClickHouse/ClickHouse/pull/38813) ([Kseniia Sumarokova](https://github.com/kssenii)). +* New option `optimize = 1` in `EXPLAIN AST`. If enabled, it shows the AST after it is rewritten; otherwise it shows the AST of the original query. Disabled by default. [#38910](https://github.com/ClickHouse/ClickHouse/pull/38910) ([Igor Nikonov](https://github.com/devcrafter)). +* Allow trailing comma in columns list. Closes [#38425](https://github.com/ClickHouse/ClickHouse/issues/38425). [#38440](https://github.com/ClickHouse/ClickHouse/pull/38440) ([chen](https://github.com/xiedeyantu)). +* Bugfixes and performance improvements for `parallel_hash` JOIN method. [#37648](https://github.com/ClickHouse/ClickHouse/pull/37648) ([Vladimir C](https://github.com/vdimir)). +* Support hadoop secure RPC transfer (hadoop.rpc.protection=privacy and hadoop.rpc.protection=integrity). [#37852](https://github.com/ClickHouse/ClickHouse/pull/37852) ([Peng Liu](https://github.com/michael1589)). +* Add struct type support in `StorageHive`. [#38118](https://github.com/ClickHouse/ClickHouse/pull/38118) ([lgbo](https://github.com/lgbo-ustc)). +* S3 single objects are now removed with `RemoveObjectRequest`. 
Implement compatibility with GCP which did not allow to use `removeFileIfExists` effectively breaking approximately half of `remove` functionality. Automatic detection for `DeleteObjects` S3 API, that is not supported by GCS. This will allow to use GCS without explicit `support_batch_delete=0` in configuration. [#37882](https://github.com/ClickHouse/ClickHouse/pull/37882) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Expose basic ClickHouse Keeper related monitoring data (via ProfileEvents and CurrentMetrics). [#38072](https://github.com/ClickHouse/ClickHouse/pull/38072) ([lingpeng0314](https://github.com/lingpeng0314)). +* Support `auto_close` option for PostgreSQL engine connection. Closes [#31486](https://github.com/ClickHouse/ClickHouse/issues/31486). [#38363](https://github.com/ClickHouse/ClickHouse/pull/38363) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow `NULL` modifier in columns declaration for table functions. [#38816](https://github.com/ClickHouse/ClickHouse/pull/38816) ([Kruglov Pavel](https://github.com/Avogar)). +* Deactivate `mutations_finalizing_task` before shutdown to avoid benign `TABLE_IS_READ_ONLY` errors during shutdown. [#38851](https://github.com/ClickHouse/ClickHouse/pull/38851) ([Raúl Marín](https://github.com/Algunenano)). +* Eliminate unnecessary waiting of SELECT queries after ALTER queries in presence of INSERT queries if you use deprecated Ordinary databases. [#38864](https://github.com/ClickHouse/ClickHouse/pull/38864) ([Azat Khuzhin](https://github.com/azat)). +* New option `rewrite` in `EXPLAIN AST`. If enabled, it shows AST after it's rewritten, otherwise AST of original query. Disabled by default. [#38910](https://github.com/ClickHouse/ClickHouse/pull/38910) ([Igor Nikonov](https://github.com/devcrafter)). +* Stop reporting Zookeeper "Node exists" exceptions in system.errors when they are expected. [#38961](https://github.com/ClickHouse/ClickHouse/pull/38961) ([Raúl Marín](https://github.com/Algunenano)). +* `clickhouse-keeper`: add support for real-time digest calculation and verification. It is disabled by default. [#37555](https://github.com/ClickHouse/ClickHouse/pull/37555) ([Antonio Andelic](https://github.com/antonio2368)). +* Allow to specify globs `* or {expr1, expr2, expr3}` inside a key for `clickhouse-extract-from-config` tool. [#38966](https://github.com/ClickHouse/ClickHouse/pull/38966) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* clearOldLogs: Don't report KEEPER_EXCEPTION on concurrent deletes. [#39016](https://github.com/ClickHouse/ClickHouse/pull/39016) ([Raúl Marín](https://github.com/Algunenano)). +* clickhouse-keeper improvement: persist meta-information about keeper servers to disk. [#39069](https://github.com/ClickHouse/ClickHouse/pull/39069) ([Antonio Andelic](https://github.com/antonio2368)). This will make it easier to operate if you shutdown or restart all keeper nodes at the same time. +* Continue without exception when running out of disk space when using filesystem cache. [#39106](https://github.com/ClickHouse/ClickHouse/pull/39106) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Handling SIGTERM signals from k8s. [#39130](https://github.com/ClickHouse/ClickHouse/pull/39130) ([Timur Solodovnikov](https://github.com/tsolodov)). +* Add `merge_algorithm` column (Undecided, Horizontal, Vertical) to system.part_log. [#39181](https://github.com/ClickHouse/ClickHouse/pull/39181) ([Azat Khuzhin](https://github.com/azat)). 
+* Don't increment a counter in `system.errors` when the disk is not rotational. [#39216](https://github.com/ClickHouse/ClickHouse/pull/39216) ([Raúl Marín](https://github.com/Algunenano)). +* The metric `result_bytes` for `INSERT` queries in `system.query_log` shows the number of bytes inserted. Previously the value was incorrect and stored the same value as `result_rows`. [#39225](https://github.com/ClickHouse/ClickHouse/pull/39225) ([Ilya Yatsishin](https://github.com/qoega)). +* The CPU usage metric in clickhouse-client will be displayed in a better way. Fixes [#38756](https://github.com/ClickHouse/ClickHouse/issues/38756). [#39280](https://github.com/ClickHouse/ClickHouse/pull/39280) ([Sergei Trifonov](https://github.com/serxa)). +* Rethrow exception on filesystem cache initialization on server startup, better error message. [#39386](https://github.com/ClickHouse/ClickHouse/pull/39386) ([Kseniia Sumarokova](https://github.com/kssenii)). +* OpenTelemetry now collects traces without Processors spans by default (there are too many). To enable collection of Processors spans, use the `opentelemetry_trace_processors` setting. [#39170](https://github.com/ClickHouse/ClickHouse/pull/39170) ([Ilya Yatsishin](https://github.com/qoega)). +* Functions `multiMatch[Fuzzy](AllIndices/Any/AnyIndex)` - don't throw a logical error if the needle argument is empty. [#39012](https://github.com/ClickHouse/ClickHouse/pull/39012) ([Robert Schulze](https://github.com/rschu1ze)). +* Allow to declare `RabbitMQ` queue without default arguments `x-max-length` and `x-overflow`. [#39259](https://github.com/ClickHouse/ClickHouse/pull/39259) ([rnbondarenko](https://github.com/rnbondarenko)). + +#### Build/Testing/Packaging Improvement +* Apply Clang Thread Safety Analysis (TSA) annotations to ClickHouse. [#38068](https://github.com/ClickHouse/ClickHouse/pull/38068) ([Robert Schulze](https://github.com/rschu1ze)). +* Adapt universal installation script for FreeBSD. [#39302](https://github.com/ClickHouse/ClickHouse/pull/39302) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Preparation for building on `s390x` platform. [#39193](https://github.com/ClickHouse/ClickHouse/pull/39193) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Fix a bug in the `jemalloc` library. [#38757](https://github.com/ClickHouse/ClickHouse/pull/38757) ([Azat Khuzhin](https://github.com/azat)). +* Hardware benchmark now has support for automatic results uploading. [#38427](https://github.com/ClickHouse/ClickHouse/pull/38427) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* System table "system.licenses" is now correctly populated on Mac (Darwin). [#38294](https://github.com/ClickHouse/ClickHouse/pull/38294) ([Robert Schulze](https://github.com/rschu1ze)). +* Change `all|noarch` packages to architecture-dependent ones, fix some documentation for it, and push aarch64|arm64 packages to artifactory and release assets. Fixes [#36443](https://github.com/ClickHouse/ClickHouse/issues/36443). [#38580](https://github.com/ClickHouse/ClickHouse/pull/38580) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) +* Fix rounding for `Decimal128/Decimal256` with more than 19-digits long scale. [#38027](https://github.com/ClickHouse/ClickHouse/pull/38027) ([Igor Nikonov](https://github.com/devcrafter)). +* Fixed crash caused by data race in storage `Hive` (integration table engine). [#38887](https://github.com/ClickHouse/ClickHouse/pull/38887) ([lgbo](https://github.com/lgbo-ustc)). 
+* Fix crash when executing GRANT ALL ON *.* with ON CLUSTER. It was broken in https://github.com/ClickHouse/ClickHouse/pull/35767. This closes [#38618](https://github.com/ClickHouse/ClickHouse/issues/38618). [#38674](https://github.com/ClickHouse/ClickHouse/pull/38674) ([Vitaly Baranov](https://github.com/vitlibar)). +* Correct glob expansion in case of `{0..10}` forms. Fixes [#38498](https://github.com/ClickHouse/ClickHouse/issues/38498). The current implementation is similar to what the shell does, as mentioned by @rschu1ze [here](https://github.com/ClickHouse/ClickHouse/pull/38502#issuecomment-1169057723). [#38502](https://github.com/ClickHouse/ClickHouse/pull/38502) ([Heena Bansal](https://github.com/HeenaBansal2009)). +* Fix crash for `mapUpdate`, `mapFilter` functions when using with constant map argument. Closes [#38547](https://github.com/ClickHouse/ClickHouse/issues/38547). [#38553](https://github.com/ClickHouse/ClickHouse/pull/38553) ([hexiaoting](https://github.com/hexiaoting)). +* Fix `toHour` monotonicity information for query optimization which could lead to an incorrect query result (incorrect index analysis). This fixes [#38333](https://github.com/ClickHouse/ClickHouse/issues/38333). [#38675](https://github.com/ClickHouse/ClickHouse/pull/38675) ([Amos Bird](https://github.com/amosbird)). +* Fix checking whether s3 storage supports parallel writes. The bug resulted in s3 parallel writes not working. [#38792](https://github.com/ClickHouse/ClickHouse/pull/38792) ([chen](https://github.com/xiedeyantu)). +* Fix s3 seekable reads with parallel read buffer. (Affected memory usage during query). Closes [#38258](https://github.com/ClickHouse/ClickHouse/issues/38258). [#38802](https://github.com/ClickHouse/ClickHouse/pull/38802) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update `simdjson`. This fixes [#38621](https://github.com/ClickHouse/ClickHouse/issues/38621) - a buffer overflow on machines with the latest Intel CPUs with AVX-512 VBMI. [#38838](https://github.com/ClickHouse/ClickHouse/pull/38838) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible logical error for Vertical merges. [#38859](https://github.com/ClickHouse/ClickHouse/pull/38859) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix settings profile with seconds unit. [#38896](https://github.com/ClickHouse/ClickHouse/pull/38896) ([Raúl Marín](https://github.com/Algunenano)). +* Fix incorrect partition pruning when there is a nullable partition key. Note: most likely you don't use nullable partition keys - this is an obscure feature you should not use. Nullable keys are nonsense and this feature is only needed for some crazy use-cases. This fixes [#38941](https://github.com/ClickHouse/ClickHouse/issues/38941). [#38946](https://github.com/ClickHouse/ClickHouse/pull/38946) ([Amos Bird](https://github.com/amosbird)). +* Improve `fsync_part_directory` for fetches. [#38993](https://github.com/ClickHouse/ClickHouse/pull/38993) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible deadlock inside `OvercommitTracker`. Fixes [#37794](https://github.com/ClickHouse/ClickHouse/issues/37794). [#39030](https://github.com/ClickHouse/ClickHouse/pull/39030) ([Dmitry Novik](https://github.com/novikd)). +* Fix bug in filesystem cache that could happen in some corner case which coincided with cache capacity hitting the limit. Closes [#39066](https://github.com/ClickHouse/ClickHouse/issues/39066). [#39070](https://github.com/ClickHouse/ClickHouse/pull/39070) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Fix some corner cases of interpretation of the arguments of window expressions. Fixes [#38538](https://github.com/ClickHouse/ClickHouse/issues/38538). Allow using higher-order functions in window expressions. [#39112](https://github.com/ClickHouse/ClickHouse/pull/39112) ([Dmitry Novik](https://github.com/novikd)). +* Keep `LowCardinality` type in `tuple` function. Previously the `LowCardinality` type was dropped and elements of the created tuple had the underlying type of `LowCardinality`. [#39113](https://github.com/ClickHouse/ClickHouse/pull/39113) ([Anton Popov](https://github.com/CurtizJ)). +* Fix error `Block structure mismatch` which could happen for INSERT into table with attached MATERIALIZED VIEW and enabled setting `extremes = 1`. Closes [#29759](https://github.com/ClickHouse/ClickHouse/issues/29759) and [#38729](https://github.com/ClickHouse/ClickHouse/issues/38729). [#39125](https://github.com/ClickHouse/ClickHouse/pull/39125) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix unexpected query result when both `optimize_trivial_count_query` and `empty_result_for_aggregation_by_empty_set` are set to true. This fixes [#39140](https://github.com/ClickHouse/ClickHouse/issues/39140). [#39155](https://github.com/ClickHouse/ClickHouse/pull/39155) ([Amos Bird](https://github.com/amosbird)). +* Fixed error `Not found column Type in block` in selects with `PREWHERE` and read-in-order optimizations. [#39157](https://github.com/ClickHouse/ClickHouse/pull/39157) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix an extremely rare race condition during creation of hardlinks for remote filesystems. The only way to reproduce it is a concurrent run of backups. [#39190](https://github.com/ClickHouse/ClickHouse/pull/39190) ([alesapin](https://github.com/alesapin)). +* (zero-copy replication is an experimental feature that should not be used in production) Fix fetch of in-memory part with `allow_remote_fs_zero_copy_replication`. [#39214](https://github.com/ClickHouse/ClickHouse/pull/39214) ([Azat Khuzhin](https://github.com/azat)). +* (MaterializedPostgreSQL - experimental feature). Fix segmentation fault in MaterializedPostgreSQL database engine, which could happen if some exception occurred at replication initialisation. Closes [#36939](https://github.com/ClickHouse/ClickHouse/issues/36939). [#39272](https://github.com/ClickHouse/ClickHouse/pull/39272) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix incorrect fetch of table metadata from PostgreSQL database engine. Closes [#33502](https://github.com/ClickHouse/ClickHouse/issues/33502). [#39283](https://github.com/ClickHouse/ClickHouse/pull/39283) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix projection exception when aggregation keys are wrapped inside other functions. This fixes [#37151](https://github.com/ClickHouse/ClickHouse/issues/37151). [#37155](https://github.com/ClickHouse/ClickHouse/pull/37155) ([Amos Bird](https://github.com/amosbird)). +* Fix possible logical error `... with argument with type Nothing and default implementation for Nothing is expected to return result with type Nothing, got ...` in some functions. Closes: [#37610](https://github.com/ClickHouse/ClickHouse/issues/37610) Closes: [#37741](https://github.com/ClickHouse/ClickHouse/issues/37741). [#37759](https://github.com/ClickHouse/ClickHouse/pull/37759) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix incorrect columns order in subqueries of UNION (in case of duplicated columns in subselects it may produce an incorrect result). 
[#37887](https://github.com/ClickHouse/ClickHouse/pull/37887) ([Azat Khuzhin](https://github.com/azat)). +* Fix incorrect behavior of `ALTER ... MODIFY COLUMN` with column names that contain dots. Closes [#37907](https://github.com/ClickHouse/ClickHouse/issues/37907). [#37971](https://github.com/ClickHouse/ClickHouse/pull/37971) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix reading of sparse columns from `MergeTree` tables that store their data in S3. [#37978](https://github.com/ClickHouse/ClickHouse/pull/37978) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible crash in `Distributed` async insert in case of removing a replica from config. [#38029](https://github.com/ClickHouse/ClickHouse/pull/38029) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix "Missing columns" for GLOBAL JOIN with CTE without alias. [#38056](https://github.com/ClickHouse/ClickHouse/pull/38056) ([Azat Khuzhin](https://github.com/azat)). +* Rewrite tuple functions as literals in backwards-compatibility mode. [#38096](https://github.com/ClickHouse/ClickHouse/pull/38096) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Fix redundant memory reservation for output block during `ORDER BY`. [#38127](https://github.com/ClickHouse/ClickHouse/pull/38127) ([iyupeng](https://github.com/iyupeng)). +* Fix possible logical error `Bad cast from type DB::IColumn* to DB::ColumnNullable*` in array mapped functions. Closes [#38006](https://github.com/ClickHouse/ClickHouse/issues/38006). [#38132](https://github.com/ClickHouse/ClickHouse/pull/38132) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix temporary name clash in partial merge join, closes [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#38135](https://github.com/ClickHouse/ClickHouse/pull/38135) ([Vladimir C](https://github.com/vdimir)). +* Fix a minor issue with queries like `CREATE TABLE nested_name_tuples (`a` Tuple(x String, y Tuple(i Int32, j String))) ENGINE = Memory;` [#38136](https://github.com/ClickHouse/ClickHouse/pull/38136) ([lgbo](https://github.com/lgbo-ustc)). +* Fix bug with nested short-circuit functions that led to execution of arguments even if the condition is false. Closes [#38040](https://github.com/ClickHouse/ClickHouse/issues/38040). [#38173](https://github.com/ClickHouse/ClickHouse/pull/38173) ([Kruglov Pavel](https://github.com/Avogar)). +* (Window View is an experimental feature) Fix LOGICAL_ERROR for WINDOW VIEW with incorrect structure. [#38205](https://github.com/ClickHouse/ClickHouse/pull/38205) ([Azat Khuzhin](https://github.com/azat)). +* Update librdkafka submodule to fix crash when an OAUTHBEARER refresh callback is set. [#38225](https://github.com/ClickHouse/ClickHouse/pull/38225) ([Rafael Acevedo](https://github.com/racevedoo)). +* Fix INSERT into Distributed tables hanging due to ProfileEvents. [#38307](https://github.com/ClickHouse/ClickHouse/pull/38307) ([Azat Khuzhin](https://github.com/azat)). +* Fix retries in PostgreSQL engine. [#38310](https://github.com/ClickHouse/ClickHouse/pull/38310) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix optimization in PartialSortingTransform (SIGSEGV and possible incorrect result). [#38324](https://github.com/ClickHouse/ClickHouse/pull/38324) ([Azat Khuzhin](https://github.com/azat)). +* Fix RabbitMQ with formats based on PeekableReadBuffer. Closes [#38061](https://github.com/ClickHouse/ClickHouse/issues/38061). [#38356](https://github.com/ClickHouse/ClickHouse/pull/38356) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* MaterializedPostgreSQL is an experimental feature. Fix possible `Invalid number of rows in Chunk` in MaterializedPostgreSQL. Closes [#37323](https://github.com/ClickHouse/ClickHouse/issues/37323). [#38360](https://github.com/ClickHouse/ClickHouse/pull/38360) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix RabbitMQ configuration with connection string setting. Closes [#36531](https://github.com/ClickHouse/ClickHouse/issues/36531). [#38365](https://github.com/ClickHouse/ClickHouse/pull/38365) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix PostgreSQL engine not using PostgreSQL schema when retrieving array dimension size. Closes [#36755](https://github.com/ClickHouse/ClickHouse/issues/36755). Closes [#36772](https://github.com/ClickHouse/ClickHouse/issues/36772). [#38366](https://github.com/ClickHouse/ClickHouse/pull/38366) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possibly incorrect result of distributed queries with `DISTINCT` and `LIMIT`. Fixes [#38282](https://github.com/ClickHouse/ClickHouse/issues/38282). [#38371](https://github.com/ClickHouse/ClickHouse/pull/38371) ([Anton Popov](https://github.com/CurtizJ)). +* Fix wrong results of countSubstrings() & position() on patterns with 0-bytes. [#38589](https://github.com/ClickHouse/ClickHouse/pull/38589) ([Robert Schulze](https://github.com/rschu1ze)). +* Now it's possible to start a clickhouse-server and attach/detach tables even for tables with incorrect values of IPv4/IPv6 representation. Proper fix for issue [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#38590](https://github.com/ClickHouse/ClickHouse/pull/38590) ([alesapin](https://github.com/alesapin)). +* `rankCorr` function will work correctly if some arguments are NaNs. This closes [#38396](https://github.com/ClickHouse/ClickHouse/issues/38396). [#38722](https://github.com/ClickHouse/ClickHouse/pull/38722) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `parallel_view_processing=1` with `optimize_trivial_insert_select=1`. Fix `max_insert_threads` while pushing to views. [#38731](https://github.com/ClickHouse/ClickHouse/pull/38731) ([Azat Khuzhin](https://github.com/azat)). +* Fix use-after-free for aggregate functions with `Map` combinator that leads to incorrect result. [#38748](https://github.com/ClickHouse/ClickHouse/pull/38748) ([Azat Khuzhin](https://github.com/azat)). + ### ClickHouse release 22.6, 2022-06-16 #### Backward Incompatible Change diff --git a/CMakeLists.txt b/CMakeLists.txt index 7aa6b5d5a50..05f88f3530e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.14) +cmake_minimum_required(VERSION 3.15) project(ClickHouse LANGUAGES C CXX ASM) diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c index 616e43ef342..dad7aa938d7 100644 --- a/base/glibc-compatibility/musl/getauxval.c +++ b/base/glibc-compatibility/musl/getauxval.c @@ -1,176 +1,68 @@ -#include "atomic.h" #include -#include // open -#include // O_RDONLY -#include // read, close -#include // ssize_t -#include // perror, fprintf -#include // ElfW +#include "atomic.h" +#include // __environ #include -#define ARRAY_SIZE(a) sizeof((a))/sizeof((a[0])) - -// We don't have libc struct available here. -// Compute aux vector manually (from /proc/self/auxv). -// -// Right now there is only 51 AT_* constants, -// so 64 should be enough until this implementation will be replaced with musl. 
-static unsigned long __auxv_procfs[64]; +// We don't have libc struct available here. Compute aux vector manually. +static unsigned long * __auxv = NULL; static unsigned long __auxv_secure = 0; -// Common -static unsigned long * __auxv_environ = NULL; -static void * volatile getauxval_func; - -static unsigned long __auxv_init_environ(unsigned long type); - -// -// auxv from procfs interface -// -ssize_t __retry_read(int fd, void * buf, size_t count) -{ - for (;;) - { - ssize_t ret = read(fd, buf, count); - if (ret == -1) - { - if (errno == EINTR) - { - continue; - } - perror("Cannot read /proc/self/auxv"); - abort(); - } - return ret; - } -} -unsigned long __getauxval_procfs(unsigned long type) -{ - if (type == AT_SECURE) - { - return __auxv_secure; - } - - if (type >= ARRAY_SIZE(__auxv_procfs)) - { - errno = ENOENT; - return 0; - } - - return __auxv_procfs[type]; -} -static unsigned long __auxv_init_procfs(unsigned long type) -{ - // For debugging: - // - od -t dL /proc/self/auxv - // - LD_SHOW_AUX= ls - int fd = open("/proc/self/auxv", O_RDONLY); - // It is possible in case of: - // - no procfs mounted - // - on android you are not able to read it unless running from shell or debugging - // - some other issues - if (fd == -1) - { - // Fallback to environ. - a_cas_p(&getauxval_func, (void *)__auxv_init_procfs, (void *)__auxv_init_environ); - return __auxv_init_environ(type); - } - - ElfW(auxv_t) aux; - - /// NOTE: sizeof(aux) is very small (less then PAGE_SIZE), so partial read should not be possible. - _Static_assert(sizeof(aux) < 4096, "Unexpected sizeof(aux)"); - while (__retry_read(fd, &aux, sizeof(aux)) == sizeof(aux)) - { - if (aux.a_type >= ARRAY_SIZE(__auxv_procfs)) - { - fprintf(stderr, "AT_* is out of range: %li (maximum allowed is %zu)\n", aux.a_type, ARRAY_SIZE(__auxv_procfs)); - abort(); - } - if (__auxv_procfs[aux.a_type]) - { - fprintf(stderr, "AUXV already has value (%zu)\n", __auxv_procfs[aux.a_type]); - abort(); - } - __auxv_procfs[aux.a_type] = aux.a_un.a_val; - } - close(fd); - - __auxv_secure = __getauxval_procfs(AT_SECURE); - - // Now we've initialized __auxv_procfs, next time getauxval() will only call __get_auxval(). - a_cas_p(&getauxval_func, (void *)__auxv_init_procfs, (void *)__getauxval_procfs); - - return __getauxval_procfs(type); -} - -// -// auxv from environ interface -// -// NOTE: environ available only after static initializers, -// so you cannot rely on this if you need getauxval() before. -// -// Good example of such user is sanitizers, for example -// LSan will not work with __auxv_init_environ(), -// since it needs getauxval() before. -// static size_t __find_auxv(unsigned long type) { size_t i; - for (i = 0; __auxv_environ[i]; i += 2) + for (i = 0; __auxv[i]; i += 2) { - if (__auxv_environ[i] == type) - { + if (__auxv[i] == type) return i + 1; - } } return (size_t) -1; } -unsigned long __getauxval_environ(unsigned long type) + +unsigned long __getauxval(unsigned long type) { if (type == AT_SECURE) return __auxv_secure; - if (__auxv_environ) + if (__auxv) { size_t index = __find_auxv(type); if (index != ((size_t) -1)) - return __auxv_environ[index]; + return __auxv[index]; } errno = ENOENT; return 0; } -static unsigned long __auxv_init_environ(unsigned long type) + +static void * volatile getauxval_func; + +static unsigned long __auxv_init(unsigned long type) { if (!__environ) { - // __environ is not initialized yet so we can't initialize __auxv_environ right now. + // __environ is not initialized yet so we can't initialize __auxv right now. 
// That's normally occurred only when getauxval() is called from some sanitizer's internal code. errno = ENOENT; return 0; } - // Initialize __auxv_environ and __auxv_secure. + // Initialize __auxv and __auxv_secure. size_t i; for (i = 0; __environ[i]; i++); - __auxv_environ = (unsigned long *) (__environ + i + 1); + __auxv = (unsigned long *) (__environ + i + 1); size_t secure_idx = __find_auxv(AT_SECURE); if (secure_idx != ((size_t) -1)) - __auxv_secure = __auxv_environ[secure_idx]; + __auxv_secure = __auxv[secure_idx]; - // Now we need to switch to __getauxval_environ for all later calls, since - // everything is initialized. - a_cas_p(&getauxval_func, (void *)__auxv_init_environ, (void *)__getauxval_environ); + // Now we've initialized __auxv, next time getauxval() will only call __get_auxval(). + a_cas_p(&getauxval_func, (void *)__auxv_init, (void *)__getauxval); - return __getauxval_environ(type); + return __getauxval(type); } -// Callchain: -// - __auxv_init_procfs -> __getauxval_environ -// - __auxv_init_procfs -> __auxv_init_environ -> __getauxval_environ -static void * volatile getauxval_func = (void *)__auxv_init_procfs; +// First time getauxval() will call __auxv_init(). +static void * volatile getauxval_func = (void *)__auxv_init; unsigned long getauxval(unsigned long type) { diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index e6c60e74c36..edc7805150b 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54464) +SET(VERSION_REVISION 54465) SET(VERSION_MAJOR 22) -SET(VERSION_MINOR 7) +SET(VERSION_MINOR 8) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 7000c4e0033bb9e69050ab8ef73e8e7465f78059) -SET(VERSION_DESCRIBE v22.7.1.1-testing) -SET(VERSION_STRING 22.7.1.1) +SET(VERSION_GITHASH f4f05ec786a8b8966dd0ea2a2d7e39a8c7db24f4) +SET(VERSION_DESCRIBE v22.8.1.1-testing) +SET(VERSION_STRING 22.8.1.1) # end of autochange diff --git a/cmake/linux/toolchain-x86_64.cmake b/cmake/linux/toolchain-x86_64.cmake index 965ea024ab7..bdcfcfa013a 100644 --- a/cmake/linux/toolchain-x86_64.cmake +++ b/cmake/linux/toolchain-x86_64.cmake @@ -1,3 +1,19 @@ +if (_CLICKHOUSE_TOOLCHAIN_FILE_LOADED) + # During first run of cmake the toolchain file will be loaded twice, + # - /usr/share/cmake-3.23/Modules/CMakeDetermineSystem.cmake + # - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake + # + # But once you already have non-empty cmake cache it will be loaded only + # once: + # - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake + # + # This has no harm except for double load of toolchain will add + # --gcc-toolchain multiple times that will not allow ccache to reuse the + # cache. + return() +endif() +set (_CLICKHOUSE_TOOLCHAIN_FILE_LOADED ON) + set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) set (CMAKE_SYSTEM_NAME "Linux") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 234dbcdc166..0eba4da4a89 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -157,6 +157,7 @@ endif() add_contrib (sqlite-cmake sqlite-amalgamation) add_contrib (s2geometry-cmake s2geometry) add_contrib (base-x-cmake base-x) +add_contrib(c-ares-cmake c-ares) add_contrib (qpl-cmake qpl) # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. 
diff --git a/contrib/c-ares b/contrib/c-ares new file mode 160000 index 00000000000..afee6748b0b --- /dev/null +++ b/contrib/c-ares @@ -0,0 +1 @@ +Subproject commit afee6748b0b99acf4509d42fa37ac8422262f91b diff --git a/contrib/c-ares-cmake/CMakeLists.txt b/contrib/c-ares-cmake/CMakeLists.txt new file mode 100644 index 00000000000..603c1f8b65c --- /dev/null +++ b/contrib/c-ares-cmake/CMakeLists.txt @@ -0,0 +1,35 @@ +# Choose to build static or shared library for c-ares. +if (USE_STATIC_LIBRARIES) + set(CARES_STATIC ON CACHE BOOL "" FORCE) + set(CARES_SHARED OFF CACHE BOOL "" FORCE) +else () + set(CARES_STATIC OFF CACHE BOOL "" FORCE) + set(CARES_SHARED ON CACHE BOOL "" FORCE) +endif () + +# Disable looking for libnsl on a platforms that has gethostbyname in glibc +# +# c-ares searching for gethostbyname in the libnsl library, however in the +# version that shipped with gRPC it doing it wrong [1], since it uses +# CHECK_LIBRARY_EXISTS(), which will return TRUE even if the function exists in +# another dependent library. The upstream already contains correct macro [2], +# but it is not included in gRPC (even upstream gRPC, not the one that is +# shipped with clickhousee). +# +# [1]: https://github.com/c-ares/c-ares/blob/e982924acee7f7313b4baa4ee5ec000c5e373c30/CMakeLists.txt#L125 +# [2]: https://github.com/c-ares/c-ares/blob/44fbc813685a1fa8aa3f27fcd7544faf612d376a/CMakeLists.txt#L146 +# +# And because if you by some reason have libnsl [3] installed, clickhouse will +# reject to start w/o it. While this is completelly different library. +# +# [3]: https://packages.debian.org/bullseye/libnsl2 +if (NOT CMAKE_SYSTEM_NAME STREQUAL "SunOS") + set(HAVE_LIBNSL OFF CACHE BOOL "" FORCE) +endif() + +# Force use of c-ares inet_net_pton instead of libresolv one +set(HAVE_INET_NET_PTON OFF CACHE BOOL "" FORCE) + +add_subdirectory("../c-ares/" "../c-ares/") + +add_library(ch_contrib::c-ares ALIAS c-ares) \ No newline at end of file diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt index 520e04d198e..b1ed7e464b6 100644 --- a/contrib/grpc-cmake/CMakeLists.txt +++ b/contrib/grpc-cmake/CMakeLists.txt @@ -45,38 +45,11 @@ set(_gRPC_SSL_LIBRARIES OpenSSL::Crypto OpenSSL::SSL) # Use abseil-cpp from ClickHouse contrib, not from gRPC third_party. set(gRPC_ABSL_PROVIDER "clickhouse" CACHE STRING "" FORCE) -# Choose to build static or shared library for c-ares. -if (USE_STATIC_LIBRARIES) - set(CARES_STATIC ON CACHE BOOL "" FORCE) - set(CARES_SHARED OFF CACHE BOOL "" FORCE) -else () - set(CARES_STATIC OFF CACHE BOOL "" FORCE) - set(CARES_SHARED ON CACHE BOOL "" FORCE) -endif () - -# Disable looking for libnsl on a platforms that has gethostbyname in glibc -# -# c-ares searching for gethostbyname in the libnsl library, however in the -# version that shipped with gRPC it doing it wrong [1], since it uses -# CHECK_LIBRARY_EXISTS(), which will return TRUE even if the function exists in -# another dependent library. The upstream already contains correct macro [2], -# but it is not included in gRPC (even upstream gRPC, not the one that is -# shipped with clickhousee). -# -# [1]: https://github.com/c-ares/c-ares/blob/e982924acee7f7313b4baa4ee5ec000c5e373c30/CMakeLists.txt#L125 -# [2]: https://github.com/c-ares/c-ares/blob/44fbc813685a1fa8aa3f27fcd7544faf612d376a/CMakeLists.txt#L146 -# -# And because if you by some reason have libnsl [3] installed, clickhouse will -# reject to start w/o it. While this is completelly different library. 
-# -# [3]: https://packages.debian.org/bullseye/libnsl2 -if (NOT CMAKE_SYSTEM_NAME STREQUAL "SunOS") - set(HAVE_LIBNSL OFF CACHE BOOL "" FORCE) -endif() - # We don't want to build C# extensions. set(gRPC_BUILD_CSHARP_EXT OFF) +set(_gRPC_CARES_LIBRARIES ch_contrib::c-ares) +set(gRPC_CARES_PROVIDER "clickhouse" CACHE STRING "" FORCE) add_subdirectory("${_gRPC_SOURCE_DIR}" "${_gRPC_BINARY_DIR}") # The contrib/grpc/CMakeLists.txt redefined the PROTOBUF_GENERATE_GRPC_CPP() function for its own purposes, diff --git a/contrib/llvm-cmake/CMakeLists.txt b/contrib/llvm-cmake/CMakeLists.txt index a108e6537c9..4a4a5cef62e 100644 --- a/contrib/llvm-cmake/CMakeLists.txt +++ b/contrib/llvm-cmake/CMakeLists.txt @@ -93,6 +93,18 @@ set (CMAKE_CXX_STANDARD 17) set (LLVM_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm/llvm") set (LLVM_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/llvm/llvm") add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}") +set_directory_properties (PROPERTIES + # due to llvm crosscompile cmake does not know how to clean it, and on clean + # will lead to the following error: + # + # ninja: error: remove(contrib/llvm/llvm/NATIVE): Directory not empty + # + ADDITIONAL_CLEAN_FILES "${LLVM_BINARY_DIR}" + # llvm's cmake configuring this file only when cmake runs, + # and after clean cmake will not know that it should re-run, + # add explicitly depends from llvm-config.h + CMAKE_CONFIGURE_DEPENDS "${LLVM_BINARY_DIR}/include/llvm/Config/llvm-config.h" +) add_library (_llvm INTERFACE) target_link_libraries (_llvm INTERFACE ${REQUIRED_LLVM_LIBRARIES}) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 2bbdd978e5e..6b8109a15b2 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -135,6 +135,7 @@ function clone_submodules contrib/replxx contrib/wyhash contrib/hashidsxx + contrib/c-ares ) git submodule sync diff --git a/docker/test/integration/runner/compose/docker_compose_coredns.yml b/docker/test/integration/runner/compose/docker_compose_coredns.yml new file mode 100644 index 00000000000..b329d4e0a46 --- /dev/null +++ b/docker/test/integration/runner/compose/docker_compose_coredns.yml @@ -0,0 +1,9 @@ +version: "2.3" + +services: + coredns: + image: coredns/coredns:latest + restart: always + volumes: + - ${COREDNS_CONFIG_DIR}/example.com:/example.com + - ${COREDNS_CONFIG_DIR}/Corefile:/Corefile diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 06bf05a1727..ffa0b12b8a3 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -362,6 +362,8 @@ else # FIXME Not sure if it's expected, but some tests from BC check may not be finished yet when we restarting server. # Let's just ignore all errors from queries ("} TCPHandler: Code:", "} executeQuery: Code:") # FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'") + # NOTE Incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/39263, it's expected + # ("This engine is deprecated and is not supported in transactions", "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part") echo "Check for Error messages in server log:" zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ -e "Code: 236. 
DB::Exception: Cancelled mutating parts" \ @@ -389,6 +391,8 @@ else -e "} TCPHandler: Code:" \ -e "} executeQuery: Code:" \ -e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \ + -e "This engine is deprecated and is not supported in transactions" \ + -e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \ /var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \ && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv diff --git a/docker/test/stress/stress b/docker/test/stress/stress index ab25d13695b..6d90b9d5437 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -46,6 +46,9 @@ def get_options(i, backward_compatibility_check): if i == 13: client_options.append("memory_tracker_fault_probability=0.001") + if i % 2 == 1 and not backward_compatibility_check: + client_options.append("group_by_use_nulls=1") + if client_options: options.append(" --client-option " + " ".join(client_options)) diff --git a/docs/README.md b/docs/README.md index b328a3ee125..fa8b6bed85c 100644 --- a/docs/README.md +++ b/docs/README.md @@ -38,9 +38,9 @@ Writing the docs is extremely useful for project's users and developers, and gro The documentation contains information about all the aspects of the ClickHouse lifecycle: developing, testing, installing, operating, and using. The base language of the documentation is English. The English version is the most actual. All other languages are supported as much as they can by contributors from different countries. -At the moment, [documentation](https://clickhouse.com/docs) exists in English, Russian, Chinese, Japanese. We store the documentation besides the ClickHouse source code in the [GitHub repository](https://github.com/ClickHouse/ClickHouse/tree/master/docs). +At the moment, [documentation](https://clickhouse.com/docs) exists in English, Russian, and Chinese. We store the reference documentation besides the ClickHouse source code in the [GitHub repository](https://github.com/ClickHouse/ClickHouse/tree/master/docs), and user guides in a separate repo [Clickhouse/clickhouse-docs](https://github.com/ClickHouse/clickhouse-docs). -Each language lays in the corresponding folder. Files that are not translated from English are the symbolic links to the English ones. +Each language lies in the corresponding folder. Files that are not translated from English are symbolic links to the English ones. @@ -48,9 +48,9 @@ Each language lays in the corresponding folder. Files that are not translated fr You can contribute to the documentation in many ways, for example: -- Fork the ClickHouse repository, edit, commit, push, and open a pull request. +- Fork the ClickHouse and ClickHouse-docs repositories, edit, commit, push, and open a pull request. - Add the `documentation` label to this pull request for proper automatic checks applying. If you have no permissions for adding labels, the reviewer of your PR adds it. + Add the `pr-documentation` label to this pull request for proper automatic checks applying. If you do not have permission to add labels, then the reviewer of your PR will add it. - Open a required file in the ClickHouse repository and edit it from the GitHub web interface. 
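For the first option, a minimal sketch of the fork-and-pull-request flow might look like the following (the account name and branch name are placeholders, not prescribed by this guide):

``` bash
# clone your fork of the docs repository and create a working branch
git clone https://github.com/<your-github-account>/clickhouse-docs.git
cd clickhouse-docs
git checkout -b docs-fix-typo
# ... edit the Markdown files ...
git commit -am "Fix typo in the quick start guide"
git push origin docs-fix-typo
# then open a pull request against ClickHouse/clickhouse-docs on GitHub
```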
@@ -158,15 +158,15 @@ When everything is ready, we will add the new language to the website. -### Documentation for Different Audience +### Documentation for Different Audiences -When writing documentation, think about people who read it. Each audience has specific requirements for terms they use in communications. +When writing documentation, think about the people who read it. Each audience has specific requirements for terms they use in communications. -ClickHouse documentation can be divided by the audience for the following parts: +ClickHouse documentation can be divided up by audience into the following parts: -- Conceptual topics in [Introduction](https://clickhouse.com/docs/en/), tutorials and overviews, changelog. +- Conceptual topics like tutorials and overviews. - These topics are for the most common auditory. When editing text in them, use the most common terms that are comfortable for the audience with basic technical skills. + These topics are for the most common audience. When editing text in them, use the most common terms that are comfortable for the audience with basic technical skills. - Query language reference and related topics. diff --git a/docs/en/development/build.md b/docs/en/development/build.md index dbb90f8e537..e12884b61c4 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -75,7 +75,7 @@ This will create the `programs/clickhouse` executable, which can be used with `c The build requires the following components: - Git (is used only to checkout the sources, it’s not needed for the build) -- CMake 3.14 or newer +- CMake 3.15 or newer - Ninja - C++ compiler: clang-14 or newer - Linker: lld diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 5d8ed9cdacd..e499849426b 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1632,6 +1632,8 @@ kafka_topic_list = 'topic1', kafka_group_name = 'group1', kafka_format = 'AvroConfluent'; +-- for debugging purposes you can set format_avro_schema_registry_url in a session. +-- this approach should not be used in production SET format_avro_schema_registry_url = 'http://schema-registry'; SELECT * FROM topic1_stream; diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 75c2aa57b32..ed1f139f482 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2626,7 +2626,7 @@ Possible values: - Any positive integer. - 0 - Disabled (infinite timeout). -Default value: 1800. +Default value: 180. ## http_receive_timeout {#http_receive_timeout} @@ -2637,7 +2637,7 @@ Possible values: - Any positive integer. - 0 - Disabled (infinite timeout). -Default value: 1800. +Default value: 180. ## check_query_single_value_result {#check_query_single_value_result} @@ -3329,6 +3329,15 @@ Read more about [memory overcommit](memory-overcommit.md). Default value: `1GiB`. +## compatibility {#compatibility} + +This setting changes other settings according to the provided ClickHouse version. +If behaviour in ClickHouse was changed by giving some setting a different default value, this compatibility setting allows you to use the default values from previous versions for all settings that were not explicitly set by the user. + +This setting takes a ClickHouse version number as a string, like `21.3` or `21.8`. An empty value means that this setting is disabled. + +Disabled by default. 
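As an illustration (not part of the patch above), a minimal sketch of trying the new `compatibility` setting from the command line, assuming a local server of this release and the default user; any setting can be passed to `clickhouse-client` as a command-line option:

``` bash
# run a single query with setting defaults emulating ClickHouse 22.3
clickhouse-client --compatibility='22.3' --query "SELECT version(), getSetting('compatibility')"
```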
+ # Format settings {#format-settings} ## input_format_skip_unknown_fields {#input_format_skip_unknown_fields} diff --git a/docs/tools/README.md b/docs/tools/README.md index 163600804c6..7cf3540d108 100644 --- a/docs/tools/README.md +++ b/docs/tools/README.md @@ -1,50 +1,94 @@ -## How ClickHouse documentation is generated? {#how-clickhouse-documentation-is-generated} +## Generating ClickHouse documentation {#how-clickhouse-documentation-is-generated} -ClickHouse documentation is built using [build.py](build.py) script that uses [mkdocs](https://www.mkdocs.org) library and it’s dependencies to separately build all version of documentations (all languages in either single and multi page mode) as static HTMLs for each single page version. The results are then put in the correct directory structure. It is recommended to use Python 3.7 to run this script. +ClickHouse documentation is built using [Docusaurus](https://docusaurus.io). -[release.sh](release.sh) also pulls static files needed for [official ClickHouse website](https://clickhouse.com) from [../../website](../../website) folder then pushes to specified GitHub repo to be served via [GitHub Pages](https://pages.github.com). +## Check the look of your documentation changes {#how-to-check-if-the-documentation-will-look-fine} -## How to check if the documentation will look fine? {#how-to-check-if-the-documentation-will-look-fine} +There are a few options that are all useful depending on how large or complex your edits are. -There are few options that are all useful depending on how large or complex your edits are. +### Use the GitHub web interface to edit -### Use GitHub web interface to edit +Every page in the docs has an **Edit this page** link that opens the page in the GitHub editor. GitHub has Markdown support with a preview feature. The details of GitHub Markdown and the documentation Markdown are a bit different but generally this is close enough, and the person merging your PR will build the docs and check them. -GitHub has Markdown support with preview feature, but the details of GitHub Markdown dialect are a bit different in ClickHouse documentation. +### Install a Markdown editor or plugin for your IDE {#install-markdown-editor-or-plugin-for-your-ide} -### Install Markdown editor or plugin for your IDE {#install-markdown-editor-or-plugin-for-your-ide} +Usually, these plugins provide a preview of how the markdown will render, and they catch basic errors like unclosed tags very early. -Usually those also have some way to preview how Markdown will look like, which allows to catch basic errors like unclosed tags very early. -### Use build.py {#use-build-py} +## Build the docs locally {#use-build-py} -It’ll take some effort to go through, but the result will be very close to production documentation. +You can build the docs locally. It takes a few minutes to set up, but once you have done it the first time, the process is very simple. -For the first time you’ll need to: +### Clone the repos -#### 1. Set up virtualenv +The documentation is in two repos, clone both of them: +- [ClickHouse/ClickHouse](https://github.com/ClickHouse/ClickHouse) +- [ClickHouse/ClickHouse-docs](https://github.com/ClickHouse/clickhouse-docs) -``` bash -$ cd ClickHouse/docs/tools -$ mkdir venv -$ virtualenv -p $(which python3) venv -$ source venv/bin/activate -$ pip3 install -r requirements.txt +### Install Node.js + +The documentation is built with Docusaurus, which requires Node.js. We recommend version 16. Install [Node.js](https://nodejs.org/en/download/). 
+ +### Copy files into place + +Docusaurus expects all of the markdown files to be located in the directory tree `clickhouse-docs/docs/`. This is not the way our repos are set up, so some copying of files is needed to build the docs: + +```bash +# from the parent directory of both the ClickHouse/ClickHouse and ClickHouse/clickhouse-docs repos: +cp -r ClickHouse/docs/en/development clickhouse-docs/docs/en/ +cp -r ClickHouse/docs/en/engines clickhouse-docs/docs/en/ +cp -r ClickHouse/docs/en/getting-started clickhouse-docs/docs/en/ +cp -r ClickHouse/docs/en/interfaces clickhouse-docs/docs/en/ +cp -r ClickHouse/docs/en/operations clickhouse-docs/docs/en/ +cp -r ClickHouse/docs/en/sql-reference clickhouse-docs/docs/en/ + +cp -r ClickHouse/docs/ru/* clickhouse-docs/docs/ru/ +cp -r ClickHouse/docs/zh clickhouse-docs/docs/ ``` -#### 2. Run build.py +#### Note: Symlinks will not work. +### Set up Docusaurus -When all prerequisites are installed, running `build.py` without args (there are some, check `build.py --help`) will generate `ClickHouse/docs/build` folder with complete static html website. +There are two commands that you may need to use with Docusaurus: +- `yarn install` +- `yarn start` -The easiest way to see the result is to use `--livereload=8888` argument of build.py. Alternatively, you can manually launch a HTTP server to serve the docs, for example by running `cd ClickHouse/docs/build && python3 -m http.server 8888`. Then go to http://localhost:8888 in browser. Feel free to use any other port instead of 8888. +#### Install Docusaurus and its dependencies: + +```bash +cd clickhouse-docs +yarn install +``` + +#### Start a development Docusaurus environment + +This command will start Docusaurus in development mode, which means that as you edit source files (for example, `.md` files) the changes will be rendered into HTML files and served by the Docusaurus development server. + +```bash +yarn start +``` + +### Make your changes to the markdown files + +Edit your files. Remember that if you are editing files in the `ClickHouse/ClickHouse` repo then you should edit them +in that repo and then copy the edited files into the `ClickHouse/clickhouse-docs/` directory structure so that they are updated in your development environment. + +`yarn start` probably opened a browser for you when you ran it; if not, open a browser to `http://localhost:3000/docs/en/intro` and navigate to the documentation that you are changing. If you have already made the changes, you can verify them here; if not, make them, and you will see the page update as you save the changes. ## How to change code highlighting? {#how-to-change-code-hl} -ClickHouse does not use mkdocs `highlightjs` feature. It uses modified pygments styles instead. -If you want to change code highlighting, edit the `website/css/highlight.css` file. -Currently, an [eighties](https://github.com/idleberg/base16-pygments/blob/master/css/base16-eighties.dark.css) theme -is used. +Code highlighting is based on the language chosen for your code blocks. Specify the language when you start the code block:
```sql
+SELECT firstname from imdb.actors;
+```
+
+ +```sql +SELECT firstname from imdb.actors; +``` + +If you need a language supported then open an issue in [ClickHouse-docs](https://github.com/ClickHouse/clickhouse-docs/issues). ## How to subscribe on documentation changes? {#how-to-subscribe-on-documentation-changes} At the moment there’s no easy way to do just that, but you can consider: diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index cf9b7cbafea..584806951cf 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -102,9 +102,34 @@ void Client::processError(const String & query) const } +void Client::showWarnings() +{ + try + { + std::vector messages = loadWarningMessages(); + if (!messages.empty()) + { + std::cout << "Warnings:" << std::endl; + for (const auto & message : messages) + std::cout << " * " << message << std::endl; + std::cout << std::endl; + } + } + catch (...) + { + /// Ignore exception + } +} + /// Make query to get all server warnings std::vector Client::loadWarningMessages() { + /// Older server versions cannot execute the query loading warnings. + constexpr UInt64 min_server_revision_to_load_warnings = DBMS_MIN_PROTOCOL_VERSION_WITH_VIEW_IF_PERMITTED; + + if (server_revision < min_server_revision_to_load_warnings) + return {}; + std::vector messages; connection->sendQuery(connection_parameters.timeouts, "SELECT * FROM viewIfPermitted(SELECT message FROM system.warnings ELSE null('message String'))", @@ -226,25 +251,9 @@ try connect(); - /// Load Warnings at the beginning of connection + /// Show warnings at the beginning of connection. if (is_interactive && !config().has("no-warnings")) - { - try - { - std::vector messages = loadWarningMessages(); - if (!messages.empty()) - { - std::cout << "Warnings:" << std::endl; - for (const auto & message : messages) - std::cout << " * " << message << std::endl; - std::cout << std::endl; - } - } - catch (...) - { - /// Ignore exception - } - } + showWarnings(); if (is_interactive && !delayed_interactive) { @@ -370,7 +379,7 @@ void Client::connect() } server_version = toString(server_version_major) + "." + toString(server_version_minor) + "." 
+ toString(server_version_patch); - load_suggestions = is_interactive && (server_revision >= Suggest::MIN_SERVER_REVISION && !config().getBool("disable_suggestion", false)); + load_suggestions = is_interactive && (server_revision >= Suggest::MIN_SERVER_REVISION) && !config().getBool("disable_suggestion", false); if (server_display_name = connection->getServerDisplayName(connection_parameters.timeouts); server_display_name.empty()) server_display_name = config().getString("host", "localhost"); diff --git a/programs/client/Client.h b/programs/client/Client.h index 164b8e2ebaa..1fec282be51 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -45,6 +45,7 @@ protected: private: void printChangedSettings() const; + void showWarnings(); std::vector loadWarningMessages(); }; } diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index f9cd7444287..1e5852fe651 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -1,6 +1,7 @@ #pragma once #include "ICommand.h" +#include namespace DB { diff --git a/programs/disks/CommandLink.cpp b/programs/disks/CommandLink.cpp index 6e9a7e64324..af48f0de097 100644 --- a/programs/disks/CommandLink.cpp +++ b/programs/disks/CommandLink.cpp @@ -1,6 +1,7 @@ #pragma once #include "ICommand.h" +#include namespace DB { diff --git a/programs/disks/CommandList.cpp b/programs/disks/CommandList.cpp index 8c6bfac3a9b..e76bb9e65fb 100644 --- a/programs/disks/CommandList.cpp +++ b/programs/disks/CommandList.cpp @@ -1,6 +1,7 @@ #pragma once #include "ICommand.h" +#include namespace DB { diff --git a/programs/disks/CommandListDisks.cpp b/programs/disks/CommandListDisks.cpp index 2bcbb045d67..22cffdd21fd 100644 --- a/programs/disks/CommandListDisks.cpp +++ b/programs/disks/CommandListDisks.cpp @@ -1,6 +1,7 @@ #pragma once #include "ICommand.h" +#include namespace DB { diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index 4a377cc7225..6322cf4b47d 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -1,6 +1,7 @@ #pragma once #include "ICommand.h" +#include namespace DB { diff --git a/programs/disks/CommandRead.cpp b/programs/disks/CommandRead.cpp index aa472fa217e..6b77a27e918 100644 --- a/programs/disks/CommandRead.cpp +++ b/programs/disks/CommandRead.cpp @@ -1,6 +1,7 @@ #pragma once #include "ICommand.h" +#include namespace DB { diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp index d9925fbd93e..c1d3129bb8d 100644 --- a/programs/disks/CommandRemove.cpp +++ b/programs/disks/CommandRemove.cpp @@ -1,6 +1,7 @@ #pragma once #include "ICommand.h" +#include namespace DB { diff --git a/programs/disks/CommandWrite.cpp b/programs/disks/CommandWrite.cpp index c8ae91ea8d5..0b1c5823c81 100644 --- a/programs/disks/CommandWrite.cpp +++ b/programs/disks/CommandWrite.cpp @@ -1,6 +1,7 @@ #pragma once #include "ICommand.h" +#include namespace DB { diff --git a/src/Access/Common/AllowedClientHosts.cpp b/src/Access/Common/AllowedClientHosts.cpp index 85d7065d823..efbdf3924e8 100644 --- a/src/Access/Common/AllowedClientHosts.cpp +++ b/src/Access/Common/AllowedClientHosts.cpp @@ -110,18 +110,24 @@ namespace } /// Returns the host name by its address. 
- String getHostByAddress(const IPAddress & address) + Strings getHostsByAddress(const IPAddress & address) { - String host = DNSResolver::instance().reverseResolve(address); + auto hosts = DNSResolver::instance().reverseResolve(address); - /// Check that PTR record is resolved back to client address - if (!isAddressOfHost(address, host)) - throw Exception("Host " + String(host) + " isn't resolved back to " + address.toString(), ErrorCodes::DNS_ERROR); + if (hosts.empty()) + throw Exception(ErrorCodes::DNS_ERROR, "{} could not be resolved", address.toString()); - return host; + + for (const auto & host : hosts) + { + /// Check that PTR record is resolved back to client address + if (!isAddressOfHost(address, host)) + throw Exception(ErrorCodes::DNS_ERROR, "Host {} isn't resolved back to {}", host, address.toString()); + } + + return hosts; } - void parseLikePatternIfIPSubnet(const String & pattern, IPSubnet & subnet, IPAddress::Family address_family) { size_t slash = pattern.find('/'); @@ -520,20 +526,29 @@ bool AllowedClientHosts::contains(const IPAddress & client_address) const return true; /// Check `name_regexps`. - std::optional resolved_host; + std::optional resolved_hosts; auto check_name_regexp = [&](const String & name_regexp_) { try { if (boost::iequals(name_regexp_, "localhost")) return is_client_local(); - if (!resolved_host) - resolved_host = getHostByAddress(client_v6); - if (resolved_host->empty()) - return false; - Poco::RegularExpression re(name_regexp_); - Poco::RegularExpression::Match match; - return re.match(*resolved_host, match) != 0; + if (!resolved_hosts) + { + resolved_hosts = getHostsByAddress(client_address); + } + + for (const auto & host : resolved_hosts.value()) + { + Poco::RegularExpression re(name_regexp_); + Poco::RegularExpression::Match match; + if (re.match(host, match) != 0) + { + return true; + } + } + + return false; } catch (const Exception & e) { diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 994abc7b53a..0cbe420f345 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/src/AggregateFunctions/parseAggregateFunctionParameters.h b/src/AggregateFunctions/parseAggregateFunctionParameters.h index a67bc081303..41a04324f6d 100644 --- a/src/AggregateFunctions/parseAggregateFunctionParameters.h +++ b/src/AggregateFunctions/parseAggregateFunctionParameters.h @@ -8,6 +8,8 @@ namespace DB { +struct Array; + Array getAggregateFunctionParametersArray( const ASTPtr & expression_list, const std::string & error_context, diff --git a/src/Backups/registerBackupEnginesFileAndDisk.cpp b/src/Backups/registerBackupEnginesFileAndDisk.cpp index 050a51939b6..380ae36a8e3 100644 --- a/src/Backups/registerBackupEnginesFileAndDisk.cpp +++ b/src/Backups/registerBackupEnginesFileAndDisk.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 01d30876dec..f4d3be14da6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -382,7 +382,7 @@ if (TARGET ch_contrib::rdkafka) endif() if (TARGET ch_contrib::nats_io) - dbms_target_link_libraries(PRIVATE ch_contrib::nats_io) + dbms_target_link_libraries(PRIVATE ch_contrib::nats_io ch_contrib::uv) endif() if (TARGET ch_contrib::sasl2) @@ -453,6 +453,9 @@ if (TARGET ch_contrib::avrocpp) dbms_target_link_libraries(PRIVATE ch_contrib::avrocpp) endif () +set_source_files_properties(Common/CaresPTRResolver.cpp 
PROPERTIES COMPILE_FLAGS -Wno-reserved-identifier) +target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::c-ares) + if (TARGET OpenSSL::Crypto) dbms_target_link_libraries (PRIVATE OpenSSL::Crypto) target_link_libraries (clickhouse_common_io PRIVATE OpenSSL::Crypto) diff --git a/src/Client/Suggest.h b/src/Client/Suggest.h index 65b60ceffc4..25d45f7ffaf 100644 --- a/src/Client/Suggest.h +++ b/src/Client/Suggest.h @@ -28,8 +28,8 @@ public: template void load(ContextPtr context, const ConnectionParameters & connection_parameters, Int32 suggestion_limit); - /// Older server versions cannot execute the query above. - static constexpr int MIN_SERVER_REVISION = 54406; + /// Older server versions cannot execute the query loading suggestions. + static constexpr int MIN_SERVER_REVISION = DBMS_MIN_PROTOCOL_VERSION_WITH_VIEW_IF_PERMITTED; private: void fetch(IServerConnection & connection, const ConnectionTimeouts & timeouts, const std::string & query); diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index d8e98ec9406..8d61f6e726a 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -793,4 +793,18 @@ ColumnPtr makeNullable(const ColumnPtr & column) return ColumnNullable::create(column, ColumnUInt8::create(column->size(), 0)); } +ColumnPtr makeNullableSafe(const ColumnPtr & column) +{ + if (isColumnNullable(*column)) + return column; + + if (isColumnConst(*column)) + return ColumnConst::create(makeNullableSafe(assert_cast(*column).getDataColumnPtr()), column->size()); + + if (column->canBeInsideNullable()) + return makeNullable(column); + + return column; +} + } diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 52e57f7f0d0..e832f6d20e5 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -223,5 +223,6 @@ private: }; ColumnPtr makeNullable(const ColumnPtr & column); +ColumnPtr makeNullableSafe(const ColumnPtr & column); } diff --git a/src/Common/CaresPTRResolver.cpp b/src/Common/CaresPTRResolver.cpp new file mode 100644 index 00000000000..f6228e97c02 --- /dev/null +++ b/src/Common/CaresPTRResolver.cpp @@ -0,0 +1,109 @@ +#include "CaresPTRResolver.h" +#include +#include +#include +#include "ares.h" +#include "netdb.h" + +namespace DB +{ + + namespace ErrorCodes + { + extern const int DNS_ERROR; + } + + static void callback(void * arg, int status, int, struct hostent * host) + { + auto * ptr_records = reinterpret_cast*>(arg); + if (status == ARES_SUCCESS && host->h_aliases) + { + int i = 0; + while (auto * ptr_record = host->h_aliases[i]) + { + ptr_records->emplace_back(ptr_record); + i++; + } + } + } + + CaresPTRResolver::CaresPTRResolver(CaresPTRResolver::provider_token) : channel(nullptr) + { + /* + * ares_library_init is not thread safe. Currently, the only other usage of c-ares seems to be in grpc. + * In grpc, ares_library_init seems to be called only in Windows. + * See https://github.com/grpc/grpc/blob/master/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc#L1187 + * That means it's safe to init it here, but we should be cautious when introducing new code that depends on c-ares and even updates + * to grpc. 
As discussed in https://github.com/ClickHouse/ClickHouse/pull/37827#discussion_r919189085, c-ares should be adapted to be atomic + * */ + if (ares_library_init(ARES_LIB_INIT_ALL) != ARES_SUCCESS || ares_init(&channel) != ARES_SUCCESS) + { + throw DB::Exception("Failed to initialize c-ares", DB::ErrorCodes::DNS_ERROR); + } + } + + CaresPTRResolver::~CaresPTRResolver() + { + ares_destroy(channel); + ares_library_cleanup(); + } + + std::vector CaresPTRResolver::resolve(const std::string & ip) + { + std::vector ptr_records; + + resolve(ip, ptr_records); + wait(); + + return ptr_records; + } + + std::vector CaresPTRResolver::resolve_v6(const std::string & ip) + { + std::vector ptr_records; + + resolve_v6(ip, ptr_records); + wait(); + + return ptr_records; + } + + void CaresPTRResolver::resolve(const std::string & ip, std::vector & response) + { + in_addr addr; + + inet_pton(AF_INET, ip.c_str(), &addr); + + ares_gethostbyaddr(channel, reinterpret_cast(&addr), sizeof(addr), AF_INET, callback, &response); + } + + void CaresPTRResolver::resolve_v6(const std::string & ip, std::vector & response) + { + in6_addr addr; + inet_pton(AF_INET6, ip.c_str(), &addr); + + ares_gethostbyaddr(channel, reinterpret_cast(&addr), sizeof(addr), AF_INET6, callback, &response); + } + + void CaresPTRResolver::wait() + { + timeval * tvp, tv; + fd_set read_fds; + fd_set write_fds; + int nfds; + + for (;;) + { + FD_ZERO(&read_fds); + FD_ZERO(&write_fds); + nfds = ares_fds(channel, &read_fds,&write_fds); + if (nfds == 0) + { + break; + } + tvp = ares_timeout(channel, nullptr, &tv); + select(nfds, &read_fds, &write_fds, nullptr, tvp); + ares_process(channel, &read_fds, &write_fds); + } + } +} diff --git a/src/Common/CaresPTRResolver.h b/src/Common/CaresPTRResolver.h new file mode 100644 index 00000000000..fd6a1cf7bc5 --- /dev/null +++ b/src/Common/CaresPTRResolver.h @@ -0,0 +1,42 @@ +#pragma once + +#include "DNSPTRResolver.h" + +using ares_channel = struct ares_channeldata *; + +namespace DB +{ + + /* + * Implements reverse DNS resolution using c-ares lib. System reverse DNS resolution via + * gethostbyaddr or getnameinfo does not work reliably because in some systems + * it returns all PTR records for a given IP and in others it returns only one. 
+ * */ + class CaresPTRResolver : public DNSPTRResolver + { + friend class DNSPTRResolverProvider; + + /* + * Allow only DNSPTRProvider to instantiate this class + * */ + struct provider_token {}; + + public: + explicit CaresPTRResolver(provider_token); + ~CaresPTRResolver() override; + + std::vector resolve(const std::string & ip) override; + + std::vector resolve_v6(const std::string & ip) override; + + private: + void wait(); + + void resolve(const std::string & ip, std::vector & response); + + void resolve_v6(const std::string & ip, std::vector & response); + + ares_channel channel; + }; +} + diff --git a/src/Common/DNSPTRResolver.h b/src/Common/DNSPTRResolver.h new file mode 100644 index 00000000000..e6cce83f79d --- /dev/null +++ b/src/Common/DNSPTRResolver.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +namespace DB +{ + struct DNSPTRResolver + { + + virtual ~DNSPTRResolver() = default; + + virtual std::vector resolve(const std::string & ip) = 0; + + virtual std::vector resolve_v6(const std::string & ip) = 0; + + }; +} diff --git a/src/Common/DNSPTRResolverProvider.cpp b/src/Common/DNSPTRResolverProvider.cpp new file mode 100644 index 00000000000..41c73f4f36f --- /dev/null +++ b/src/Common/DNSPTRResolverProvider.cpp @@ -0,0 +1,13 @@ +#include "DNSPTRResolverProvider.h" +#include "CaresPTRResolver.h" + +namespace DB +{ + std::shared_ptr DNSPTRResolverProvider::get() + { + static auto cares_resolver = std::make_shared( + CaresPTRResolver::provider_token {} + ); + return cares_resolver; + } +} diff --git a/src/Common/DNSPTRResolverProvider.h b/src/Common/DNSPTRResolverProvider.h new file mode 100644 index 00000000000..a7f534749e3 --- /dev/null +++ b/src/Common/DNSPTRResolverProvider.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include "DNSPTRResolver.h" + +namespace DB +{ + /* + * Provides a ready-to-use DNSPTRResolver instance. + * It hides 3rd party lib dependencies, handles initialization and lifetime. + * Since `get` function is static, it can be called from any context. Including cached static functions. + * */ + class DNSPTRResolverProvider + { + public: + static std::shared_ptr get(); + }; +} diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 0616e324b73..10797b7a809 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -12,6 +12,7 @@ #include #include #include +#include "DNSPTRResolverProvider.h" namespace ProfileEvents { @@ -138,16 +139,17 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) return addresses; } -static String reverseResolveImpl(const Poco::Net::IPAddress & address) +static Strings reverseResolveImpl(const Poco::Net::IPAddress & address) { - Poco::Net::SocketAddress sock_addr(address, 0); + auto ptr_resolver = DB::DNSPTRResolverProvider::get(); - /// Resolve by hand, because Poco::Net::DNS::hostByAddress(...) does getaddrinfo(...) after getnameinfo(...) 
- char host[1024]; - int err = getnameinfo(sock_addr.addr(), sock_addr.length(), host, sizeof(host), nullptr, 0, NI_NAMEREQD); - if (err) - throw Exception("Cannot getnameinfo(" + address.toString() + "): " + gai_strerror(err), ErrorCodes::DNS_ERROR); - return host; + if (address.family() == Poco::Net::IPAddress::Family::IPv4) + { + return ptr_resolver->resolve(address.toString()); + } else + { + return ptr_resolver->resolve_v6(address.toString()); + } } struct DNSResolver::Impl @@ -235,7 +237,7 @@ std::vector DNSResolver::resolveAddressList(const std: return addresses; } -String DNSResolver::reverseResolve(const Poco::Net::IPAddress & address) +Strings DNSResolver::reverseResolve(const Poco::Net::IPAddress & address) { if (impl->disable_cache) return reverseResolveImpl(address); diff --git a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index fdd9799f96f..84c88586636 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -36,8 +36,8 @@ public: std::vector resolveAddressList(const std::string & host, UInt16 port); - /// Accepts host IP and resolves its host name - String reverseResolve(const Poco::Net::IPAddress & address); + /// Accepts host IP and resolves its host names + Strings reverseResolve(const Poco::Net::IPAddress & address); /// Get this server host name String getHostName(); diff --git a/src/Common/LRUFileCache.cpp b/src/Common/LRUFileCache.cpp index 0ce76dbdec6..6306b6de059 100644 --- a/src/Common/LRUFileCache.cpp +++ b/src/Common/LRUFileCache.cpp @@ -45,7 +45,7 @@ void LRUFileCache::initialize() catch (...) { tryLogCurrentException(__PRETTY_FUNCTION__); - return; + throw; } } else @@ -841,7 +841,11 @@ void LRUFileCache::loadCacheInfoIntoMemory(std::lock_guard & cache_l /// cache_base_path / key_prefix / key / offset if (!files.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache already initialized"); + throw Exception( + ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + "Cache initialization is partially made. " + "This can be a result of a failed first attempt to initialize cache. 
" + "Please, check log for error messages"); fs::directory_iterator key_prefix_it{cache_base_path}; for (; key_prefix_it != fs::directory_iterator(); ++key_prefix_it) diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index cfc364929a3..60efab69433 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -342,6 +342,23 @@ OptimizedRegularExpressionImpl::OptimizedRegularExpressionImpl(cons } } +template +OptimizedRegularExpressionImpl::OptimizedRegularExpressionImpl(OptimizedRegularExpressionImpl && rhs) noexcept + : is_trivial(rhs.is_trivial) + , required_substring_is_prefix(rhs.required_substring_is_prefix) + , is_case_insensitive(rhs.is_case_insensitive) + , required_substring(std::move(rhs.required_substring)) + , re2(std::move(rhs.re2)) + , number_of_subpatterns(rhs.number_of_subpatterns) +{ + if (!required_substring.empty()) + { + if (is_case_insensitive) + case_insensitive_substring_searcher.emplace(required_substring.data(), required_substring.size()); + else + case_sensitive_substring_searcher.emplace(required_substring.data(), required_substring.size()); + } +} template bool OptimizedRegularExpressionImpl::match(const char * subject, size_t subject_size) const diff --git a/src/Common/OptimizedRegularExpression.h b/src/Common/OptimizedRegularExpression.h index eaa7b06e309..dad8706a50d 100644 --- a/src/Common/OptimizedRegularExpression.h +++ b/src/Common/OptimizedRegularExpression.h @@ -56,6 +56,9 @@ public: using StringPieceType = std::conditional_t; OptimizedRegularExpressionImpl(const std::string & regexp_, int options = 0); /// NOLINT + /// StringSearcher store pointers to required_substring, it must be updated on move. + OptimizedRegularExpressionImpl(OptimizedRegularExpressionImpl && rhs) noexcept; + OptimizedRegularExpressionImpl(const OptimizedRegularExpressionImpl & rhs) = delete; bool match(const std::string & subject) const { diff --git a/src/Common/ShellCommand.cpp b/src/Common/ShellCommand.cpp index 86adeeaf7e5..0050288b1cf 100644 --- a/src/Common/ShellCommand.cpp +++ b/src/Common/ShellCommand.cpp @@ -1,9 +1,7 @@ #include #include -#include #include #include -#include #include #include @@ -13,6 +11,7 @@ #include #include #include +#include namespace @@ -94,53 +93,15 @@ ShellCommand::~ShellCommand() bool ShellCommand::tryWaitProcessWithTimeout(size_t timeout_in_seconds) { - int status = 0; - LOG_TRACE(getLogger(), "Try wait for shell command pid {} with timeout {}", pid, timeout_in_seconds); wait_called = true; - struct timespec interval {.tv_sec = 1, .tv_nsec = 0}; in.close(); out.close(); err.close(); - if (timeout_in_seconds == 0) - { - /// If there is no timeout before signal try to waitpid 1 time without block so we can avoid sending - /// signal if process is already normally terminated. 
- - int waitpid_res = waitpid(pid, &status, WNOHANG); - bool process_terminated_normally = (waitpid_res == pid); - return process_terminated_normally; - } - - /// If timeout is positive try waitpid without block in loop until - /// process is normally terminated or waitpid return error - - while (timeout_in_seconds != 0) - { - int waitpid_res = waitpid(pid, &status, WNOHANG); - bool process_terminated_normally = (waitpid_res == pid); - - if (process_terminated_normally) - { - return true; - } - else if (waitpid_res == 0) - { - --timeout_in_seconds; - nanosleep(&interval, nullptr); - - continue; - } - else if (waitpid_res == -1 && errno != EINTR) - { - return false; - } - } - - return false; + return waitForPid(pid, timeout_in_seconds); } void ShellCommand::logCommand(const char * filename, char * const argv[]) diff --git a/src/Common/ShellCommand.h b/src/Common/ShellCommand.h index 190b5bc664e..dfc4a826f62 100644 --- a/src/Common/ShellCommand.h +++ b/src/Common/ShellCommand.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB diff --git a/src/Common/tests/gtest_lsan.cpp b/src/Common/tests/gtest_lsan.cpp deleted file mode 100644 index f6e1984ec58..00000000000 --- a/src/Common/tests/gtest_lsan.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include // ADDRESS_SANITIZER - -#ifdef ADDRESS_SANITIZER - -#include -#include - -#include -#include - -/// Test that ensures that LSan works. -/// -/// Regression test for the case when it may not work, -/// because of broken getauxval() [1]. -/// -/// [1]: https://github.com/ClickHouse/ClickHouse/pull/33957 -TEST(Common, LSan) -{ - int sanitizers_exit_code = 1; - - ASSERT_EXIT({ - std::thread leak_in_thread([]() - { - void * leak = malloc(4096); - ASSERT_NE(leak, nullptr); - }); - leak_in_thread.join(); - - __lsan_do_leak_check(); - }, ::testing::ExitedWithCode(sanitizers_exit_code), ".*LeakSanitizer: detected memory leaks.*"); -} - -#endif diff --git a/src/Common/waitForPid.cpp b/src/Common/waitForPid.cpp new file mode 100644 index 00000000000..38f43ae2f6a --- /dev/null +++ b/src/Common/waitForPid.cpp @@ -0,0 +1,192 @@ +#include +#include +#include +#include + +#include +#include +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wgnu-statement-expression" +#define HANDLE_EINTR(x) ({ \ + decltype(x) eintr_wrapper_result; \ + do { \ + eintr_wrapper_result = (x); \ + } while (eintr_wrapper_result == -1 && errno == EINTR); \ + eintr_wrapper_result; \ +}) + +#if defined(OS_LINUX) + +#include +#include + +#if !defined(__NR_pidfd_open) + #if defined(__x86_64__) + #define SYS_pidfd_open 434 + #elif defined(__aarch64__) + #define SYS_pidfd_open 434 + #elif defined(__ppc64__) + #define SYS_pidfd_open 434 + #elif defined(__riscv) + #define SYS_pidfd_open 434 + #else + #error "Unsupported architecture" + #endif +#else + #define SYS_pidfd_open __NR_pidfd_open +#endif + +namespace DB +{ + +static int syscall_pidfd_open(pid_t pid) +{ + // pidfd_open cannot be interrupted, no EINTR handling + return syscall(SYS_pidfd_open, pid, 0); +} + +static int dir_pidfd_open(pid_t pid) +{ + std::string path = "/proc/" + std::to_string(pid); + return HANDLE_EINTR(open(path.c_str(), O_DIRECTORY)); +} + +static bool supportsPidFdOpen() +{ + VersionNumber pidfd_open_minimal_version(5, 3, 0); + VersionNumber linux_version(Poco::Environment::osVersion()); + return linux_version >= pidfd_open_minimal_version; +} + +static int pidFdOpen(pid_t pid) +{ + // use pidfd_open or just plain old /proc/[pid] open for Linux + if (supportsPidFdOpen()) + { + return 
syscall_pidfd_open(pid); + } + else + { + return dir_pidfd_open(pid); + } +} + +static int pollPid(pid_t pid, int timeout_in_ms) +{ + struct pollfd pollfd; + + int pid_fd = pidFdOpen(pid); + if (pid_fd == -1) + { + return false; + } + pollfd.fd = pid_fd; + pollfd.events = POLLIN; + int ready = poll(&pollfd, 1, timeout_in_ms); + int save_errno = errno; + close(pid_fd); + errno = save_errno; + return ready; +} +#elif defined(OS_DARWIN) || defined(OS_FREEBSD) + +#include +#include + +namespace DB +{ + +static int pollPid(pid_t pid, int timeout_in_ms) +{ + int status = 0; + int kq = HANDLE_EINTR(kqueue()); + if (kq == -1) + { + return false; + } + struct kevent change = {.ident = NULL}; + EV_SET(&change, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL); + int result = HANDLE_EINTR(kevent(kq, &change, 1, NULL, 0, NULL)); + if (result == -1) + { + if (errno != ESRCH) + { + return false; + } + // check if pid already died while we called kevent() + if (waitpid(pid, &status, WNOHANG) == pid) + { + return true; + } + return false; + } + + struct kevent event = {.ident = NULL}; + struct timespec remaining_timespec = {.tv_sec = timeout_in_ms / 1000, .tv_nsec = (timeout_in_ms % 1000) * 1000000}; + int ready = kevent(kq, nullptr, 0, &event, 1, &remaining_timespec); + int save_errno = errno; + close(kq); + errno = save_errno; + return ready; +} +#else + #error "Unsupported OS type" +#endif + +bool waitForPid(pid_t pid, size_t timeout_in_seconds) +{ + int status = 0; + + Stopwatch watch; + + if (timeout_in_seconds == 0) + { + /// If there is no timeout before signal try to waitpid 1 time without block so we can avoid sending + /// signal if process is already normally terminated. + + int waitpid_res = waitpid(pid, &status, WNOHANG); + bool process_terminated_normally = (waitpid_res == pid); + return process_terminated_normally; + } + + /// If timeout is positive try waitpid without block in loop until + /// process is normally terminated or waitpid return error + + int timeout_in_ms = timeout_in_seconds * 1000; + while (timeout_in_ms > 0) + { + int waitpid_res = waitpid(pid, &status, WNOHANG); + bool process_terminated_normally = (waitpid_res == pid); + if (process_terminated_normally) + { + return true; + } + else if (waitpid_res == 0) + { + watch.restart(); + int ready = pollPid(pid, timeout_in_ms); + if (ready <= 0) + { + if (errno == EINTR || errno == EAGAIN) + { + timeout_in_ms -= watch.elapsedMilliseconds(); + } + else + { + return false; + } + } + continue; + } + else if (waitpid_res == -1 && errno != EINTR) + { + return false; + } + } + return false; +} + +} +#pragma GCC diagnostic pop diff --git a/src/Common/waitForPid.h b/src/Common/waitForPid.h new file mode 100644 index 00000000000..71c1a74712c --- /dev/null +++ b/src/Common/waitForPid.h @@ -0,0 +1,12 @@ +#pragma once +#include + +namespace DB +{ +/* + * Waits for a specific pid with timeout, using modern Linux and OSX facilities + * Returns `true` if process terminated successfully or `false` otherwise + */ +bool waitForPid(pid_t pid, size_t timeout_in_seconds); + +} diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp index 34d69967828..4733adcf67a 100644 --- a/src/Coordination/CoordinationSettings.cpp +++ b/src/Coordination/CoordinationSettings.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 7c6ed227a06..8261f5d1e26 100644 --- a/src/Coordination/KeeperServer.cpp +++ 
b/src/Coordination/KeeperServer.cpp @@ -21,6 +21,7 @@ #include #include #include +#include namespace DB { @@ -111,7 +112,7 @@ KeeperServer::KeeperServer( configuration_and_settings_->snapshot_storage_path, coordination_settings, checkAndGetSuperdigest(configuration_and_settings_->super_digest), - config.getBool("keeper_server.digest_enabled", true))) + config.getBool("keeper_server.digest_enabled", false))) , state_manager(nuraft::cs_new( server_id, "keeper_server", configuration_and_settings_->log_storage_path, configuration_and_settings_->state_file_path, config, coordination_settings)) , log(&Poco::Logger::get("KeeperServer")) diff --git a/src/Core/BaseSettings.h b/src/Core/BaseSettings.h index 3638a036098..7b56367769e 100644 --- a/src/Core/BaseSettings.h +++ b/src/Core/BaseSettings.h @@ -43,9 +43,16 @@ class BaseSettings : public TTraits::Data { using CustomSettingMap = std::unordered_map, SettingFieldCustom>>; public: + BaseSettings() = default; + BaseSettings(const BaseSettings &) = default; + BaseSettings(BaseSettings &&) noexcept = default; + BaseSettings & operator=(const BaseSettings &) = default; + BaseSettings & operator=(BaseSettings &&) noexcept = default; + virtual ~BaseSettings() = default; + using Traits = TTraits; - void set(std::string_view name, const Field & value); + virtual void set(std::string_view name, const Field & value); Field get(std::string_view name) const; void setString(std::string_view name, const String & value); @@ -62,6 +69,8 @@ public: /// Resets all the settings to their default values. void resetToDefault(); + /// Resets specified setting to its default value. + void resetToDefault(std::string_view name); bool has(std::string_view name) const { return hasBuiltin(name) || hasCustom(name); } static bool hasBuiltin(std::string_view name); @@ -315,6 +324,14 @@ void BaseSettings::resetToDefault() custom_settings_map.clear(); } +template +void BaseSettings::resetToDefault(std::string_view name) +{ + const auto & accessor = Traits::Accessor::instance(); + if (size_t index = accessor.find(name); index != static_cast(-1)) + accessor.resetValueToDefault(*this, index); +} + template bool BaseSettings::hasBuiltin(std::string_view name) { diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index 2df48a79776..584720694d7 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -52,8 +52,10 @@ /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, /// later is just a number for server version (one number instead of commit SHA) /// for simplicity (sometimes it may be more convenient in some use cases). 
-#define DBMS_TCP_PROTOCOL_VERSION 54456 +#define DBMS_TCP_PROTOCOL_VERSION 54457 #define DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME 54449 #define DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS_IN_INSERT 54456 + +#define DBMS_MIN_PROTOCOL_VERSION_WITH_VIEW_IF_PERMITTED 54457 diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 5251569505e..7bac3f04fc6 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -1,5 +1,6 @@ #include "Settings.h" +#include #include #include #include @@ -145,6 +146,53 @@ std::vector Settings::getAllRegisteredNames() const return all_settings; } +void Settings::set(std::string_view name, const Field & value) +{ + BaseSettings::set(name, value); + + if (name == "compatibility") + applyCompatibilitySetting(); + /// If we change setting that was changed by compatibility setting before + /// we should remove it from settings_changed_by_compatibility_setting, + /// otherwise the next time we will change compatibility setting + /// this setting will be changed too (and we don't want it). + else if (settings_changed_by_compatibility_setting.contains(name)) + settings_changed_by_compatibility_setting.erase(name); +} + +void Settings::applyCompatibilitySetting() +{ + /// First, revert all changes applied by previous compatibility setting + for (const auto & setting_name : settings_changed_by_compatibility_setting) + resetToDefault(setting_name); + + settings_changed_by_compatibility_setting.clear(); + String compatibility = getString("compatibility"); + /// If setting value is empty, we don't need to change settings + if (compatibility.empty()) + return; + + ClickHouseVersion version(compatibility); + /// Iterate through ClickHouse version in descending order and apply reversed + /// changes for each version that is higher that version from compatibility setting + for (auto it = settings_changes_history.rbegin(); it != settings_changes_history.rend(); ++it) + { + if (version >= it->first) + break; + + /// Apply reversed changes from this version. + for (const auto & change : it->second) + { + /// If this setting was changed manually, we don't change it + if (isChanged(change.name) && !settings_changed_by_compatibility_setting.contains(change.name)) + continue; + + BaseSettings::set(change.name, change.previous_value); + settings_changed_by_compatibility_setting.insert(change.name); + } + } +} + IMPLEMENT_SETTINGS_TRAITS(FormatFactorySettingsTraits, FORMAT_FACTORY_SETTINGS) } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index bda72f089eb..17e4d27bbcd 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -35,6 +35,10 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) * * `flags` can be either 0 or IMPORTANT. * A setting is "IMPORTANT" if it affects the results of queries and can't be ignored by older versions. + * + * When adding new settings that control some backward incompatible changes or when changing some settings values, + * consider adding them to settings changes history in SettingsChangesHistory.h for special `compatibility` setting + * to work correctly. */ #define COMMON_SETTINGS(M) \ @@ -132,6 +136,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, aggregation_memory_efficient_merge_threads, 0, "Number of threads to use for merge intermediate aggregation results in memory efficient mode. When bigger, then more memory is consumed. 
0 means - same as 'max_threads'.", 0) \ M(Bool, enable_positional_arguments, true, "Enable positional arguments in ORDER BY, GROUP BY and LIMIT BY", 0) \ \ + M(Bool, group_by_use_nulls, false, "Treat columns mentioned in ROLLUP, CUBE or GROUPING SETS as Nullable", 0) \ + \ M(UInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled.", 0) \ M(UInt64, parallel_replicas_count, 0, "", 0) \ M(UInt64, parallel_replica_offset, 0, "", 0) \ @@ -599,6 +605,11 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, allow_deprecated_database_ordinary, false, "Allow to create databases with deprecated Ordinary engine", 0) \ M(Bool, allow_deprecated_syntax_for_merge_tree, false, "Allow to create *MergeTree tables with deprecated engine definition syntax", 0) \ \ + M(String, compatibility, "", "Changes other settings according to provided ClickHouse version. If we know that we changed some behaviour in ClickHouse by changing some settings in some version, this compatibility setting will control these settings", 0) \ + \ + M(Map, additional_table_filters, "", "Additional filter expression which would be applied after reading from specified table. Syntax: {'table1': 'expression', 'database.table2': 'expression'}", 0) \ + M(String, additional_result_filter, "", "Additional filter expression which would be applied to query result", 0) \ + \ /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ @@ -652,7 +663,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) #define FORMAT_FACTORY_SETTINGS(M) \ M(Char, format_csv_delimiter, ',', "The character to be considered as a delimiter in CSV data. If setting with a string, a string has to have a length of 1.", 0) \ - M(Bool, format_csv_allow_single_quotes, true, "If it is set to true, allow strings in single quotes.", 0) \ + M(Bool, format_csv_allow_single_quotes, false, "If it is set to true, allow strings in single quotes.", 0) \ M(Bool, format_csv_allow_double_quotes, true, "If it is set to true, allow strings in double quotes.", 0) \ M(Bool, output_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n.", 0) \ M(Bool, input_format_csv_enum_as_number, false, "Treat inserted enum values in CSV formats as enum indices", 0) \ @@ -758,7 +769,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, output_format_pretty_row_numbers, false, "Add row numbers before each row for pretty output format", 0) \ M(Bool, insert_distributed_one_random_shard, false, "If setting is enabled, inserting into distributed table will choose a random shard to write when there is no sharding key", 0) \ \ - M(UInt64, cross_to_inner_join_rewrite, 1, "Use inner join instead of comma/cross join if possible. Possible values: 0 - no rewrite, 1 - apply if possible, 2 - force rewrite all cross joins", 0) \ + M(UInt64, cross_to_inner_join_rewrite, 1, "Use inner join instead of comma/cross join if there're joining expressions in the WHERE section. 
Values: 0 - no rewrite, 1 - apply if possible for comma/cross, 2 - force rewrite all comma joins, cross - if possible", 0) \ \ M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \ M(Bool, output_format_arrow_string_as_string, false, "Use Arrow String type instead of Binary for String columns", 0) \ @@ -825,6 +836,13 @@ struct Settings : public BaseSettings, public IHints<2, Settings void addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field); void addProgramOptionAsMultitoken(boost::program_options::options_description & options, const SettingFieldRef & field); + + void set(std::string_view name, const Field & value) override; + +private: + void applyCompatibilitySetting(); + + std::unordered_set settings_changed_by_compatibility_setting; }; /* diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h new file mode 100644 index 00000000000..ba60fb99308 --- /dev/null +++ b/src/Core/SettingsChangesHistory.h @@ -0,0 +1,114 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +class ClickHouseVersion +{ +public: + ClickHouseVersion(const String & version) + { + Strings split; + boost::split(split, version, [](char c){ return c == '.'; }); + components.reserve(split.size()); + if (split.empty()) + throw Exception{ErrorCodes::BAD_ARGUMENTS, "Cannot parse ClickHouse version here: {}", version}; + + for (const auto & split_element : split) + { + size_t component; + if (!tryParse(component, split_element)) + throw Exception{ErrorCodes::BAD_ARGUMENTS, "Cannot parse ClickHouse version here: {}", version}; + components.push_back(component); + } + } + + ClickHouseVersion(const char * version) : ClickHouseVersion(String(version)) {} + + String toString() const + { + String version = std::to_string(components[0]); + for (size_t i = 1; i < components.size(); ++i) + version += "." + std::to_string(components[i]); + + return version; + } + + bool operator<(const ClickHouseVersion & other) const + { + return components < other.components; + } + + bool operator>=(const ClickHouseVersion & other) const + { + return components >= other.components; + } + +private: + std::vector components; +}; + +namespace SettingsChangesHistory +{ + struct SettingChange + { + String name; + Field previous_value; + Field new_value; + String reason; + }; + + using SettingsChanges = std::vector; +} + +/// History of settings changes that controls some backward incompatible changes +/// across all ClickHouse versions. It maps ClickHouse version to settings changes that were done +/// in this version. 
Settings changes is a vector of structs {setting_name, previous_value, new_value} +/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) +static std::map settings_changes_history = +{ + {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, + {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, + {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, + {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}}}, + {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, + {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, + {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, + {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, + {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, + {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, + {"use_hedged_requests", false, true, "Enable Hedged Requests feature bu default"}}}, + {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, + {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, + {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, + {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, + {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, + {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, + {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. 
It is significantly more convenient to use than sequential quorum inserts"}, + {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, + {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, + {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, + {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, + {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, + {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, + {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, + {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, + {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, + {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, + {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, + {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, + {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, +}; + +} diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index 827a457a5dc..d77a510d7f9 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include #include @@ -51,6 +53,37 @@ namespace else return applyVisitor(FieldVisitorConvertToNumber(), f); } + +#ifndef KEEPER_STANDALONE_BUILD + Map stringToMap(const String & str) + { + /// Allow empty string as an empty map + if (str.empty()) + return {}; + + auto type_string = std::make_shared(); + DataTypeMap type_map(type_string, type_string); + auto serialization = type_map.getSerialization(ISerialization::Kind::DEFAULT); + auto column = type_map.createColumn(); + + ReadBufferFromString buf(str); + serialization->deserializeTextEscaped(*column, buf, {}); + return (*column)[0].safeGet(); + } + + Map fieldToMap(const Field & f) + { + if (f.getType() == Field::Types::String) + { + /// Allow to parse Map from string field. For the convenience. 
+ const auto & str = f.get(); + return stringToMap(str); + } + + return f.safeGet(); + } +#endif + } template @@ -291,6 +324,48 @@ void SettingFieldString::readBinary(ReadBuffer & in) *this = std::move(str); } +#ifndef KEEPER_STANDALONE_BUILD + +SettingFieldMap::SettingFieldMap(const Field & f) : value(fieldToMap(f)) {} + +String SettingFieldMap::toString() const +{ + auto type_string = std::make_shared(); + DataTypeMap type_map(type_string, type_string); + auto serialization = type_map.getSerialization(ISerialization::Kind::DEFAULT); + auto column = type_map.createColumn(); + column->insert(value); + + WriteBufferFromOwnString out; + serialization->serializeTextEscaped(*column, 0, out, {}); + return out.str(); +} + + +SettingFieldMap & SettingFieldMap::operator =(const Field & f) +{ + *this = fieldToMap(f); + return *this; +} + +void SettingFieldMap::parseFromString(const String & str) +{ + *this = stringToMap(str); +} + +void SettingFieldMap::writeBinary(WriteBuffer & out) const +{ + DB::writeBinary(value, out); +} + +void SettingFieldMap::readBinary(ReadBuffer & in) +{ + Map map; + DB::readBinary(map, in); + *this = map; +} + +#endif namespace { diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index 4033eb2b598..20f2b34084e 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -168,6 +168,32 @@ struct SettingFieldString void readBinary(ReadBuffer & in); }; +#ifndef KEEPER_STANDALONE_BUILD + +struct SettingFieldMap +{ +public: + Map value; + bool changed = false; + + explicit SettingFieldMap(const Map & map = {}) : value(map) {} + explicit SettingFieldMap(Map && map) : value(std::move(map)) {} + explicit SettingFieldMap(const Field & f); + + SettingFieldMap & operator =(const Map & map) { value = map; changed = true; return *this; } + SettingFieldMap & operator =(const Field & f); + + operator const Map &() const { return value; } /// NOLINT + explicit operator Field() const { return value; } + + String toString() const; + void parseFromString(const String & str); + + void writeBinary(WriteBuffer & out) const; + void readBinary(ReadBuffer & in); +}; + +#endif struct SettingFieldChar { diff --git a/src/DataTypes/DataTypeNullable.cpp b/src/DataTypes/DataTypeNullable.cpp index b354b1278be..a14fb785b96 100644 --- a/src/DataTypes/DataTypeNullable.cpp +++ b/src/DataTypes/DataTypeNullable.cpp @@ -85,6 +85,13 @@ DataTypePtr makeNullable(const DataTypePtr & type) return std::make_shared(type); } +DataTypePtr makeNullableSafe(const DataTypePtr & type) +{ + if (type->canBeInsideNullable()) + return makeNullable(type); + return type; +} + DataTypePtr removeNullable(const DataTypePtr & type) { if (type->isNullable()) diff --git a/src/DataTypes/DataTypeNullable.h b/src/DataTypes/DataTypeNullable.h index c87e4f77008..379119b364c 100644 --- a/src/DataTypes/DataTypeNullable.h +++ b/src/DataTypes/DataTypeNullable.h @@ -51,6 +51,7 @@ private: DataTypePtr makeNullable(const DataTypePtr & type); +DataTypePtr makeNullableSafe(const DataTypePtr & type); DataTypePtr removeNullable(const DataTypePtr & type); } diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 558b13927c1..1ef86a8c12f 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -214,6 +214,19 @@ size_t DataTypeTuple::getPositionByName(const String & name) const throw Exception("Tuple doesn't have element with name '" + name + "'", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); } +std::optional DataTypeTuple::tryGetPositionByName(const String & name) const 
+{ + size_t size = elems.size(); + for (size_t i = 0; i < size; ++i) + { + if (names[i] == name) + { + return std::optional(i); + } + } + return std::nullopt; +} + String DataTypeTuple::getNameByPosition(size_t i) const { if (i == 0 || i > names.size()) diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index 009a2284a0a..eed04631528 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -60,6 +61,7 @@ public: const Strings & getElementNames() const { return names; } size_t getPositionByName(const String & name) const; + std::optional tryGetPositionByName(const String & name) const; String getNameByPosition(size_t i) const; bool haveExplicitNames() const { return have_explicit_names; } diff --git a/src/DataTypes/DataTypesDecimal.h b/src/DataTypes/DataTypesDecimal.h index 00b5e2b9e37..7bcc6593435 100644 --- a/src/DataTypes/DataTypesDecimal.h +++ b/src/DataTypes/DataTypesDecimal.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -13,6 +14,7 @@ namespace DB namespace ErrorCodes { extern const int DECIMAL_OVERFLOW; + extern const int LOGICAL_ERROR; } /// Implements Decimal(P, S), where P is precision, S is scale. @@ -58,7 +60,7 @@ inline const DataTypeDecimal * checkDecimal(const IDataType & data_type) return typeid_cast *>(&data_type); } -inline UInt32 getDecimalScale(const IDataType & data_type, UInt32 default_value = std::numeric_limits::max()) +inline UInt32 getDecimalScale(const IDataType & data_type) { if (const auto * decimal_type = checkDecimal(data_type)) return decimal_type->getScale(); @@ -68,7 +70,10 @@ inline UInt32 getDecimalScale(const IDataType & data_type, UInt32 default_value return decimal_type->getScale(); if (const auto * decimal_type = checkDecimal(data_type)) return decimal_type->getScale(); - return default_value; + if (const auto * date_time_type = typeid_cast(&data_type)) + return date_time_type->getScale(); + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get decimal scale from type {}", data_type.getName()); } inline UInt32 getDecimalPrecision(const IDataType & data_type) @@ -81,7 +86,10 @@ inline UInt32 getDecimalPrecision(const IDataType & data_type) return decimal_type->getPrecision(); if (const auto * decimal_type = checkDecimal(data_type)) return decimal_type->getPrecision(); - return 0; + if (const auto * date_time_type = typeid_cast(&data_type)) + return date_time_type->getPrecision(); + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get decimal precision from type {}", data_type.getName()); } template diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index fce8906abe5..a26c703cd8a 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -532,6 +532,12 @@ inline bool isBool(const DataTypePtr & data_type) return data_type->getName() == "Bool"; } +inline bool isAggregateFunction(const DataTypePtr & data_type) +{ + WhichDataType which(data_type); + return which.isAggregateFunction(); +} + template constexpr bool IsDataTypeDecimal = false; template constexpr bool IsDataTypeNumber = false; template constexpr bool IsDataTypeDateOrDateTime = false; diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 65ed37f1dcf..fee3cf1553e 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -554,7 +554,11 @@ DataTypePtr getLeastSupertype(const DataTypes & types) UInt32 max_scale = 0; for (const auto & type : types) { - UInt32 
scale = getDecimalScale(*type, 0); + auto type_id = type->getTypeId(); + if (type_id != TypeIndex::Decimal32 && type_id != TypeIndex::Decimal64 && type_id != TypeIndex::Decimal128) + continue; + + UInt32 scale = getDecimalScale(*type); if (scale > max_scale) max_scale = scale; } diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index 8edb00e5a67..e6479727aad 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include namespace DB diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 941df99298b..2337fa00af5 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include @@ -41,6 +40,10 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } +class IDisk; +using DiskPtr = std::shared_ptr; +using DisksMap = std::map; + class IReservation; using ReservationPtr = std::unique_ptr; using Reservations = std::vector; @@ -363,7 +366,6 @@ private: std::unique_ptr executor; }; -using DiskPtr = std::shared_ptr; using Disks = std::vector; /** diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index b929cea0236..a3d5cfc408d 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace ProfileEvents diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 6df093ebd43..34b3d86b355 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index 44976b7cf2d..dc70008649e 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index ca414a7ee72..0b7d16bd895 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -18,6 +18,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp b/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp index 99606a18517..b8ab2f49202 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.h b/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.h index b6426df1e9a..6d5ae12a157 100644 --- a/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.h +++ b/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Disks/ObjectStorages/IObjectStorage.cpp b/src/Disks/ObjectStorages/IObjectStorage.cpp index f9c5c139b95..f3ac94768d8 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.cpp +++ b/src/Disks/ObjectStorages/IObjectStorage.cpp @@ -2,6 +2,7 @@ #include 
#include #include +#include namespace DB { diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 06398b11aec..1ab2d75ff86 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -7,6 +7,7 @@ #include #include +#include #include #include #include diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 55c466d45f6..d36bf655c02 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -28,6 +28,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 98397224629..8f20671d841 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 11a91bd50dc..058f9b7059b 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -66,7 +66,7 @@ ColumnsDescription readSchemaFromFormat( } catch (const DB::Exception & e) { - throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file. Error: {}", format_name, e.message()); + throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file. Error: {}. You can specify the structure manually", format_name, e.message()); } } else if (FormatFactory::instance().checkIfFormatHasSchemaReader(format_name)) @@ -75,16 +75,29 @@ ColumnsDescription readSchemaFromFormat( SchemaReaderPtr schema_reader; size_t max_rows_to_read = format_settings ? format_settings->max_rows_to_read_for_schema_inference : context->getSettingsRef().input_format_max_rows_to_read_for_schema_inference; size_t iterations = 0; - while ((buf = read_buffer_iterator())) + while (true) { + bool is_eof = false; + try + { + buf = read_buffer_iterator(); + if (!buf) + break; + is_eof = buf->eof(); + } + catch (...) + { + auto exception_message = getCurrentExceptionMessage(false); + throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file: {}. You can specify the structure manually", format_name, exception_message); + } ++iterations; - if (buf->eof()) + if (is_eof) { auto exception_message = fmt::format("Cannot extract table structure from {} format file, file is empty", format_name); if (!retry) - throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, exception_message); + throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "{}. You can specify the structure manually", exception_message); exception_messages += "\n" + exception_message; continue; @@ -118,14 +131,14 @@ ColumnsDescription readSchemaFromFormat( } if (!retry || !isRetryableSchemaInferenceError(getCurrentExceptionCode())) - throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file. Error: {}", format_name, exception_message); + throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file. Error: {}. 
You can specify the structure manually", format_name, exception_message); exception_messages += "\n" + exception_message; } } if (names_and_types.empty()) - throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "All attempts to extract table structure from files failed. Errors:{}", exception_messages); + throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "All attempts to extract table structure from files failed. Errors:{}\nYou can specify the structure manually", exception_messages); /// If we have "INSERT SELECT" query then try to order /// columns as they are ordered in table schema for formats diff --git a/src/Functions/parseTimeDelta.cpp b/src/Functions/parseTimeDelta.cpp index fb5a7621a53..8cb7c229ae8 100644 --- a/src/Functions/parseTimeDelta.cpp +++ b/src/Functions/parseTimeDelta.cpp @@ -18,7 +18,8 @@ namespace ErrorCodes namespace { - const std::unordered_map time_unit_to_float = { + const std::unordered_map time_unit_to_float = + { {"years", 365 * 24 * 3600}, {"year", 365 * 24 * 3600}, {"yr", 365 * 24 * 3600}, @@ -50,6 +51,22 @@ namespace {"second", 1}, {"sec", 1}, {"s", 1}, + + {"milliseconds", 1e-3}, + {"millisecond", 1e-3}, + {"millisec", 1e-3}, + {"ms", 1e-3}, + + {"microseconds", 1e-6}, + {"microsecond", 1e-6}, + {"microsec", 1e-6}, + {"μs", 1e-6}, + {"us", 1e-6}, + + {"nanoseconds", 1e-9}, + {"nanosecond", 1e-9}, + {"nanosec", 1e-9}, + {"ns", 1e-9}, }; /** Prints amount of seconds in form of: @@ -248,7 +265,7 @@ namespace static bool scanUnit(std::string_view & str, Int64 & index, Int64 last_pos) { int64_t begin_index = index; - while (index <= last_pos && isalpha(str[index])) + while (index <= last_pos && !isdigit(str[index]) && !isSeparator(str[index])) { index++; } @@ -271,14 +288,18 @@ namespace scanSpaces(str, index, last_pos); /// ignore separator - if (index <= last_pos - && (str[index] == ';' || str[index] == '-' || str[index] == '+' || str[index] == ',' || str[index] == ':')) + if (index <= last_pos && isSeparator(str[index])) { index++; } scanSpaces(str, index, last_pos); } + + static bool isSeparator(char symbol) + { + return symbol == ';' || symbol == '-' || symbol == '+' || symbol == ',' || symbol == ':' || symbol == ' '; + } }; } diff --git a/src/Functions/tupleElement.cpp b/src/Functions/tupleElement.cpp index 023dc266b43..92ca6b85714 100644 --- a/src/Functions/tupleElement.cpp +++ b/src/Functions/tupleElement.cpp @@ -18,6 +18,10 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_INDEX; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int NOT_FOUND_COLUMN_IN_BLOCK; + extern const int NUMBER_OF_DIMENSIONS_MISMATCHED; + extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; } namespace @@ -40,9 +44,11 @@ public: return name; } + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { - return 2; + return 0; } bool useDefaultImplementationForConstants() const override @@ -59,8 +65,14 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - size_t count_arrays = 0; + const size_t number_of_arguments = arguments.size(); + if (number_of_arguments < 2 || number_of_arguments > 3) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(number_of_arguments) + ", should be 2 or 3", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + size_t count_arrays = 0; const IDataType * tuple_col = arguments[0].type.get(); while (const DataTypeArray * array = 
checkAndGetDataType(tuple_col)) { @@ -72,16 +84,34 @@ public: if (!tuple) throw Exception("First argument for function " + getName() + " must be tuple or array of tuple.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - size_t index = getElementNum(arguments[1].column, *tuple); - DataTypePtr out_return_type = tuple->getElements()[index]; + auto index = getElementNum(arguments[1].column, *tuple, number_of_arguments); + if (index.has_value()) + { + DataTypePtr out_return_type = tuple->getElements()[index.value()]; - for (; count_arrays; --count_arrays) - out_return_type = std::make_shared(out_return_type); + for (; count_arrays; --count_arrays) + out_return_type = std::make_shared(out_return_type); - return out_return_type; + return out_return_type; + } + else + { + const IDataType * default_col = arguments[2].type.get(); + size_t default_argument_count_arrays = 0; + if (const DataTypeArray * array = checkAndGetDataType(default_col)) + { + default_argument_count_arrays = array->getNumberOfDimensions(); + } + + if (count_arrays != default_argument_count_arrays) + { + throw Exception(ErrorCodes::NUMBER_OF_DIMENSIONS_MISMATCHED, "Dimension of types mismatched between first argument and third argument. Dimension of 1st argument: {}. Dimension of 3rd argument: {}.",count_arrays, default_argument_count_arrays); + } + return arguments[2].type; + } } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { Columns array_offsets; @@ -89,6 +119,12 @@ public: const IDataType * tuple_type = first_arg.type.get(); const IColumn * tuple_col = first_arg.column.get(); + bool first_arg_is_const = false; + if (typeid_cast(tuple_col)) + { + tuple_col = assert_cast(tuple_col)->getDataColumnPtr().get(); + first_arg_is_const = true; + } while (const DataTypeArray * array_type = checkAndGetDataType(tuple_type)) { const ColumnArray * array_col = assert_cast(tuple_col); @@ -103,18 +139,87 @@ public: if (!tuple_type_concrete || !tuple_col_concrete) throw Exception("First argument for function " + getName() + " must be tuple or array of tuple.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - size_t index = getElementNum(arguments[1].column, *tuple_type_concrete); - ColumnPtr res = tuple_col_concrete->getColumns()[index]; + auto index = getElementNum(arguments[1].column, *tuple_type_concrete, arguments.size()); + + if (!index.has_value()) + { + if (!array_offsets.empty()) + { + recursiveCheckArrayOffsets(arguments[0].column, arguments[2].column, array_offsets.size()); + } + return arguments[2].column; + } + + ColumnPtr res = tuple_col_concrete->getColumns()[index.value()]; /// Wrap into Arrays for (auto it = array_offsets.rbegin(); it != array_offsets.rend(); ++it) res = ColumnArray::create(res, *it); + if (first_arg_is_const) + { + res = ColumnConst::create(res, input_rows_count); + } return res; } private: - size_t getElementNum(const ColumnPtr & index_column, const DataTypeTuple & tuple) const + + void recursiveCheckArrayOffsets(ColumnPtr col_x, ColumnPtr col_y, size_t depth) const + { + for (size_t i = 1; i < depth; ++i) + { + checkArrayOffsets(col_x, col_y); + col_x = assert_cast(col_x.get())->getDataPtr(); + col_y = assert_cast(col_y.get())->getDataPtr(); + } + checkArrayOffsets(col_x, col_y); + } + + void checkArrayOffsets(ColumnPtr col_x, ColumnPtr col_y) const + { + if (isColumnConst(*col_x)) + { + 
checkArrayOffsetsWithFirstArgConst(col_x, col_y); + } + else if (isColumnConst(*col_y)) + { + checkArrayOffsetsWithFirstArgConst(col_y, col_x); + } + else + { + const auto & array_x = *assert_cast(col_x.get()); + const auto & array_y = *assert_cast(col_y.get()); + if (!array_x.hasEqualOffsets(array_y)) + { + throw Exception("The argument 1 and argument 3 of function " + getName() + " have different array sizes", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + } + } + } + + void checkArrayOffsetsWithFirstArgConst(ColumnPtr col_x, ColumnPtr col_y) const + { + col_x = assert_cast(col_x.get())->getDataColumnPtr(); + col_y = col_y->convertToFullColumnIfConst(); + const auto & array_x = *assert_cast(col_x.get()); + const auto & array_y = *assert_cast(col_y.get()); + + const auto & offsets_x = array_x.getOffsets(); + const auto & offsets_y = array_y.getOffsets(); + + ColumnArray::Offset prev_offset = 0; + size_t row_size = offsets_y.size(); + for (size_t row = 0; row < row_size; ++row) + { + if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset)) + { + throw Exception("The argument 1 and argument 3 of function " + getName() + " have different array sizes", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + } + prev_offset = offsets_y[row]; + } + } + + std::optional getElementNum(const ColumnPtr & index_column, const DataTypeTuple & tuple, const size_t argument_size) const { if ( checkAndGetColumnConst(index_column.get()) @@ -131,11 +236,21 @@ private: if (index > tuple.getElements().size()) throw Exception("Index for tuple element is out of range.", ErrorCodes::ILLEGAL_INDEX); - return index - 1; + return std::optional(index - 1); } else if (const auto * name_col = checkAndGetColumnConst(index_column.get())) { - return tuple.getPositionByName(name_col->getValue()); + auto index = tuple.tryGetPositionByName(name_col->getValue()); + if (index.has_value()) + { + return index; + } + + if (argument_size == 2) + { + throw Exception("Tuple doesn't have element with name '" + name_col->getValue() + "'", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + } + return std::nullopt; } else throw Exception("Second argument to " + getName() + " must be a constant UInt or String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp index 406b519df79..920e76cd7d0 100644 --- a/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/src/IO/ReadBufferFromFileDescriptor.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #ifdef HAS_RESERVED_IDENTIFIER diff --git a/src/IO/ReadBufferFromFileDescriptor.h b/src/IO/ReadBufferFromFileDescriptor.h index 40b0717c8b1..73c651189cd 100644 --- a/src/IO/ReadBufferFromFileDescriptor.h +++ b/src/IO/ReadBufferFromFileDescriptor.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include diff --git a/src/Interpreters/ActionLocksManager.cpp b/src/Interpreters/ActionLocksManager.cpp index 8f081f3d470..7b57b8803cd 100644 --- a/src/Interpreters/ActionLocksManager.cpp +++ b/src/Interpreters/ActionLocksManager.cpp @@ -23,20 +23,6 @@ ActionLocksManager::ActionLocksManager(ContextPtr context_) : WithContext(contex { } -template -inline void forEachTable(F && f, ContextPtr context) -{ - for (auto & elem : DatabaseCatalog::instance().getDatabases()) - for (auto iterator = elem.second->getTablesIterator(context); iterator->isValid(); iterator->next()) - if (auto table = iterator->table()) - f(table); -} - -void ActionLocksManager::add(StorageActionBlockType action_type, ContextPtr context_) -{ - forEachTable([&](const 
StoragePtr & table) { add(table, action_type); }, context_); -} - void ActionLocksManager::add(const StorageID & table_id, StorageActionBlockType action_type) { if (auto table = DatabaseCatalog::instance().tryGetTable(table_id, getContext())) @@ -54,14 +40,6 @@ void ActionLocksManager::add(const StoragePtr & table, StorageActionBlockType ac } } -void ActionLocksManager::remove(StorageActionBlockType action_type) -{ - std::lock_guard lock(mutex); - - for (auto & storage_elem : storage_locks) - storage_elem.second.erase(action_type); -} - void ActionLocksManager::remove(const StorageID & table_id, StorageActionBlockType action_type) { if (auto table = DatabaseCatalog::instance().tryGetTable(table_id, getContext())) diff --git a/src/Interpreters/ActionLocksManager.h b/src/Interpreters/ActionLocksManager.h index be112e71950..d1da81a8dd4 100644 --- a/src/Interpreters/ActionLocksManager.h +++ b/src/Interpreters/ActionLocksManager.h @@ -20,14 +20,10 @@ class ActionLocksManager : WithContext public: explicit ActionLocksManager(ContextPtr context); - /// Adds new locks for each table - void add(StorageActionBlockType action_type, ContextPtr context); /// Add new lock for a table if it has not been already added void add(const StorageID & table_id, StorageActionBlockType action_type); void add(const StoragePtr & table, StorageActionBlockType action_type); - /// Remove locks for all tables - void remove(StorageActionBlockType action_type); /// Removes a lock for a table if it exists void remove(const StorageID & table_id, StorageActionBlockType action_type); void remove(const StoragePtr & table, StorageActionBlockType action_type); diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index b057b6ee641..9fd27fc28b6 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -989,9 +989,15 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti if (s.rfind("processor", 0) == 0) { + /// s390x example: processor 0: version = FF, identification = 039C88, machine = 3906 + /// non s390x example: processor : 0 if (auto colon = s.find_first_of(':')) { +#ifdef __s390x__ + core_id = std::stoi(s.substr(10)); /// 10: length of "processor" plus 1 +#else core_id = std::stoi(s.substr(colon + 2)); +#endif } } else if (s.rfind("cpu MHz", 0) == 0) diff --git a/src/Interpreters/CrossToInnerJoinVisitor.cpp b/src/Interpreters/CrossToInnerJoinVisitor.cpp index d438ea9394e..cfa979f4036 100644 --- a/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -39,7 +39,10 @@ struct JoinedElement : element(table_element) { if (element.table_join) + { join = element.table_join->as(); + original_kind = join->kind; + } } void checkTableName(const DatabaseAndTableWithAlias & table, const String & current_database) const @@ -61,6 +64,8 @@ struct JoinedElement join->kind = ASTTableJoin::Kind::Cross; } + ASTTableJoin::Kind getOriginalKind() const { return original_kind; } + bool rewriteCrossToInner(ASTPtr on_expression) { if (join->kind != ASTTableJoin::Kind::Cross) @@ -83,6 +88,8 @@ struct JoinedElement private: const ASTTablesInSelectQueryElement & element; ASTTableJoin * join = nullptr; + + ASTTableJoin::Kind original_kind; }; bool isAllowedToRewriteCrossJoin(const ASTPtr & node, const Aliases & aliases) @@ -251,10 +258,17 @@ void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & da } } - if (data.cross_to_inner_join_rewrite > 1 && !rewritten) + if 
(joined.getOriginalKind() == ASTTableJoin::Kind::Comma && + data.cross_to_inner_join_rewrite > 1 && + !rewritten) { - throw Exception(ErrorCodes::INCORRECT_QUERY, "Failed to rewrite '{} WHERE {}' to INNER JOIN", - query_before, queryToString(select.where())); + throw Exception( + ErrorCodes::INCORRECT_QUERY, + "Failed to rewrite comma join to INNER. " + "Please, try to simplify WHERE section " + "or set the setting `cross_to_inner_join_rewrite` to 1 to allow slow CROSS JOIN for this case " + "(cannot rewrite '{} WHERE {}' to INNER JOIN)", + query_before, queryToString(select.where())); } } } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 23258c60099..8a14c09819a 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -45,6 +45,9 @@ #include #include +#include +#include +#include #include #include #include @@ -345,6 +348,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) group_by_kind = GroupByKind::GROUPING_SETS; else group_by_kind = GroupByKind::ORDINARY; + bool use_nulls = group_by_kind != GroupByKind::ORDINARY && getContext()->getSettingsRef().group_by_use_nulls; /// For GROUPING SETS with multiple groups we always add virtual __grouping_set column /// With set number, which is used as an additional key at the stage of merging aggregating data. @@ -399,7 +403,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) } } - NameAndTypePair key{column_name, node->result_type}; + NameAndTypePair key{column_name, use_nulls ? makeNullableSafe(node->result_type) : node->result_type }; grouping_set_list.push_back(key); @@ -453,7 +457,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) } } - NameAndTypePair key{column_name, node->result_type}; + NameAndTypePair key = NameAndTypePair{ column_name, use_nulls ? makeNullableSafe(node->result_type) : node->result_type }; /// Aggregation keys are uniqued. 
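The `CrossToInnerJoinVisitor` change above records the original join kind and raises the new, more explicit error only for comma joins that could not be rewritten while `cross_to_inner_join_rewrite` is greater than 1. A hedged illustration with placeholder tables `t1` and `t2`:

```sql
-- no usable equality condition, so the comma join cannot become an INNER JOIN
SELECT * FROM t1, t2 WHERE t1.a > t2.b;
-- fails with "Failed to rewrite comma join to INNER. ..." when cross_to_inner_join_rewrite = 2

SET cross_to_inner_join_rewrite = 1;  -- accept a slow CROSS JOIN instead of the error
SELECT * FROM t1, t2 WHERE t1.a > t2.b;
```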
if (!unique_keys.contains(key.name)) @@ -1489,6 +1493,28 @@ void SelectQueryExpressionAnalyzer::appendExpressionsAfterWindowFunctions(Expres } } +void SelectQueryExpressionAnalyzer::appendGroupByModifiers(ActionsDAGPtr & before_aggregation, ExpressionActionsChain & chain, bool /* only_types */) +{ + const auto * select_query = getAggregatingQuery(); + + if (!select_query->groupBy() || !(select_query->group_by_with_rollup || select_query->group_by_with_cube)) + return; + + auto source_columns = before_aggregation->getResultColumns(); + ColumnsWithTypeAndName result_columns; + + for (const auto & source_column : source_columns) + { + if (source_column.type->canBeInsideNullable()) + result_columns.emplace_back(makeNullableSafe(source_column.type), source_column.name); + else + result_columns.push_back(source_column); + } + ExpressionActionsChain::Step & step = chain.lastStep(before_aggregation->getNamesAndTypesList()); + + step.actions() = ActionsDAG::makeConvertingActions(source_columns, result_columns, ActionsDAG::MatchColumnsMode::Position); +} + void SelectQueryExpressionAnalyzer::appendSelectSkipWindowExpressions(ExpressionActionsChain::Step & step, ASTPtr const & node) { if (auto * function = node->as()) @@ -1816,6 +1842,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( bool second_stage_, bool only_types, const FilterDAGInfoPtr & filter_info_, + const FilterDAGInfoPtr & additional_filter, const Block & source_header) : first_stage(first_stage_) , second_stage(second_stage_) @@ -1882,6 +1909,13 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( columns_for_final.begin(), columns_for_final.end()); } + if (storage && additional_filter) + { + Names columns_for_additional_filter = additional_filter->actions->getRequiredColumnsNames(); + additional_required_columns_after_prewhere.insert(additional_required_columns_after_prewhere.end(), + columns_for_additional_filter.begin(), columns_for_additional_filter.end()); + } + if (storage && filter_info_) { filter_info = filter_info_; @@ -1956,6 +1990,9 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( query_analyzer.appendAggregateFunctionsArguments(chain, only_types || !first_stage); before_aggregation = chain.getLastActions(); + if (settings.group_by_use_nulls) + query_analyzer.appendGroupByModifiers(before_aggregation, chain, only_types); + finalize_chain(chain); if (query_analyzer.appendHaving(chain, only_types || !second_stage)) diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 019cda8b924..da92bc10832 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -281,6 +281,7 @@ struct ExpressionAnalysisResult bool second_stage, bool only_types, const FilterDAGInfoPtr & filter_info, + const FilterDAGInfoPtr & additional_filter, /// for setting additional_filters const Block & source_header); /// Filter for row-level security. 
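In `analyzeAggregation` and the new `appendGroupByModifiers`, aggregation keys are wrapped with `makeNullableSafe` when `group_by_use_nulls` is enabled, so the subtotal rows produced by ROLLUP, CUBE and GROUPING SETS can carry NULL keys instead of default values. A minimal sketch of the resulting behaviour:

```sql
SET group_by_use_nulls = 1;

SELECT number % 2 AS k, count() AS c
FROM numbers(6)
GROUP BY ROLLUP(k);
-- k becomes Nullable(UInt8); the grand-total row reports k = NULL rather than 0
```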
@@ -412,6 +413,8 @@ private: void appendExpressionsAfterWindowFunctions(ExpressionActionsChain & chain, bool only_types); void appendSelectSkipWindowExpressions(ExpressionActionsChain::Step & step, ASTPtr const & node); + void appendGroupByModifiers(ActionsDAGPtr & before_aggregation, ExpressionActionsChain & chain, bool only_types); + /// After aggregation: bool appendHaving(ExpressionActionsChain & chain, bool only_types); /// appendSelect diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp index 4ac1d33468f..05486f65da5 100644 --- a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp +++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp @@ -4,6 +4,13 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include namespace DB { @@ -81,6 +88,53 @@ void IInterpreterUnionOrSelectQuery::setQuota(QueryPipeline & pipeline) const pipeline.setQuota(quota); } +static ASTPtr parseAdditionalPostFilter(const Context & context) +{ + const auto & settings = context.getSettingsRef(); + const String & filter = settings.additional_result_filter; + if (filter.empty()) + return nullptr; + + ParserExpression parser; + return parseQuery( + parser, filter.data(), filter.data() + filter.size(), + "additional filter", settings.max_query_size, settings.max_parser_depth); +} + +static ActionsDAGPtr makeAdditionalPostFilter(ASTPtr & ast, ContextPtr context, const Block & header) +{ + auto syntax_result = TreeRewriter(context).analyze(ast, header.getNamesAndTypesList()); + String result_column_name = ast->getColumnName(); + auto dag = ExpressionAnalyzer(ast, syntax_result, context).getActionsDAG(false, false); + const ActionsDAG::Node * result_node = &dag->findInIndex(result_column_name); + auto & index = dag->getIndex(); + index.clear(); + index.reserve(dag->getInputs().size() + 1); + for (const auto * node : dag->getInputs()) + index.push_back(node); + + index.push_back(result_node); + + return dag; +} + +void IInterpreterUnionOrSelectQuery::addAdditionalPostFilter(QueryPlan & plan) const +{ + if (options.subquery_depth != 0) + return; + + auto ast = parseAdditionalPostFilter(*context); + if (!ast) + return; + + auto dag = makeAdditionalPostFilter(ast, context, plan.getCurrentDataStream().header); + std::string filter_name = dag->getIndex().back()->result_name; + auto filter_step = std::make_unique( + plan.getCurrentDataStream(), std::move(dag), std::move(filter_name), true); + filter_step->setStepDescription("Additional result filter"); + plan.addStep(std::move(filter_step)); +} + void IInterpreterUnionOrSelectQuery::addStorageLimits(const StorageLimitsList & limits) { for (const auto & val : limits) diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.h b/src/Interpreters/IInterpreterUnionOrSelectQuery.h index 98e0432f3d5..a1c86f9de85 100644 --- a/src/Interpreters/IInterpreterUnionOrSelectQuery.h +++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.h @@ -72,6 +72,8 @@ protected: /// Set quotas to query pipeline. void setQuota(QueryPipeline & pipeline) const; + /// Add filter from additional_post_filter setting. 
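`addAdditionalPostFilter` above parses the `additional_result_filter` setting with `ParserExpression` and appends one more `FilterStep` to the top-level query plan (it is skipped for subqueries). A small sketch of the intended effect:

```sql
SET additional_result_filter = 'number % 2 = 1';

SELECT number FROM numbers(6);
-- only 1, 3 and 5 survive the extra filter applied to the final result
```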
+ void addAdditionalPostFilter(QueryPlan & plan) const; static StorageLimits getStorageLimits(const Context & context, const SelectQueryOptions & options); }; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 3fad4374abe..6715947da43 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -146,14 +146,14 @@ namespace struct QueryASTSettings { bool graph = false; - bool rewrite = false; + bool optimize = false; constexpr static char name[] = "AST"; std::unordered_map> boolean_settings = { {"graph", graph}, - {"rewrite", rewrite} + {"optimize", optimize} }; }; @@ -280,7 +280,7 @@ QueryPipeline InterpreterExplainQuery::executeImpl() case ASTExplainQuery::ParsedAST: { auto settings = checkAndGetSettings(ast.getSettings()); - if (settings.rewrite) + if (settings.optimize) { ExplainAnalyzedSyntaxVisitor::Data data(getContext()); ExplainAnalyzedSyntaxVisitor(data).visit(query); diff --git a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp index 4d0c82d3345..d6add3f77a9 100644 --- a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp +++ b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp @@ -138,6 +138,7 @@ void InterpreterSelectIntersectExceptQuery::buildQueryPlan(QueryPlan & query_pla auto step = std::make_unique(std::move(data_streams), final_operator, max_threads); query_plan.unitePlans(std::move(step), std::move(plans)); + addAdditionalPostFilter(query_plan); query_plan.addInterpreterContext(context); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index ac31588d210..24bbaea7dcf 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -109,8 +109,17 @@ namespace ErrorCodes } /// Assumes `storage` is set and the table filter (row-level security) is not empty. 
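The `InterpreterExplainQuery` hunk above renames the boolean `EXPLAIN AST` setting from `rewrite` to `optimize`; when it is set, the AST is printed after the analyzed-syntax rewrite. An illustrative query (the table and column names are placeholders, not from this patch):

```sql
EXPLAIN AST optimize = 1
SELECT sum(x) FROM t GROUP BY y;
```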
-String InterpreterSelectQuery::generateFilterActions(ActionsDAGPtr & actions, const Names & prerequisite_columns) const +FilterDAGInfoPtr generateFilterActions( + const StorageID & table_id, + const ASTPtr & row_policy_filter, + const ContextPtr & context, + const StoragePtr & storage, + const StorageSnapshotPtr & storage_snapshot, + const StorageMetadataPtr & metadata_snapshot, + Names & prerequisite_columns) { + auto filter_info = std::make_shared(); + const auto & db_name = table_id.getDatabaseName(); const auto & table_name = table_id.getTableName(); @@ -146,16 +155,24 @@ String InterpreterSelectQuery::generateFilterActions(ActionsDAGPtr & actions, co /// Using separate expression analyzer to prevent any possible alias injection auto syntax_result = TreeRewriter(context).analyzeSelect(query_ast, TreeRewriterResult({}, storage, storage_snapshot)); SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context, metadata_snapshot); - actions = analyzer.simpleSelectActions(); + filter_info->actions = analyzer.simpleSelectActions(); - auto column_name = expr_list->children.at(0)->getColumnName(); - actions->removeUnusedActions(NameSet{column_name}); - actions->projectInput(false); + filter_info->column_name = expr_list->children.at(0)->getColumnName(); + filter_info->actions->removeUnusedActions(NameSet{filter_info->column_name}); + filter_info->actions->projectInput(false); - for (const auto * node : actions->getInputs()) - actions->getIndex().push_back(node); + for (const auto * node : filter_info->actions->getInputs()) + filter_info->actions->getIndex().push_back(node); - return column_name; + auto required_columns_from_filter = filter_info->actions->getRequiredColumns(); + + for (const auto & column : required_columns_from_filter) + { + if (prerequisite_columns.end() == std::find(prerequisite_columns.begin(), prerequisite_columns.end(), column.name)) + prerequisite_columns.push_back(column.name); + } + + return filter_info; } InterpreterSelectQuery::InterpreterSelectQuery( @@ -269,6 +286,33 @@ static void checkAccessRightsForSelect( context->checkAccess(AccessType::SELECT, table_id, syntax_analyzer_result.requiredSourceColumnsForAccessCheck()); } +static ASTPtr parseAdditionalFilterConditionForTable( + const Map & setting, + const DatabaseAndTableWithAlias & target, + const Context & context) +{ + for (size_t i = 0; i < setting.size(); ++i) + { + const auto & tuple = setting[i].safeGet(); + auto & table = tuple.at(0).safeGet(); + auto & filter = tuple.at(1).safeGet(); + + if (table == target.alias || + (table == target.table && context.getCurrentDatabase() == target.database) || + (table == target.database + '.' + target.table)) + { + /// Try to parse expression + ParserExpression parser; + const auto & settings = context.getSettingsRef(); + return parseQuery( + parser, filter.data(), filter.data() + filter.size(), + "additional filter", settings.max_query_size, settings.max_parser_depth); + } + } + + return nullptr; +} + /// Returns true if we should ignore quotas and limits for a specified table in the system database. 
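`parseAdditionalFilterConditionForTable` above picks the filter for the leftmost joined table by matching the map key against the table alias, the bare table name (when the table lives in the current database), or the fully qualified `database.table` name, and then parses the filter text as an expression. A hedged example, again using a system table for illustration:

```sql
SELECT number
FROM system.numbers AS n
LIMIT 5
SETTINGS additional_table_filters = {'n': 'number != 3'};
-- 'system.numbers' would match as well; a bare 'numbers' matches only when the
-- current database is `system`
```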
static bool shouldIgnoreQuotaAndLimits(const StorageID & table_id) { @@ -448,6 +492,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (storage) view = dynamic_cast(storage.get()); + if (!settings.additional_table_filters.value.empty() && storage && !joined_tables.tablesWithColumns().empty()) + query_info.additional_filter_ast = parseAdditionalFilterConditionForTable( + settings.additional_table_filters, joined_tables.tablesWithColumns().front().table, *context); + auto analyze = [&] (bool try_move_to_prewhere) { /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. @@ -566,16 +614,16 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Fix source_header for filter actions. if (row_policy_filter) { - filter_info = std::make_shared(); - filter_info->column_name = generateFilterActions(filter_info->actions, required_columns); + filter_info = generateFilterActions( + table_id, row_policy_filter, context, storage, storage_snapshot, metadata_snapshot, required_columns); + } - auto required_columns_from_filter = filter_info->actions->getRequiredColumns(); + if (query_info.additional_filter_ast) + { + additional_filter_info = generateFilterActions( + table_id, query_info.additional_filter_ast, context, storage, storage_snapshot, metadata_snapshot, required_columns); - for (const auto & column : required_columns_from_filter) - { - if (required_columns.end() == std::find(required_columns.begin(), required_columns.end(), column.name)) - required_columns.push_back(column.name); - } + additional_filter_info->do_remove_column = true; } source_header = storage_snapshot->getSampleBlockForColumns(required_columns); @@ -735,7 +783,7 @@ Block InterpreterSelectQuery::getSampleBlockImpl() && options.to_stage > QueryProcessingStage::WithMergeableState; analysis_result = ExpressionAnalysisResult( - *query_analyzer, metadata_snapshot, first_stage, second_stage, options.only_analyze, filter_info, source_header); + *query_analyzer, metadata_snapshot, first_stage, second_stage, options.only_analyze, filter_info, additional_filter_info, source_header); if (options.to_stage == QueryProcessingStage::Enum::FetchColumns) { @@ -786,8 +834,16 @@ Block InterpreterSelectQuery::getSampleBlockImpl() if (analysis_result.use_grouping_set_key) res.insert({ nullptr, std::make_shared(), "__grouping_set" }); - for (const auto & key : query_analyzer->aggregationKeys()) - res.insert({nullptr, header.getByName(key.name).type, key.name}); + if (context->getSettingsRef().group_by_use_nulls && analysis_result.use_grouping_set_key) + { + for (const auto & key : query_analyzer->aggregationKeys()) + res.insert({nullptr, makeNullableSafe(header.getByName(key.name).type), key.name}); + } + else + { + for (const auto & key : query_analyzer->aggregationKeys()) + res.insert({nullptr, header.getByName(key.name).type, key.name}); + } for (const auto & aggregate : query_analyzer->aggregates()) { @@ -1295,6 +1351,18 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( + query_plan.getCurrentDataStream(), + additional_filter_info->actions, + additional_filter_info->column_name, + additional_filter_info->do_remove_column); + + additional_filter_step->setStepDescription("Additional filter"); + query_plan.addStep(std::move(additional_filter_step)); + } + if (expressions.before_array_join) { QueryPlanStepPtr before_array_join_step @@ -1937,6 +2005,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc && storage && storage->getName() != "MaterializedMySQL" && !row_policy_filter + && !query_info.additional_filter_ast && processing_stage == QueryProcessingStage::FetchColumns && query_analyzer->hasAggregation() && (query_analyzer->aggregates().size() == 1) @@ -2036,6 +2105,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc && !query.limit_with_ties && !query.prewhere() && !query.where() + && !query_info.additional_filter_ast && !query.groupBy() && !query.having() && !query.orderBy() @@ -2326,6 +2396,7 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac merge_threads, temporary_data_merge_threads, storage_has_evenly_distributed_read, + settings.group_by_use_nulls, std::move(group_by_info), std::move(group_by_sort_description), should_produce_results_in_order_of_bucket_number); @@ -2402,9 +2473,9 @@ void InterpreterSelectQuery::executeRollupOrCube(QueryPlan & query_plan, Modific QueryPlanStepPtr step; if (modificator == Modificator::ROLLUP) - step = std::make_unique(query_plan.getCurrentDataStream(), std::move(params), final); + step = std::make_unique(query_plan.getCurrentDataStream(), std::move(params), final, settings.group_by_use_nulls); else if (modificator == Modificator::CUBE) - step = std::make_unique(query_plan.getCurrentDataStream(), std::move(params), final); + step = std::make_unique(query_plan.getCurrentDataStream(), std::move(params), final, settings.group_by_use_nulls); query_plan.addStep(std::move(step)); } diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index a95ff00bc0d..e70490f13ac 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -189,8 +189,6 @@ private: void executeMergeSorted(QueryPlan & query_plan, const SortDescription & sort_description, UInt64 limit, const std::string & description); - String generateFilterActions(ActionsDAGPtr & actions, const Names & prerequisite_columns = {}) const; - enum class Modificator { ROLLUP = 0, @@ -217,6 +215,9 @@ private: ASTPtr row_policy_filter; FilterDAGInfoPtr filter_info; + /// For additional_filter setting. + FilterDAGInfoPtr additional_filter_info; + QueryProcessingStage::Enum from_stage = QueryProcessingStage::FetchColumns; /// List of columns to read to execute the query. 
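Note also the `!query_info.additional_filter_ast` conditions added in the hunks just above: when an additional table filter is present, the trivial `count()` shortcut and the related fast paths are disabled, so the result reflects the injected filter. A hypothetical example for a MergeTree table `t` with a column `x` (names are not from this patch):

```sql
SELECT count() FROM t
SETTINGS additional_table_filters = {'t': 'x > 10'};
-- counted by reading and filtering rows rather than taken from table metadata
```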
diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 9f87a47fced..bdec44b74f7 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -357,6 +357,7 @@ void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan) } } + addAdditionalPostFilter(query_plan); query_plan.addInterpreterContext(context); } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index b73919f4f36..1999eff37a8 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -26,7 +25,7 @@ namespace ErrorCodes using IdentifierNameSet = std::set; class WriteBuffer; - +using Strings = std::vector; /** Element of the syntax tree (hereinafter - directed acyclic graph with elements of semantics) */ diff --git a/src/Parsers/MySQL/ASTDeclareOption.h b/src/Parsers/MySQL/ASTDeclareOption.h index a9529924567..c493c49c61b 100644 --- a/src/Parsers/MySQL/ASTDeclareOption.h +++ b/src/Parsers/MySQL/ASTDeclareOption.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Parsers/ParserSetQuery.cpp b/src/Parsers/ParserSetQuery.cpp index 5f69db633ac..0ff437bcfb1 100644 --- a/src/Parsers/ParserSetQuery.cpp +++ b/src/Parsers/ParserSetQuery.cpp @@ -12,12 +12,63 @@ namespace DB { +class ParserLiteralOrMap : public IParserBase +{ +public: +protected: + const char * getName() const override { return "literal or map"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + { + ParserLiteral literal; + if (literal.parse(pos, node, expected)) + return true; + } + + ParserToken l_br(TokenType::OpeningCurlyBrace); + ParserToken r_br(TokenType::ClosingCurlyBrace); + ParserToken comma(TokenType::Comma); + ParserToken colon(TokenType::Colon); + ParserStringLiteral literal; + + if (!l_br.ignore(pos, expected)) + return false; + + Map map; + + while (!r_br.ignore(pos, expected)) + { + if (!map.empty() && !comma.ignore(pos, expected)) + return false; + + ASTPtr key; + ASTPtr val; + + if (!literal.parse(pos, key, expected)) + return false; + + if (!colon.ignore(pos, expected)) + return false; + + if (!literal.parse(pos, val, expected)) + return false; + + Tuple tuple; + tuple.push_back(std::move(key->as()->value)); + tuple.push_back(std::move(val->as()->value)); + map.push_back(std::move(tuple)); + } + + node = std::make_shared(std::move(map)); + return true; + } +}; /// Parse `name = value`. 
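`ParserLiteralOrMap` above lets `SET` accept `{'key': 'value', ...}` map literals (string keys and values) in addition to plain literals, which is what makes map-valued settings such as `additional_table_filters` expressible in a `SET` statement. A sketch with placeholder table names:

```sql
SET additional_table_filters = {'system.numbers': 'number != 3', 'table_1': 'x != 2'};
SET additional_table_filters = {};  -- empty braces parse to an empty map, clearing the filters
```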
bool ParserSetQuery::parseNameValuePair(SettingChange & change, IParser::Pos & pos, Expected & expected) { ParserCompoundIdentifier name_p; - ParserLiteral value_p; + ParserLiteralOrMap value_p; ParserToken s_eq(TokenType::Equals); ASTPtr name; diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp index 5232d9166af..ebd9783b4fd 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index ad173e449d6..50145fd5bc0 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -30,6 +30,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int INCORRECT_DATA; } CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info, const FormatSettings & format_settings_) @@ -264,20 +265,20 @@ bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension if (in->eof()) return false; - auto array = readMessage(); - -#if CAPNP_VERSION >= 7000 && CAPNP_VERSION < 8000 - capnp::UnalignedFlatArrayMessageReader msg(array); -#else - capnp::FlatArrayMessageReader msg(array); -#endif - - auto root_reader = msg.getRoot(root); - - for (size_t i = 0; i != columns.size(); ++i) + try { - auto value = getReaderByColumnName(root_reader, column_names[i]); - insertValue(*columns[i], column_types[i], value, format_settings.capn_proto.enum_comparing_mode); + auto array = readMessage(); + capnp::FlatArrayMessageReader msg(array); + auto root_reader = msg.getRoot(root); + for (size_t i = 0; i != columns.size(); ++i) + { + auto value = getReaderByColumnName(root_reader, column_names[i]); + insertValue(*columns[i], column_types[i], value, format_settings.capn_proto.enum_comparing_mode); + } + } + catch (const kj::Exception & e) + { + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot read row: {}", e.getDescription().cStr()); } return true; diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 0a4b12084eb..f4e3749bd70 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -46,22 +47,32 @@ Block appendGroupingSetColumn(Block header) return res; } -static Block appendGroupingColumn(Block block, const GroupingSetsParamsList & params) +static inline void convertToNullable(Block & header, const Names & keys) +{ + for (const auto & key : keys) + { + auto & column = header.getByName(key); + + column.type = makeNullableSafe(column.type); + column.column = makeNullableSafe(column.column); + } +} + +Block generateOutputHeader(const Block & input_header, const Names & keys, bool use_nulls) +{ + auto header = appendGroupingSetColumn(input_header); + if (use_nulls) + convertToNullable(header, keys); + return header; +} + + +static Block appendGroupingColumn(Block block, const Names & keys, const GroupingSetsParamsList & params, bool use_nulls) { if (params.empty()) return block; - Block res; - - size_t rows = block.rows(); - auto column = ColumnUInt64::create(rows); - - res.insert({ColumnPtr(std::move(column)), std::make_shared(), 
"__grouping_set"}); - - for (auto & col : block) - res.insert(std::move(col)); - - return res; + return generateOutputHeader(block, keys, use_nulls); } AggregatingStep::AggregatingStep( @@ -74,11 +85,12 @@ AggregatingStep::AggregatingStep( size_t merge_threads_, size_t temporary_data_merge_threads_, bool storage_has_evenly_distributed_read_, + bool group_by_use_nulls_, InputOrderInfoPtr group_by_info_, SortDescription group_by_sort_description_, bool should_produce_results_in_order_of_bucket_number_) : ITransformingStep( - input_stream_, appendGroupingColumn(params_.getHeader(input_stream_.header, final_), grouping_sets_params_), getTraits(should_produce_results_in_order_of_bucket_number_), false) + input_stream_, appendGroupingColumn(params_.getHeader(input_stream_.header, final_), params_.keys, grouping_sets_params_, group_by_use_nulls_), getTraits(should_produce_results_in_order_of_bucket_number_), false) , params(std::move(params_)) , grouping_sets_params(std::move(grouping_sets_params_)) , final(final_) @@ -87,6 +99,7 @@ AggregatingStep::AggregatingStep( , merge_threads(merge_threads_) , temporary_data_merge_threads(temporary_data_merge_threads_) , storage_has_evenly_distributed_read(storage_has_evenly_distributed_read_) + , group_by_use_nulls(group_by_use_nulls_) , group_by_info(std::move(group_by_info_)) , group_by_sort_description(std::move(group_by_sort_description_)) , should_produce_results_in_order_of_bucket_number(should_produce_results_in_order_of_bucket_number_) @@ -217,6 +230,8 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B assert(ports.size() == grouping_sets_size); auto output_header = transform_params->getHeader(); + if (group_by_use_nulls) + convertToNullable(output_header, params.keys); for (size_t set_counter = 0; set_counter < grouping_sets_size; ++set_counter) { @@ -236,6 +251,7 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B const auto & missing_columns = grouping_sets_params[set_counter].missing_keys; + auto to_nullable_function = FunctionFactory::instance().get("toNullable", nullptr); for (size_t i = 0; i < output_header.columns(); ++i) { auto & col = output_header.getByPosition(i); @@ -251,7 +267,13 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B index.push_back(node); } else - index.push_back(dag->getIndex()[header.getPositionByName(col.name)]); + { + const auto * column_node = dag->getIndex()[header.getPositionByName(col.name)]; + if (group_by_use_nulls && column_node->result_type->canBeInsideNullable()) + index.push_back(&dag->addFunction(to_nullable_function, { column_node }, col.name)); + else + index.push_back(column_node); + } } dag->getIndex().swap(index); @@ -396,7 +418,7 @@ void AggregatingStep::updateOutputStream() { output_stream = createOutputStream( input_streams.front(), - appendGroupingColumn(params.getHeader(input_streams.front().header, final), grouping_sets_params), + appendGroupingColumn(params.getHeader(input_streams.front().header, final), params.keys, grouping_sets_params, group_by_use_nulls), getDataStreamTraits()); } diff --git a/src/Processors/QueryPlan/AggregatingStep.h b/src/Processors/QueryPlan/AggregatingStep.h index 0e982d76940..71130b65adb 100644 --- a/src/Processors/QueryPlan/AggregatingStep.h +++ b/src/Processors/QueryPlan/AggregatingStep.h @@ -20,6 +20,7 @@ struct GroupingSetsParams using GroupingSetsParamsList = std::vector; Block appendGroupingSetColumn(Block header); +Block generateOutputHeader(const Block & 
input_header, const Names & keys, bool use_nulls); /// Aggregation. See AggregatingTransform. class AggregatingStep : public ITransformingStep @@ -35,6 +36,7 @@ public: size_t merge_threads_, size_t temporary_data_merge_threads_, bool storage_has_evenly_distributed_read_, + bool group_by_use_nulls_, InputOrderInfoPtr group_by_info_, SortDescription group_by_sort_description_, bool should_produce_results_in_order_of_bucket_number_); @@ -62,6 +64,7 @@ private: size_t temporary_data_merge_threads; bool storage_has_evenly_distributed_read; + bool group_by_use_nulls; InputOrderInfoPtr group_by_info; SortDescription group_by_sort_description; diff --git a/src/Processors/QueryPlan/CubeStep.cpp b/src/Processors/QueryPlan/CubeStep.cpp index b0c57491085..52539dec75f 100644 --- a/src/Processors/QueryPlan/CubeStep.cpp +++ b/src/Processors/QueryPlan/CubeStep.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -24,27 +25,41 @@ static ITransformingStep::Traits getTraits() }; } -CubeStep::CubeStep(const DataStream & input_stream_, Aggregator::Params params_, bool final_) - : ITransformingStep(input_stream_, appendGroupingSetColumn(params_.getHeader(input_stream_.header, final_)), getTraits()) +CubeStep::CubeStep(const DataStream & input_stream_, Aggregator::Params params_, bool final_, bool use_nulls_) + : ITransformingStep(input_stream_, generateOutputHeader(params_.getHeader(input_stream_.header, final_), params_.keys, use_nulls_), getTraits()) , keys_size(params_.keys_size) , params(std::move(params_)) , final(final_) + , use_nulls(use_nulls_) { /// Aggregation keys are distinct for (const auto & key : params.keys) output_stream->distinct_columns.insert(key); } -ProcessorPtr addGroupingSetForTotals(const Block & header, const BuildQueryPipelineSettings & settings, UInt64 grouping_set_number) +ProcessorPtr addGroupingSetForTotals(const Block & header, const Names & keys, bool use_nulls, const BuildQueryPipelineSettings & settings, UInt64 grouping_set_number) { auto dag = std::make_shared(header.getColumnsWithTypeAndName()); + auto & index = dag->getIndex(); + + if (use_nulls) + { + auto to_nullable = FunctionFactory::instance().get("toNullable", nullptr); + for (const auto & key : keys) + { + const auto * node = dag->getIndex()[header.getPositionByName(key)]; + if (node->result_type->canBeInsideNullable()) + { + dag->addOrReplaceInIndex(dag->addFunction(to_nullable, { node }, node->result_name)); + } + } + } auto grouping_col = ColumnUInt64::create(1, grouping_set_number); const auto * grouping_node = &dag->addColumn( {ColumnPtr(std::move(grouping_col)), std::make_shared(), "__grouping_set"}); grouping_node = &dag->materializeNode(*grouping_node); - auto & index = dag->getIndex(); index.insert(index.begin(), grouping_node); auto expression = std::make_shared(dag, settings.getActionsSettings()); @@ -58,10 +73,10 @@ void CubeStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQue pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { if (stream_type == QueryPipelineBuilder::StreamType::Totals) - return addGroupingSetForTotals(header, settings, (UInt64(1) << keys_size) - 1); + return addGroupingSetForTotals(header, params.keys, use_nulls, settings, (UInt64(1) << keys_size) - 1); auto transform_params = std::make_shared(header, std::move(params), final); - return std::make_shared(header, std::move(transform_params)); + return std::make_shared(header, std::move(transform_params), use_nulls); }); } @@ -73,7 
+88,7 @@ const Aggregator::Params & CubeStep::getParams() const void CubeStep::updateOutputStream() { output_stream = createOutputStream( - input_streams.front(), appendGroupingSetColumn(params.getHeader(input_streams.front().header, final)), getDataStreamTraits()); + input_streams.front(), generateOutputHeader(params.getHeader(input_streams.front().header, final), params.keys, use_nulls), getDataStreamTraits()); /// Aggregation keys are distinct for (const auto & key : params.keys) diff --git a/src/Processors/QueryPlan/CubeStep.h b/src/Processors/QueryPlan/CubeStep.h index 87f22de7fc6..8a03a33a088 100644 --- a/src/Processors/QueryPlan/CubeStep.h +++ b/src/Processors/QueryPlan/CubeStep.h @@ -13,7 +13,7 @@ using AggregatingTransformParamsPtr = std::shared_ptr #include #include +#include #include #include #include diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 5d5c7e9cb2c..46be5ea1d7d 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include namespace DB { @@ -9,6 +10,8 @@ using PartitionIdToMaxBlock = std::unordered_map; class Pipe; +using MergeTreeReadTaskCallback = std::function(PartitionReadRequest)>; + struct MergeTreeDataSelectSamplingData { bool use_sampling = false; diff --git a/src/Processors/QueryPlan/RollupStep.cpp b/src/Processors/QueryPlan/RollupStep.cpp index 169976195ea..3305f24602f 100644 --- a/src/Processors/QueryPlan/RollupStep.cpp +++ b/src/Processors/QueryPlan/RollupStep.cpp @@ -22,18 +22,19 @@ static ITransformingStep::Traits getTraits() }; } -RollupStep::RollupStep(const DataStream & input_stream_, Aggregator::Params params_, bool final_) - : ITransformingStep(input_stream_, appendGroupingSetColumn(params_.getHeader(input_stream_.header, final_)), getTraits()) +RollupStep::RollupStep(const DataStream & input_stream_, Aggregator::Params params_, bool final_, bool use_nulls_) + : ITransformingStep(input_stream_, generateOutputHeader(params_.getHeader(input_stream_.header, final_), params_.keys, use_nulls_), getTraits()) , params(std::move(params_)) , keys_size(params.keys_size) , final(final_) + , use_nulls(use_nulls_) { /// Aggregation keys are distinct for (const auto & key : params.keys) output_stream->distinct_columns.insert(key); } -ProcessorPtr addGroupingSetForTotals(const Block & header, const BuildQueryPipelineSettings & settings, UInt64 grouping_set_number); +ProcessorPtr addGroupingSetForTotals(const Block & header, const Names & keys, bool use_nulls, const BuildQueryPipelineSettings & settings, UInt64 grouping_set_number); void RollupStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { @@ -42,10 +43,10 @@ void RollupStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { if (stream_type == QueryPipelineBuilder::StreamType::Totals) - return addGroupingSetForTotals(header, settings, keys_size); + return addGroupingSetForTotals(header, params.keys, use_nulls, settings, keys_size); auto transform_params = std::make_shared(header, std::move(params), true); - return std::make_shared(header, std::move(transform_params)); + return std::make_shared(header, std::move(transform_params), use_nulls); }); } diff --git a/src/Processors/QueryPlan/RollupStep.h b/src/Processors/QueryPlan/RollupStep.h index 
c59bf9f3ee9..866de7178fa 100644 --- a/src/Processors/QueryPlan/RollupStep.h +++ b/src/Processors/QueryPlan/RollupStep.h @@ -13,7 +13,7 @@ using AggregatingTransformParamsPtr = std::shared_ptr +#include namespace DB { diff --git a/src/Processors/Transforms/CubeTransform.cpp b/src/Processors/Transforms/CubeTransform.cpp index b80ca29327f..669aaddd1df 100644 --- a/src/Processors/Transforms/CubeTransform.cpp +++ b/src/Processors/Transforms/CubeTransform.cpp @@ -1,6 +1,7 @@ #include #include #include +#include "Processors/Transforms/RollupTransform.h" namespace DB { @@ -9,61 +10,32 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -CubeTransform::CubeTransform(Block header, AggregatingTransformParamsPtr params_) - : IAccumulatingTransform(std::move(header), appendGroupingSetColumn(params_->getHeader())) - , params(std::move(params_)) +CubeTransform::CubeTransform(Block header, AggregatingTransformParamsPtr params_, bool use_nulls_) + : GroupByModifierTransform(std::move(header), params_, use_nulls_) , aggregates_mask(getAggregatesMask(params->getHeader(), params->params.aggregates)) { - keys.reserve(params->params.keys_size); - for (const auto & key : params->params.keys) - keys.emplace_back(input.getHeader().getPositionByName(key)); - if (keys.size() >= 8 * sizeof(mask)) throw Exception("Too many keys are used for CubeTransform.", ErrorCodes::LOGICAL_ERROR); } -Chunk CubeTransform::merge(Chunks && chunks, bool final) -{ - BlocksList rollup_blocks; - for (auto & chunk : chunks) - rollup_blocks.emplace_back(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); - - auto rollup_block = params->aggregator.mergeBlocks(rollup_blocks, final); - auto num_rows = rollup_block.rows(); - return Chunk(rollup_block.getColumns(), num_rows); -} - -void CubeTransform::consume(Chunk chunk) -{ - consumed_chunks.emplace_back(std::move(chunk)); -} - -MutableColumnPtr getColumnWithDefaults(Block const & header, size_t key, size_t n); - Chunk CubeTransform::generate() { if (!consumed_chunks.empty()) { - if (consumed_chunks.size() > 1) - cube_chunk = merge(std::move(consumed_chunks), false); - else - cube_chunk = std::move(consumed_chunks.front()); + mergeConsumed(); - consumed_chunks.clear(); - - auto num_rows = cube_chunk.getNumRows(); + auto num_rows = current_chunk.getNumRows(); mask = (static_cast(1) << keys.size()) - 1; - current_columns = cube_chunk.getColumns(); + current_columns = current_chunk.getColumns(); current_zero_columns.clear(); current_zero_columns.reserve(keys.size()); - auto const & input_header = getInputPort().getHeader(); for (auto key : keys) - current_zero_columns.emplace_back(getColumnWithDefaults(input_header, key, num_rows)); + current_zero_columns.emplace_back(getColumnWithDefaults(key, num_rows)); } - auto gen_chunk = std::move(cube_chunk); + auto gen_chunk = std::move(current_chunk); if (mask) { @@ -78,7 +50,7 @@ Chunk CubeTransform::generate() Chunks chunks; chunks.emplace_back(std::move(columns), current_columns.front()->size()); - cube_chunk = merge(std::move(chunks), false); + current_chunk = merge(std::move(chunks), !use_nulls, false); } finalizeChunk(gen_chunk, aggregates_mask); diff --git a/src/Processors/Transforms/CubeTransform.h b/src/Processors/Transforms/CubeTransform.h index bd33eabd750..54a41e8f44e 100644 --- a/src/Processors/Transforms/CubeTransform.h +++ b/src/Processors/Transforms/CubeTransform.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include @@ -9,30 +10,23 @@ namespace DB /// Takes blocks after grouping, with non-finalized 
aggregate functions. /// Calculates all subsets of columns and aggregates over them. -class CubeTransform : public IAccumulatingTransform +class CubeTransform : public GroupByModifierTransform { public: - CubeTransform(Block header, AggregatingTransformParamsPtr params); + CubeTransform(Block header, AggregatingTransformParamsPtr params, bool use_nulls_); String getName() const override { return "CubeTransform"; } protected: - void consume(Chunk chunk) override; Chunk generate() override; private: - AggregatingTransformParamsPtr params; - ColumnNumbers keys; const ColumnsMask aggregates_mask; - Chunks consumed_chunks; - Chunk cube_chunk; Columns current_columns; Columns current_zero_columns; UInt64 mask = 0; UInt64 grouping_set = 0; - - Chunk merge(Chunks && chunks, bool final); }; } diff --git a/src/Processors/Transforms/RollupTransform.cpp b/src/Processors/Transforms/RollupTransform.cpp index e5351d1d5e2..a5d67fb2f15 100644 --- a/src/Processors/Transforms/RollupTransform.cpp +++ b/src/Processors/Transforms/RollupTransform.cpp @@ -1,36 +1,80 @@ #include #include #include +#include namespace DB { -RollupTransform::RollupTransform(Block header, AggregatingTransformParamsPtr params_) - : IAccumulatingTransform(std::move(header), appendGroupingSetColumn(params_->getHeader())) +GroupByModifierTransform::GroupByModifierTransform(Block header, AggregatingTransformParamsPtr params_, bool use_nulls_) + : IAccumulatingTransform(std::move(header), generateOutputHeader(params_->getHeader(), params_->params.keys, use_nulls_)) , params(std::move(params_)) - , aggregates_mask(getAggregatesMask(params->getHeader(), params->params.aggregates)) + , use_nulls(use_nulls_) { keys.reserve(params->params.keys_size); for (const auto & key : params->params.keys) keys.emplace_back(input.getHeader().getPositionByName(key)); + + intermediate_header = getOutputPort().getHeader(); + intermediate_header.erase(0); + + if (use_nulls) + { + auto output_aggregator_params = params->params; + output_aggregator = std::make_unique(intermediate_header, output_aggregator_params); + } } -void RollupTransform::consume(Chunk chunk) +void GroupByModifierTransform::consume(Chunk chunk) { consumed_chunks.emplace_back(std::move(chunk)); } -Chunk RollupTransform::merge(Chunks && chunks, bool final) +void GroupByModifierTransform::mergeConsumed() { - BlocksList rollup_blocks; - for (auto & chunk : chunks) - rollup_blocks.emplace_back(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + if (consumed_chunks.size() > 1) + current_chunk = merge(std::move(consumed_chunks), true, false); + else + current_chunk = std::move(consumed_chunks.front()); - auto rollup_block = params->aggregator.mergeBlocks(rollup_blocks, final); - auto num_rows = rollup_block.rows(); - return Chunk(rollup_block.getColumns(), num_rows); + size_t rows = current_chunk.getNumRows(); + auto columns = current_chunk.getColumns(); + if (use_nulls) + { + for (auto key : keys) + columns[key] = makeNullableSafe(columns[key]); + } + current_chunk = Chunk{ columns, rows }; + + consumed_chunks.clear(); } +Chunk GroupByModifierTransform::merge(Chunks && chunks, bool is_input, bool final) +{ + auto header = is_input ? getInputPort().getHeader() : intermediate_header; + + BlocksList blocks; + for (auto & chunk : chunks) + blocks.emplace_back(header.cloneWithColumns(chunk.detachColumns())); + + auto current_block = is_input ? 
params->aggregator.mergeBlocks(blocks, final) : output_aggregator->mergeBlocks(blocks, final); + auto num_rows = current_block.rows(); + return Chunk(current_block.getColumns(), num_rows); +} + +MutableColumnPtr GroupByModifierTransform::getColumnWithDefaults(size_t key, size_t n) const +{ + auto const & col = intermediate_header.getByPosition(key); + auto result_column = col.column->cloneEmpty(); + col.type->insertManyDefaultsInto(*result_column, n); + return result_column; +} + +RollupTransform::RollupTransform(Block header, AggregatingTransformParamsPtr params_, bool use_nulls_) + : GroupByModifierTransform(std::move(header), params_, use_nulls_) + , aggregates_mask(getAggregatesMask(params->getHeader(), params->params.aggregates)) +{} + MutableColumnPtr getColumnWithDefaults(Block const & header, size_t key, size_t n) { auto const & col = header.getByPosition(key); @@ -43,16 +87,11 @@ Chunk RollupTransform::generate() { if (!consumed_chunks.empty()) { - if (consumed_chunks.size() > 1) - rollup_chunk = merge(std::move(consumed_chunks), false); - else - rollup_chunk = std::move(consumed_chunks.front()); - - consumed_chunks.clear(); + mergeConsumed(); last_removed_key = keys.size(); } - auto gen_chunk = std::move(rollup_chunk); + auto gen_chunk = std::move(current_chunk); if (last_removed_key) { @@ -61,11 +100,11 @@ Chunk RollupTransform::generate() auto num_rows = gen_chunk.getNumRows(); auto columns = gen_chunk.getColumns(); - columns[key] = getColumnWithDefaults(getInputPort().getHeader(), key, num_rows); + columns[key] = getColumnWithDefaults(key, num_rows); Chunks chunks; chunks.emplace_back(std::move(columns), num_rows); - rollup_chunk = merge(std::move(chunks), false); + current_chunk = merge(std::move(chunks), !use_nulls, false); } finalizeChunk(gen_chunk, aggregates_mask); diff --git a/src/Processors/Transforms/RollupTransform.h b/src/Processors/Transforms/RollupTransform.h index 1630df23579..e9fa0818779 100644 --- a/src/Processors/Transforms/RollupTransform.h +++ b/src/Processors/Transforms/RollupTransform.h @@ -1,4 +1,6 @@ #pragma once +#include +#include #include #include #include @@ -6,29 +8,49 @@ namespace DB { -/// Takes blocks after grouping, with non-finalized aggregate functions. -/// Calculates subtotals and grand totals values for a set of columns. -class RollupTransform : public IAccumulatingTransform +struct GroupByModifierTransform : public IAccumulatingTransform { -public: - RollupTransform(Block header, AggregatingTransformParamsPtr params); - String getName() const override { return "RollupTransform"; } + GroupByModifierTransform(Block header, AggregatingTransformParamsPtr params_, bool use_nulls_); protected: void consume(Chunk chunk) override; + + void mergeConsumed(); + + Chunk merge(Chunks && chunks, bool is_input, bool final); + + MutableColumnPtr getColumnWithDefaults(size_t key, size_t n) const; + + AggregatingTransformParamsPtr params; + + bool use_nulls; + + ColumnNumbers keys; + + std::unique_ptr output_aggregator; + + Block intermediate_header; + + Chunks consumed_chunks; + Chunk current_chunk; +}; + +/// Takes blocks after grouping, with non-finalized aggregate functions. +/// Calculates subtotals and grand totals values for a set of columns. 
+class RollupTransform : public GroupByModifierTransform +{ +public: + RollupTransform(Block header, AggregatingTransformParamsPtr params, bool use_nulls_); + String getName() const override { return "RollupTransform"; } + +protected: Chunk generate() override; private: - AggregatingTransformParamsPtr params; - ColumnNumbers keys; const ColumnsMask aggregates_mask; - Chunks consumed_chunks; - Chunk rollup_chunk; size_t last_removed_key = 0; size_t set_counter = 0; - - Chunk merge(Chunks && chunks, bool final); }; } diff --git a/src/QueryPipeline/RemoteInserter.cpp b/src/QueryPipeline/RemoteInserter.cpp index ce2ba23576d..58fed6e5466 100644 --- a/src/QueryPipeline/RemoteInserter.cpp +++ b/src/QueryPipeline/RemoteInserter.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h index 9895c335c96..ee83c4fa21b 100644 --- a/src/Server/KeeperTCPHandler.h +++ b/src/Server/KeeperTCPHandler.h @@ -9,7 +9,6 @@ #include #include "IServer.h" #include -#include #include #include #include diff --git a/src/Storages/Hive/HiveFile.cpp b/src/Storages/Hive/HiveFile.cpp index 09c3aff4455..fc08c046f93 100644 --- a/src/Storages/Hive/HiveFile.cpp +++ b/src/Storages/Hive/HiveFile.cpp @@ -79,13 +79,23 @@ Range createRangeFromParquetStatistics(std::shared_ptr IHiveFile::getRows() { - if (!rows) - rows = getRowsImpl(); + if (!has_init_rows) + { + std::lock_guard lock(mutex); + if (!has_init_rows) + { + rows = getRowsImpl(); + has_init_rows = true; + } + } return rows; } void IHiveFile::loadFileMinMaxIndex() { + if (file_minmax_idx_loaded) + return; + std::lock_guard lock(mutex); if (file_minmax_idx_loaded) return; loadFileMinMaxIndexImpl(); @@ -94,6 +104,9 @@ void IHiveFile::loadFileMinMaxIndex() void IHiveFile::loadSplitMinMaxIndexes() { + if (split_minmax_idxes_loaded) + return; + std::lock_guard lock(mutex); if (split_minmax_idxes_loaded) return; loadSplitMinMaxIndexesImpl(); diff --git a/src/Storages/Hive/HiveFile.h b/src/Storages/Hive/HiveFile.h index cbdf17bd5b5..a4bd345aa48 100644 --- a/src/Storages/Hive/HiveFile.h +++ b/src/Storages/Hive/HiveFile.h @@ -149,6 +149,7 @@ protected: String path; UInt64 last_modify_time; size_t size; + std::atomic has_init_rows = false; std::optional rows; NamesAndTypesList index_names_and_types; @@ -162,6 +163,9 @@ protected: /// Skip splits for this file after applying minmax index (if any) std::unordered_set skip_splits; std::shared_ptr storage_settings; + + /// IHiveFile would be shared among multi threads, need lock's protection to update min/max indexes. 
+ std::mutex mutex; }; using HiveFilePtr = std::shared_ptr; diff --git a/src/Storages/MergeTree/BackgroundJobsAssignee.cpp b/src/Storages/MergeTree/BackgroundJobsAssignee.cpp index 81445f40ed6..9617d16f6f1 100644 --- a/src/Storages/MergeTree/BackgroundJobsAssignee.cpp +++ b/src/Storages/MergeTree/BackgroundJobsAssignee.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 9e18dbc6281..3609a65bc71 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -127,12 +127,13 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write { if (part && part->isProjectionPart()) { - data.reportBrokenPart(part->getParentPart()->name); + auto parent_part = part->getParentPart()->shared_from_this(); + data.reportBrokenPart(parent_part); } + else if (part) + data.reportBrokenPart(part); else - { - data.reportBrokenPart(part_name); - } + LOG_TRACE(log, "Part {} was not found, do not report it as broken", part_name); }; try diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index b8aeb8e6a5a..3acb4910e28 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.h b/src/Storages/MergeTree/MergePlainMergeTreeTask.h index 0f6d38d2cbf..7488b9655fe 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.h +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index 08d39091cfd..234487763d7 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -124,7 +124,7 @@ void MergeTreeBackgroundExecutor::routine(TaskRuntimeDataPtr item) /// All operations with queues are considered no to do any allocations - auto erase_from_active = [this, item]() TSA_REQUIRES(mutex) + auto erase_from_active = [this, &item]() TSA_REQUIRES(mutex) { active.erase(std::remove(active.begin(), active.end(), item), active.end()); }; @@ -157,11 +157,10 @@ void MergeTreeBackgroundExecutor::routine(TaskRuntimeDataPtr item) if (need_execute_again) { std::lock_guard guard(mutex); + erase_from_active(); if (item->is_currently_deleting) { - erase_from_active(); - /// This is significant to order the destructors. { NOEXCEPT_SCOPE({ @@ -179,7 +178,6 @@ void MergeTreeBackgroundExecutor::routine(TaskRuntimeDataPtr item) /// Otherwise the destruction of the task won't be ordered with the destruction of the /// storage. 
pending.push(std::move(item)); - erase_from_active(); has_tasks.notify_one(); item = nullptr; return; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 3b39100b3de..727ebc9c3cc 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6031,8 +6031,10 @@ void MergeTreeData::reportBrokenPart(MergeTreeData::DataPartPtr & data_part) con broken_part_callback(part->name); } } - else + else if (data_part && data_part->getState() == IMergeTreeDataPart::State::Active) broken_part_callback(data_part->name); + else + LOG_DEBUG(log, "Will not check potentially broken part {} because it's not active", data_part->getNameWithState()); } MergeTreeData::MatcherFn MergeTreeData::getPartitionMatcher(const ASTPtr & partition_ast, ContextPtr local_context) const diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 0b6e757ab49..7c3bc21f391 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -56,6 +56,9 @@ struct ZeroCopyLock; class IBackupEntry; using BackupEntries = std::vector>>; +class MergeTreeTransaction; +using MergeTreeTransactionPtr = std::shared_ptr; + /// Auxiliary struct holding information about the future merged or mutated part. struct EmergingPartInfo { @@ -669,12 +672,7 @@ public: AlterLockHolder & table_lock_holder); /// Should be called if part data is suspected to be corrupted. - void reportBrokenPart(const String & name) const - { - broken_part_callback(name); - } - - /// Same as above but has the ability to check all other parts + /// Has the ability to check all other parts /// which reside on the same disk of the suspicious part. void reportBrokenPart(MergeTreeData::DataPartPtr & data_part) const; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index d9fc8ccaf42..3916eae1556 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -383,6 +383,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( merge_threads, temporary_data_merge_threads, /* storage_has_evenly_distributed_read_= */ false, + /* group_by_use_nulls */ false, std::move(group_by_info), std::move(group_by_sort_description), should_produce_results_in_order_of_bucket_number); diff --git a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp index 280ce82cfce..655ca003deb 100644 --- a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp @@ -44,7 +44,7 @@ catch (...) { /// Suspicion of the broken part. A part is added to the queue for verification. 
if (getCurrentExceptionCode() != ErrorCodes::MEMORY_LIMIT_EXCEEDED) - storage.reportBrokenPart(data_part->name); + storage.reportBrokenPart(data_part); throw; } diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 81026989f95..4ea6ec11ecc 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index 3eb638d15c0..9ed8fe0ad14 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 7967726edca..2c32d9f266c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -2336,6 +2336,12 @@ bool ReplicatedMergeTreeMergePredicate::hasDropRange(const MergeTreePartInfo & n return queue.hasDropRange(new_drop_range_info); } +String ReplicatedMergeTreeMergePredicate::getCoveringVirtualPart(const String & part_name) const +{ + std::lock_guard lock(queue.state_mutex); + return queue.virtual_parts.getContainingPart(MergeTreePartInfo::fromPartName(part_name, queue.format_version)); +} + ReplicatedMergeTreeQueue::SubscriberHandler ReplicatedMergeTreeQueue::addSubscriber(ReplicatedMergeTreeQueue::SubscriberCallBack && callback) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index a88d9182bbf..f4cae7152ef 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -519,8 +519,12 @@ public: /// The version of "log" node that is used to check that no new merges have appeared. int32_t getVersion() const { return merges_version; } + /// Returns true if there's a drop range covering new_drop_range_info bool hasDropRange(const MergeTreePartInfo & new_drop_range_info) const; + /// Returns virtual part covering part_name (if any) or empty string + String getCoveringVirtualPart(const String & part_name) const; + private: const ReplicatedMergeTreeQueue & queue; diff --git a/src/Storages/ReadInOrderOptimizer.cpp b/src/Storages/ReadInOrderOptimizer.cpp index 3ff4baa0b11..b188cef065e 100644 --- a/src/Storages/ReadInOrderOptimizer.cpp +++ b/src/Storages/ReadInOrderOptimizer.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index bdb4c392c48..5046a0b6fe0 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -156,6 +156,10 @@ struct SelectQueryInfoBase PrewhereInfoPtr prewhere_info; + /// This is an additional filter applied to current table. + /// It is needed only for additional PK filtering. 
+ ASTPtr additional_filter_ast; + ReadInOrderOptimizerPtr order_optimizer; /// Can be modified while reading from storage InputOrderInfoPtr input_order_info; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 520b5534fe3..1bc4c26e40e 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1837,8 +1837,8 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) LOG_TRACE(log, "Executing DROP_RANGE {}", entry.new_part_name); auto drop_range_info = MergeTreePartInfo::fromPartName(entry.new_part_name, format_version); getContext()->getMergeList().cancelInPartition(getStorageID(), drop_range_info.partition_id, drop_range_info.max_block); - part_check_thread.cancelRemovedPartsCheck(drop_range_info); queue.removePartProducingOpsInRange(getZooKeeper(), drop_range_info, entry); + part_check_thread.cancelRemovedPartsCheck(drop_range_info); /// Delete the parts contained in the range to be deleted. /// It's important that no old parts remain (after the merge), because otherwise, @@ -1906,8 +1906,8 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) if (replace) { getContext()->getMergeList().cancelInPartition(getStorageID(), drop_range.partition_id, drop_range.max_block); - part_check_thread.cancelRemovedPartsCheck(drop_range); queue.removePartProducingOpsInRange(getZooKeeper(), drop_range, entry); + part_check_thread.cancelRemovedPartsCheck(drop_range); } else { @@ -7953,12 +7953,31 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP while (true) { + /// We should be careful when creating an empty part, because we are not sure that this part is still needed. + /// For example, it's possible that part (or partition) was dropped (or replaced) concurrently. + /// We can enqueue part for check from DataPartExchange or SelectProcessor + /// and it's hard to synchronize it with ReplicatedMergeTreeQueue and PartCheckThread... + /// But at least we can ignore parts that are definitely not needed according to virtual parts and drop ranges. 
+ auto pred = queue.getMergePredicate(zookeeper); + String covering_virtual = pred.getCoveringVirtualPart(lost_part_name); + if (covering_virtual.empty()) + { + LOG_WARNING(log, "Will not create empty part instead of lost {}, because there's no covering part in replication queue", lost_part_name); + return false; + } + if (pred.hasDropRange(MergeTreePartInfo::fromPartName(covering_virtual, format_version))) + { + LOG_WARNING(log, "Will not create empty part instead of lost {}, because it's covered by DROP_RANGE", lost_part_name); + return false; + } Coordination::Requests ops; Coordination::Stat replicas_stat; auto replicas_path = fs::path(zookeeper_path) / "replicas"; Strings replicas = zookeeper->getChildren(replicas_path, &replicas_stat); + ops.emplace_back(zkutil::makeCheckRequest(zookeeper_path + "/log", pred.getVersion())); + /// In rare cases new replica can appear during check ops.emplace_back(zkutil::makeCheckRequest(replicas_path, replicas_stat.version)); @@ -7988,7 +8007,7 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP } else if (code == Coordination::Error::ZBADVERSION) { - LOG_INFO(log, "Looks like new replica appearead while creating new empty part, will retry"); + LOG_INFO(log, "Looks like log was updated or new replica appeared while creating new empty part, will retry"); } else { diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index 5f5a7887e80..d86a0d4f5df 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -131,6 +131,7 @@ const char * auto_contributors[] { "Anton Okhitin", "Anton Okulov", "Anton Patsev", + "Anton Petrov", "Anton Popov", "Anton Tihonov", "Anton Tikhonov", @@ -149,6 +150,7 @@ const char * auto_contributors[] { "Artem Zuikov", "Artemeey", "Artemkin Pavel", + "Arthur Passos", "Arthur Petukhovsky", "Arthur Tokarchuk", "Arthur Wong", @@ -193,7 +195,9 @@ const char * auto_contributors[] { "Chao Ma", "Chao Wang", "CheSema", + "Chebarykov Pavel", "Chen Yufei", + "Cheng Pan", "Chienlung Cheung", "Christian", "Christoph Wurm", @@ -248,6 +252,7 @@ const char * auto_contributors[] { "Dmitry Moskowski", "Dmitry Muzyka", "Dmitry Novik", + "Dmitry Pavlov", "Dmitry Petukhov", "Dmitry Rubashkin", "Dmitry S..ky / skype: dvska-at-skype", @@ -280,6 +285,7 @@ const char * auto_contributors[] { "Evgeniy Udodov", "Evgeny", "Evgeny Konkov", + "Evgeny Kruglov", "Evgeny Markov", "Ewout", "FArthur-cmd", @@ -323,6 +329,7 @@ const char * auto_contributors[] { "Grigory", "Grigory Buteyko", "Grigory Pervakov", + "GruffGemini", "Guillaume Tassery", "Guo Wei (William)", "Haavard Kvaalen", @@ -330,6 +337,7 @@ const char * auto_contributors[] { "HaiBo Li", "Hamoon", "Han Fei", + "Harry Lee", "Harry-Lee", "HarryLeeIBM", "Hasitha Kanchana", @@ -386,6 +394,7 @@ const char * auto_contributors[] { "Jake Liu", "Jakub Kuklis", "James Maidment", + "James Morrison", "JaosnHsieh", "Jason", "Jason Keirstead", @@ -402,6 +411,7 @@ const char * auto_contributors[] { "John Hummel", "John Skopis", "Jonatas Freitas", + "Jordi Villar", "João Figueiredo", "Julian Gilyadov", "Julian Zhou", @@ -444,6 +454,7 @@ const char * auto_contributors[] { "Larry Luo", "Lars Eidnes", "Latysheva Alexandra", + "Laurie Li", "Lemore", "Leonardo Cecchi", "Leonid Krylov", @@ -516,6 +527,7 @@ const char * auto_contributors[] { "Michael Monashev", "Michael Nutt", "Michael Razuvaev", + "Michael Schnerring", "Michael 
Smitasin", "Michail Safronov", "Michal Lisowski", @@ -632,6 +644,7 @@ const char * auto_contributors[] { "Pawel Rog", "Peignon Melvyn", "Peng Jian", + "Peng Liu", "Persiyanov Dmitriy Andreevich", "Pervakov Grigorii", "Pervakov Grigory", @@ -643,6 +656,7 @@ const char * auto_contributors[] { "Pxl", "Pysaoke", "Quid37", + "Rafael Acevedo", "Rafael David Tinoco", "Rajkumar", "Rajkumar Varada", @@ -670,6 +684,7 @@ const char * auto_contributors[] { "Roman Nozdrin", "Roman Peshkurov", "Roman Tsisyk", + "Roman Vasin", "Roman Zhukov", "Roy Bellingan", "Ruslan", @@ -685,6 +700,7 @@ const char * auto_contributors[] { "SaltTan", "Sami Kerola", "Samuel Chou", + "San", "Saulius Valatka", "Sean Haynes", "Sean Lafferty", @@ -760,6 +776,7 @@ const char * auto_contributors[] { "Tiaonmmn", "Tigran Khudaverdyan", "Timur Magomedov", + "Timur Solodovnikov", "TiunovNN", "Tobias Adamson", "Tobias Lins", @@ -814,6 +831,8 @@ const char * auto_contributors[] { "Vladimir C", "Vladimir Ch", "Vladimir Chebotarev", + "Vladimir Chebotaryov", + "Vladimir Galunshchikov", "Vladimir Golovchenko", "Vladimir Goncharov", "Vladimir Klimontovich", @@ -823,6 +842,7 @@ const char * auto_contributors[] { "Vladimir Smirnov", "Vladislav Rassokhin", "Vladislav Smirnov", + "Vladislav V", "Vojtech Splichal", "Volodymyr Kuznetsov", "Vsevolod Orlov", @@ -831,6 +851,7 @@ const char * auto_contributors[] { "W", "Wang Fenjin", "WangZengrui", + "Wangyang Guo", "Weiqing Xu", "William Shallum", "Winter Zhang", @@ -838,6 +859,7 @@ const char * auto_contributors[] { "Xianda Ke", "Xiang Zhou", "Xin Wang", + "Xoel Lopez Barata", "Xudong Zhang", "Y Lu", "Yakko Majuri", @@ -855,6 +877,8 @@ const char * auto_contributors[] { "Yong Wang", "Yong-Hao Zou", "Youenn Lebras", + "Yu, Peng", + "Yuko Takagi", "Yuntao Wu", "Yuri Dyachenko", "Yurii Vlasenko", @@ -871,6 +895,7 @@ const char * auto_contributors[] { "Zijie Lu", "Zoran Pandovski", "a.palagashvili", + "aaapetrenko", "abdrakhmanov", "abel-wang", "abyss7", @@ -933,6 +958,7 @@ const char * auto_contributors[] { "chang.chen", "changvvb", "chasingegg", + "chen", "chen9t", "chengy8934", "chenjian", @@ -1110,6 +1136,8 @@ const char * auto_contributors[] { "linceyou", "lincion", "lingo-xp", + "lingpeng0314", + "lirulei", "listar", "litao91", "liu-bov", @@ -1119,10 +1147,13 @@ const char * auto_contributors[] { "liuyimin", "liyang", "liyang830", + "lokax", "lomberts", "loneylee", "long2ice", + "loyispa", "lthaooo", + "ltrk2", "ltybc-coder", "luc1ph3r", "lulichao", @@ -1213,6 +1244,7 @@ const char * auto_contributors[] { "redclusive", "rfraposa", "ritaank", + "rnbondarenko", "robert", "robot-clickhouse", "robot-metrika-test", @@ -1225,6 +1257,7 @@ const char * auto_contributors[] { "ryzuo", "s-kat", "santaux", + "santrancisco", "satanson", "save-my-heart", "sdk2", @@ -1327,6 +1360,7 @@ const char * auto_contributors[] { "zhangxiao871", "zhangyifan27", "zhangyuli1", + "zhao zhou", "zhen ni", "zhifeng", "zhongyuankai", diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 01bba669c0e..1b207d1d165 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Storages/System/StorageSystemSettingsChanges.cpp b/src/Storages/System/StorageSystemSettingsChanges.cpp new file mode 100644 index 00000000000..e84fd44fcc3 --- /dev/null +++ b/src/Storages/System/StorageSystemSettingsChanges.cpp @@ -0,0 +1,37 @@ +#include +#include +#include 
+#include +#include +#include + +namespace DB +{ +NamesAndTypesList StorageSystemSettingsChanges::getNamesAndTypes() +{ + return { + {"version", std::make_shared()}, + {"changes", + std::make_shared(std::make_shared( + DataTypes{ + std::make_shared(), + std::make_shared(), + std::make_shared(), + std::make_shared()}, + Names{"name", "previous_value", "new_value", "reason"}))}, + }; +} + +void StorageSystemSettingsChanges::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +{ + for (auto it = settings_changes_history.rbegin(); it != settings_changes_history.rend(); ++it) + { + res_columns[0]->insert(it->first.toString()); + Array changes; + for (const auto & change : it->second) + changes.push_back(Tuple{change.name, toString(change.previous_value), toString(change.new_value), change.reason}); + res_columns[1]->insert(changes); + } +} + +} diff --git a/src/Storages/System/StorageSystemSettingsChanges.h b/src/Storages/System/StorageSystemSettingsChanges.h new file mode 100644 index 00000000000..283487df51b --- /dev/null +++ b/src/Storages/System/StorageSystemSettingsChanges.h @@ -0,0 +1,28 @@ +#pragma once + +#include + + +namespace DB +{ + +class Context; + + +/** Implements system table "settings_changes", which allows to get information + * about the settings changes through different ClickHouse versions. + */ +class StorageSystemSettingsChanges final : public IStorageSystemOneBlock +{ +public: + std::string getName() const override { return "SystemSettingsChanges"; } + + static NamesAndTypesList getNamesAndTypes(); + +protected: + using IStorageSystemOneBlock::IStorageSystemOneBlock; + + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; +}; + +} diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index a86a04c4444..dbef2df953b 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -100,6 +101,7 @@ void attachSystemTablesLocal(ContextPtr context, IDatabase & system_database) attach(context, system_database, "functions"); attach(context, system_database, "events"); attach(context, system_database, "settings"); + attach(context, system_database, "settings_changes"); attach>(context, system_database, "merge_tree_settings"); attach>(context, system_database, "replicated_merge_tree_settings"); attach(context, system_database, "build_options"); diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 8fa4d02e8e1..8acd7434d51 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -123,6 +123,17 @@ ColumnsDescription getStructureOfRemoteTable( std::string fail_messages; + /// Use local shard as first priority, as it needs no network communication + for (const auto & shard_info : shards_info) + { + if (shard_info.isLocal()) + { + const auto & res = getStructureOfRemoteTableInShard(cluster, shard_info, table_id, context, table_func_ptr); + chassert(!res.empty()); + return res; + } + } + for (const auto & shard_info : shards_info) { try diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index a53ce6715d5..420ca7a0ff7 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -3,8 +3,9 @@ import time import os import csv -from env_helper import GITHUB_REPOSITORY 
+from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL from ci_config import CI_CONFIG +from pr_info import SKIP_SIMPLE_CHECK_LABEL RETRY = 5 @@ -73,3 +74,28 @@ def post_labels(gh, pr_info, labels_names): pull_request = repo.get_pull(pr_info.number) for label in labels_names: pull_request.add_to_labels(label) + + +def fail_simple_check(gh, pr_info, description): + if SKIP_SIMPLE_CHECK_LABEL in pr_info.labels: + return + commit = get_commit(gh, pr_info.sha) + commit.create_status( + context="Simple Check", + description=description, + state="failure", + target_url=GITHUB_RUN_URL, + ) + + +def create_simple_check(gh, pr_info): + commit = get_commit(gh, pr_info.sha) + for status in commit.get_statuses(): + if "Simple Check" in status.context: + return + commit.create_status( + context="Simple Check", + description="Skipped", + state="success", + target_url=GITHUB_RUN_URL, + ) diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index ce5a4195ceb..2e4d54f34c2 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -8,13 +8,16 @@ import sys from github import Github -from env_helper import CACHES_PATH, TEMP_PATH, GITHUB_SERVER_URL, GITHUB_REPOSITORY -from pr_info import FORCE_TESTS_LABEL, PRInfo, SKIP_SIMPLE_CHECK_LABEL +from env_helper import CACHES_PATH, TEMP_PATH +from pr_info import FORCE_TESTS_LABEL, PRInfo from s3_helper import S3Helper from get_robot_token import get_best_robot_token from upload_result_helper import upload_results from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status, get_commit +from commit_status_helper import ( + post_commit_status, + fail_simple_check, +) from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, @@ -219,16 +222,5 @@ if __name__ == "__main__": if FORCE_TESTS_LABEL in pr_info.labels and state != "error": print(f"'{FORCE_TESTS_LABEL}' enabled, will report success") else: - if SKIP_SIMPLE_CHECK_LABEL not in pr_info.labels: - url = ( - f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/" - "blob/master/.github/PULL_REQUEST_TEMPLATE.md?plain=1" - ) - commit = get_commit(gh, pr_info.sha) - commit.create_status( - context="Simple Check", - description=f"{NAME} failed", - state="failed", - target_url=url, - ) + fail_simple_check(gh, pr_info, f"{NAME} failed") sys.exit(1) diff --git a/tests/ci/rerun_helper.py b/tests/ci/rerun_helper.py index 35363593db6..0d523640f56 100644 --- a/tests/ci/rerun_helper.py +++ b/tests/ci/rerun_helper.py @@ -36,3 +36,9 @@ class RerunHelper: ): return True return False + + def get_finished_status(self): + for status in self.statuses: + if self.check_name in status.context: + return status + return None diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index b6d654c7bed..a39d97ce81d 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -6,7 +6,12 @@ from typing import Tuple from github import Github -from commit_status_helper import get_commit, post_labels, remove_labels +from commit_status_helper import ( + get_commit, + post_labels, + remove_labels, + create_simple_check, +) from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL from get_robot_token import get_best_robot_token from pr_info import FORCE_TESTS_LABEL, PRInfo @@ -223,12 +228,7 @@ if __name__ == "__main__": if pr_labels_to_remove: remove_labels(gh, pr_info, pr_labels_to_remove) - commit.create_status( - context="Simple Check", - description="Skipped", - state="success", - target_url=GITHUB_RUN_URL, - ) + create_simple_check(gh, 
pr_info) if description_error: print( diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 84ed9e5a124..ce638c7fac5 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -10,15 +10,16 @@ from github import Github from env_helper import ( RUNNER_TEMP, GITHUB_WORKSPACE, - GITHUB_REPOSITORY, - GITHUB_SERVER_URL, ) from s3_helper import S3Helper -from pr_info import PRInfo, SKIP_SIMPLE_CHECK_LABEL +from pr_info import PRInfo from get_robot_token import get_best_robot_token from upload_result_helper import upload_results from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status, get_commit +from commit_status_helper import ( + post_commit_status, + fail_simple_check, +) from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, @@ -124,17 +125,6 @@ if __name__ == "__main__": ) ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if state == "error": - if SKIP_SIMPLE_CHECK_LABEL not in pr_info.labels: - url = ( - f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/" - "blob/master/.github/PULL_REQUEST_TEMPLATE.md?plain=1" - ) - commit = get_commit(gh, pr_info.sha) - commit.create_status( - context="Simple Check", - description=f"{NAME} failed", - state="failed", - target_url=url, - ) + if state in ["error", "failure"]: + fail_simple_check(gh, pr_info, f"{NAME} failed") sys.exit(1) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 5f1d1a32588..7700fc2dffd 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -385,6 +385,7 @@ class ClickHouseCluster: self.with_jdbc_bridge = False self.with_nginx = False self.with_hive = False + self.with_coredns = False self.with_minio = False self.minio_dir = os.path.join(self.instances_dir, "minio") @@ -428,6 +429,8 @@ class ClickHouseCluster: self.schema_registry_port = get_free_port() self.kafka_docker_id = self.get_instance_docker_id(self.kafka_host) + self.coredns_host = "coredns" + # available when with_kerberozed_kafka == True self.kerberized_kafka_host = "kerberized_kafka1" self.kerberized_kafka_port = get_free_port() @@ -1102,6 +1105,25 @@ class ClickHouseCluster: ] return self.base_mongo_cmd + def setup_coredns_cmd(self, instance, env_variables, docker_compose_yml_dir): + self.with_coredns = True + env_variables["COREDNS_CONFIG_DIR"] = instance.path + "/" + "coredns_config" + self.base_cmd.extend( + ["--file", p.join(docker_compose_yml_dir, "docker_compose_coredns.yml")] + ) + + self.base_coredns_cmd = [ + "docker-compose", + "--env-file", + instance.env_file, + "--project-name", + self.project_name, + "--file", + p.join(docker_compose_yml_dir, "docker_compose_coredns.yml"), + ] + + return self.base_coredns_cmd + def setup_meili_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_meili = True env_variables["MEILI_HOST"] = self.meili_host @@ -1265,6 +1287,7 @@ class ClickHouseCluster: with_cassandra=False, with_jdbc_bridge=False, with_hive=False, + with_coredns=False, hostname=None, env_variables=None, image="clickhouse/integration-test", @@ -1349,6 +1372,7 @@ class ClickHouseCluster: with_cassandra=with_cassandra, with_jdbc_bridge=with_jdbc_bridge, with_hive=with_hive, + with_coredns=with_coredns, server_bin_path=self.server_bin_path, odbc_bridge_bin_path=self.odbc_bridge_bin_path, library_bridge_bin_path=self.library_bridge_bin_path, @@ -1513,6 +1537,11 @@ class ClickHouseCluster: ) ) + if with_coredns and not self.with_coredns: + 
cmds.append( + self.setup_coredns_cmd(instance, env_variables, docker_compose_yml_dir) + ) + if with_meili and not self.with_meili: cmds.append( self.setup_meili_cmd(instance, env_variables, docker_compose_yml_dir) @@ -1629,6 +1658,16 @@ class ClickHouseCluster: "IPAddress" ] + def get_instance_global_ipv6(self, instance_name): + logging.debug("get_instance_ip instance_name={}".format(instance_name)) + docker_id = self.get_instance_docker_id(instance_name) + # for cont in self.docker_client.containers.list(): + # logging.debug("CONTAINERS LIST: ID={} NAME={} STATUS={}".format(cont.id, cont.name, cont.status)) + handle = self.docker_client.containers.get(docker_id) + return list(handle.attrs["NetworkSettings"]["Networks"].values())[0][ + "GlobalIPv6Address" + ] + def get_container_id(self, instance_name): return self.get_instance_docker_id(instance_name) # docker_id = self.get_instance_docker_id(instance_name) @@ -2453,6 +2492,12 @@ class ClickHouseCluster: self.up_called = True self.wait_mongo_to_start(30, secure=self.with_mongo_secure) + if self.with_coredns and self.base_coredns_cmd: + logging.debug("Setup coredns") + run_and_check(self.base_coredns_cmd + common_opts) + self.up_called = True + time.sleep(10) + if self.with_meili and self.base_meili_cmd: logging.debug("Setup MeiliSearch") run_and_check(self.base_meili_cmd + common_opts) @@ -2791,6 +2836,7 @@ class ClickHouseInstance: with_azurite, with_jdbc_bridge, with_hive, + with_coredns, with_cassandra, server_bin_path, odbc_bridge_bin_path, @@ -2874,6 +2920,8 @@ class ClickHouseInstance: self.with_cassandra = with_cassandra self.with_jdbc_bridge = with_jdbc_bridge self.with_hive = with_hive + self.with_coredns = with_coredns + self.coredns_config_dir = p.abspath(p.join(base_path, "coredns_config")) self.main_config_name = main_config_name self.users_config_name = users_config_name @@ -3783,6 +3831,11 @@ class ClickHouseInstance: self.kerberos_secrets_dir, p.abspath(p.join(self.path, "secrets")) ) + if self.with_coredns: + shutil.copytree( + self.coredns_config_dir, p.abspath(p.join(self.path, "coredns_config")) + ) + # Copy config.d configs logging.debug( f"Copy custom test config files {self.custom_main_config_paths} to {self.config_d_dir}" diff --git a/tests/integration/test_create_user_and_login/test.py b/tests/integration/test_create_user_and_login/test.py index fd052ba9716..25346c22d3b 100644 --- a/tests/integration/test_create_user_and_login/test.py +++ b/tests/integration/test_create_user_and_login/test.py @@ -81,18 +81,23 @@ EOF""", ["bash", "-c", "rm /etc/clickhouse-server/users.d/user_c.xml"] ) - expected_error = "no user with such name" + expected_errors = ["no user with such name", "not found in user directories"] while True: out, err = instance.query_and_get_answer_with_error("SELECT 1", user="C") - if expected_error in err: - logging.debug(f"Got error '{expected_error}' just as expected") + found_error = [ + expected_error + for expected_error in expected_errors + if (expected_error in err) + ] + if found_error: + logging.debug(f"Got error '{found_error}' just as expected") break if out == "1\n": logging.debug(f"Got output '1', retrying...") time.sleep(0.5) continue raise Exception( - f"Expected either output '1' or error '{expected_error}', got output={out} and error={err}" + f"Expected either output '1' or one of errors '{expected_errors}', got output={out} and error={err}" ) assert instance.query("SELECT name FROM system.users WHERE name='C'") == "" diff --git 
a/tests/integration/test_distributed_respect_user_timeouts/test.py b/tests/integration/test_distributed_respect_user_timeouts/test.py index ea79a9544d5..593843b4e4a 100644 --- a/tests/integration/test_distributed_respect_user_timeouts/test.py +++ b/tests/integration/test_distributed_respect_user_timeouts/test.py @@ -129,15 +129,7 @@ def started_cluster(request): def _check_timeout_and_exception(node, user, query_base, query): repeats = EXPECTED_BEHAVIOR[user]["times"] - extra_repeats = 1 - # Table function remote() are executed two times. - # It tries to get table structure from remote shards. - # On 'node2' it will firstly try to get structure from 'node1' (which is not available), - # so there are 1 extra connection attempts for 'node2' and 'remote' - if node.name == "node2" and query_base == "remote": - extra_repeats = 2 - - expected_timeout = EXPECTED_BEHAVIOR[user]["timeout"] * repeats * extra_repeats + expected_timeout = EXPECTED_BEHAVIOR[user]["timeout"] * repeats start = timeit.default_timer() exception = node.query_and_get_error(query, user=user) diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/__init__.py b/tests/integration/test_host_regexp_multiple_ptr_records/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/configs/host_regexp.xml b/tests/integration/test_host_regexp_multiple_ptr_records/configs/host_regexp.xml new file mode 100644 index 00000000000..7a2141e6c7e --- /dev/null +++ b/tests/integration/test_host_regexp_multiple_ptr_records/configs/host_regexp.xml @@ -0,0 +1,11 @@ + + + + + + test1\.example\.com$ + + default + + + \ No newline at end of file diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/configs/listen_host.xml b/tests/integration/test_host_regexp_multiple_ptr_records/configs/listen_host.xml new file mode 100644 index 00000000000..58ef55cd3f3 --- /dev/null +++ b/tests/integration/test_host_regexp_multiple_ptr_records/configs/listen_host.xml @@ -0,0 +1,5 @@ + + :: + 0.0.0.0 + 1 + diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/Corefile b/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/Corefile new file mode 100644 index 00000000000..0dd198441dc --- /dev/null +++ b/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/Corefile @@ -0,0 +1,8 @@ +. { + hosts /example.com { + reload "200ms" + fallthrough + } + forward . 
127.0.0.11 + log +} diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/example.com b/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/example.com new file mode 100644 index 00000000000..9beb415c290 --- /dev/null +++ b/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/example.com @@ -0,0 +1 @@ +filled in runtime, but needs to exist in order to be volume mapped in docker \ No newline at end of file diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/test.py b/tests/integration/test_host_regexp_multiple_ptr_records/test.py new file mode 100644 index 00000000000..fa2917411e4 --- /dev/null +++ b/tests/integration/test_host_regexp_multiple_ptr_records/test.py @@ -0,0 +1,91 @@ +import pytest +from helpers.cluster import ClickHouseCluster, get_docker_compose_path, run_and_check +import os + +DOCKER_COMPOSE_PATH = get_docker_compose_path() +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + +cluster = ClickHouseCluster(__file__) + +ch_server = cluster.add_instance( + "clickhouse-server", + with_coredns=True, + main_configs=["configs/listen_host.xml"], + user_configs=["configs/host_regexp.xml"], + ipv6_address="2001:3984:3989::1:1111", +) + +client = cluster.add_instance( + "clickhouse-client", + ipv6_address="2001:3984:3989::1:1112", +) + + +@pytest.fixture(scope="module") +def started_cluster(): + global cluster + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def setup_dns_server(ip): + domains_string = "test3.example.com test2.example.com test1.example.com" + example_file_path = f'{ch_server.env_variables["COREDNS_CONFIG_DIR"]}/example.com' + run_and_check(f"echo '{ip} {domains_string}' > {example_file_path}", shell=True) + + +def setup_ch_server(dns_server_ip): + ch_server.exec_in_container( + (["bash", "-c", f"echo 'nameserver {dns_server_ip}' > /etc/resolv.conf"]) + ) + ch_server.exec_in_container( + (["bash", "-c", "echo 'options ndots:0' >> /etc/resolv.conf"]) + ) + ch_server.query("SYSTEM DROP DNS CACHE") + + +def build_endpoint_v4(ip): + return f"'http://{ip}:8123/?query=SELECT+1&user=test_dns'" + + +def build_endpoint_v6(ip): + return build_endpoint_v4(f"[{ip}]") + + +def test_host_regexp_multiple_ptr_v4_fails_with_wrong_resolution(started_cluster): + server_ip = cluster.get_instance_ip("clickhouse-server") + random_ip = "9.9.9.9" + dns_server_ip = cluster.get_instance_ip(cluster.coredns_host) + + setup_dns_server(random_ip) + setup_ch_server(dns_server_ip) + + endpoint = build_endpoint_v4(server_ip) + + assert "1\n" != client.exec_in_container((["bash", "-c", f"curl {endpoint}"])) + + +def test_host_regexp_multiple_ptr_v4(started_cluster): + server_ip = cluster.get_instance_ip("clickhouse-server") + client_ip = cluster.get_instance_ip("clickhouse-client") + dns_server_ip = cluster.get_instance_ip(cluster.coredns_host) + + setup_dns_server(client_ip) + setup_ch_server(dns_server_ip) + + endpoint = build_endpoint_v4(server_ip) + + assert "1\n" == client.exec_in_container((["bash", "-c", f"curl {endpoint}"])) + + +def test_host_regexp_multiple_ptr_v6(started_cluster): + setup_dns_server(client.ipv6_address) + setup_ch_server(cluster.get_instance_global_ipv6(cluster.coredns_host)) + + endpoint = build_endpoint_v6(ch_server.ipv6_address) + + assert "1\n" == client.exec_in_container((["bash", "-c", f"curl -6 {endpoint}"])) diff --git a/tests/queries/0_stateless/00849_multiple_comma_join_2.sql b/tests/queries/0_stateless/00849_multiple_comma_join_2.sql index 
eabede3ff00..eb803450ff7 100644 --- a/tests/queries/0_stateless/00849_multiple_comma_join_2.sql +++ b/tests/queries/0_stateless/00849_multiple_comma_join_2.sql @@ -1,5 +1,6 @@ SET enable_optimize_predicate_expression = 0; SET convert_query_to_cnf = 0; +SET cross_to_inner_join_rewrite = 1; DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/00950_test_gorilla_codec.sql b/tests/queries/0_stateless/00950_test_gorilla_codec.sql index a6e0f1d7b11..e9582480bcb 100644 --- a/tests/queries/0_stateless/00950_test_gorilla_codec.sql +++ b/tests/queries/0_stateless/00950_test_gorilla_codec.sql @@ -1,5 +1,7 @@ DROP TABLE IF EXISTS codecTest; +SET cross_to_inner_join_rewrite = 1; + CREATE TABLE codecTest ( key UInt64, name String, diff --git a/tests/queries/0_stateless/01095_tpch_like_smoke.sql b/tests/queries/0_stateless/01095_tpch_like_smoke.sql index 5971178ade5..1ac9ec229f0 100644 --- a/tests/queries/0_stateless/01095_tpch_like_smoke.sql +++ b/tests/queries/0_stateless/01095_tpch_like_smoke.sql @@ -7,6 +7,8 @@ DROP TABLE IF EXISTS lineitem; DROP TABLE IF EXISTS nation; DROP TABLE IF EXISTS region; +SET cross_to_inner_join_rewrite = 1; + CREATE TABLE part ( p_partkey Int32, -- PK diff --git a/tests/queries/0_stateless/01479_cross_join_9855.sql b/tests/queries/0_stateless/01479_cross_join_9855.sql index 0b549619489..6dc76f22057 100644 --- a/tests/queries/0_stateless/01479_cross_join_9855.sql +++ b/tests/queries/0_stateless/01479_cross_join_9855.sql @@ -1,3 +1,5 @@ +SET cross_to_inner_join_rewrite = 1; + SELECT count() FROM numbers(4) AS n1, numbers(3) AS n2 WHERE n1.number > (select avg(n.number) from numbers(3) n); diff --git a/tests/queries/0_stateless/01911_logical_error_minus.sql b/tests/queries/0_stateless/01911_logical_error_minus.sql index 9813c1a8a5d..3dcdedd38f5 100644 --- a/tests/queries/0_stateless/01911_logical_error_minus.sql +++ b/tests/queries/0_stateless/01911_logical_error_minus.sql @@ -1,6 +1,8 @@ -- This test case is almost completely generated by fuzzer. -- It appeared to trigger assertion. 
+SET cross_to_inner_join_rewrite = 1; + DROP TABLE IF EXISTS codecTest; CREATE TABLE codecTest ( diff --git a/tests/queries/0_stateless/02098_with_types_use_header.sh b/tests/queries/0_stateless/02098_with_types_use_header.sh index 5d88a994052..457182a08f2 100755 --- a/tests/queries/0_stateless/02098_with_types_use_header.sh +++ b/tests/queries/0_stateless/02098_with_types_use_header.sh @@ -19,9 +19,9 @@ echo -e "y\tz\tx\nString\tDate\tUInt32\ntext\t2020-01-01\t1" | $CLICKHOUSE_CLIEN echo -e "x\tz\ty\nUInt32\tString\tDate\n1\ttext\t2020-01-01" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT CustomSeparatedWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' echo "CSVWithNamesAndTypes" -echo -e "'x','y','z'\n'String','Date','UInt32'\n'text','2020-01-01',1" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT CSVWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' -echo -e "'y','z','x'\n'String','Date','UInt32'\n'text','2020-01-01',1" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT CSVWithNamesAndTypes" && echo 'OK' || echo 'FAIL' -echo -e "'x','z','y'\n'UInt32','String',Date'\n1,'text','2020-01-01'" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT CSVWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +echo -e "'x','y','z'\n'String','Date','UInt32'\n'text','2020-01-01',1" | $CLICKHOUSE_CLIENT --format_csv_allow_single_quotes=1 --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT CSVWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +echo -e "'y','z','x'\n'String','Date','UInt32'\n'text','2020-01-01',1" | $CLICKHOUSE_CLIENT --format_csv_allow_single_quotes=1 --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT CSVWithNamesAndTypes" && echo 'OK' || echo 'FAIL' +echo -e "'x','z','y'\n'UInt32','String',Date'\n1,'text','2020-01-01'" | $CLICKHOUSE_CLIENT --format_csv_allow_single_quotes=1 --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02098 FORMAT CSVWithNamesAndTypes" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' echo "JSONCompactEachRowWithNamesAndTypes" diff --git a/tests/queries/0_stateless/02155_csv_with_strings_with_slash.sh b/tests/queries/0_stateless/02155_csv_with_strings_with_slash.sh index 08d380bf559..4f38d662590 100755 --- a/tests/queries/0_stateless/02155_csv_with_strings_with_slash.sh +++ b/tests/queries/0_stateless/02155_csv_with_strings_with_slash.sh @@ -10,13 +10,13 @@ ${CLICKHOUSE_CLIENT} --query="create table test_02155_csv (A Int64, S String, D echo "input_format_null_as_default = 1" -cat $CUR_DIR/data_csv/csv_with_slash.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_02155_csv SETTINGS input_format_null_as_default = 1 FORMAT CSV" +cat $CUR_DIR/data_csv/csv_with_slash.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_02155_csv SETTINGS input_format_null_as_default = 1, format_csv_allow_single_quotes=1 FORMAT CSV" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM test_02155_csv" ${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE test_02155_csv" echo 
"input_format_null_as_default = 0" -cat $CUR_DIR/data_csv/csv_with_slash.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_02155_csv SETTINGS input_format_null_as_default = 0 FORMAT CSV" +cat $CUR_DIR/data_csv/csv_with_slash.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_02155_csv SETTINGS format_csv_allow_single_quotes = 1, input_format_null_as_default = 0 FORMAT CSV" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM test_02155_csv" diff --git a/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh index 6589765f739..e8aa5914912 100755 --- a/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh +++ b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh @@ -158,8 +158,10 @@ echo "CSV" echo -e "42,Some string,'[1, 2, 3, 4]','[(1, 2, 3)]' 42\,abcd,'[]','[(4, 5, 6)]'" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" -$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" +CLIENT_CMD="$CLICKHOUSE_CLIENT --format_csv_allow_single_quotes=1" + +$CLIENT_CMD -q "desc file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "select * from file('$FILE_NAME', 'CSV')" echo -e "\"[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'key2' : NULL}, [NULL], NULL)]\" '[]' @@ -168,8 +170,8 @@ echo -e "\"[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'k \"[({}, ['String3'], NULL)]\" \"[({'key3': NULL}, []), NULL]\""> $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV') settings input_format_csv_use_best_effort_in_schema_inference=false" -$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV') settings input_format_csv_use_best_effort_in_schema_inference=false" +$CLIENT_CMD -q "desc file('$FILE_NAME', 'CSV') settings input_format_csv_use_best_effort_in_schema_inference=false" +$CLIENT_CMD -q "select * from file('$FILE_NAME', 'CSV') settings input_format_csv_use_best_effort_in_schema_inference=false" echo -e "\"[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'key2' : NULL}, [NULL], NULL)]\" '[]' @@ -178,43 +180,43 @@ echo -e "\"[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'k \"[({}, ['String3'], NULL)]\" \"[({'key3': NULL}, [], NULL)]\""> $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" -$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "desc file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "select * from file('$FILE_NAME', 'CSV')" echo -e "true false \N" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" -$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "desc file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "select * from file('$FILE_NAME', 'CSV')" echo -e "'[true, NULL]' '[]' '[NULL]' '[false]'" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" -$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "desc file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "select * from file('$FILE_NAME', 'CSV')" echo -e "'(1, 2, 3)'"> $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" -$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "desc file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "select * from file('$FILE_NAME', 'CSV')" -echo -e "'123.123'"> $DATA_FILE +echo -e '"123.123"'> $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" -$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "desc 
file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "select * from file('$FILE_NAME', 'CSV')" echo -e "'[(1, 2, 3)]'"> $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" -$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "desc file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "select * from file('$FILE_NAME', 'CSV')" echo -e "\"[(1, 2, 3)]\""> $DATA_FILE -$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" -$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "desc file('$FILE_NAME', 'CSV')" +$CLIENT_CMD -q "select * from file('$FILE_NAME', 'CSV')" diff --git a/tests/queries/0_stateless/02313_group_by_modifiers_with_non-default_types.reference b/tests/queries/0_stateless/02313_group_by_modifiers_with_non_default_types.reference similarity index 100% rename from tests/queries/0_stateless/02313_group_by_modifiers_with_non-default_types.reference rename to tests/queries/0_stateless/02313_group_by_modifiers_with_non_default_types.reference diff --git a/tests/queries/0_stateless/02313_group_by_modifiers_with_non-default_types.sql b/tests/queries/0_stateless/02313_group_by_modifiers_with_non_default_types.sql similarity index 100% rename from tests/queries/0_stateless/02313_group_by_modifiers_with_non-default_types.sql rename to tests/queries/0_stateless/02313_group_by_modifiers_with_non_default_types.sql diff --git a/tests/queries/0_stateless/02313_test_fpc_codec.sql b/tests/queries/0_stateless/02313_test_fpc_codec.sql index 3b1127350f0..4fe54b87c9c 100644 --- a/tests/queries/0_stateless/02313_test_fpc_codec.sql +++ b/tests/queries/0_stateless/02313_test_fpc_codec.sql @@ -1,5 +1,7 @@ DROP TABLE IF EXISTS codecTest; +SET cross_to_inner_join_rewrite = 1; + CREATE TABLE codecTest ( key UInt64, name String, diff --git a/tests/queries/0_stateless/02324_compatibility_setting.reference b/tests/queries/0_stateless/02324_compatibility_setting.reference new file mode 100644 index 00000000000..e3a9ed7a73e --- /dev/null +++ b/tests/queries/0_stateless/02324_compatibility_setting.reference @@ -0,0 +1,19 @@ +allow_settings_after_format_in_insert +22.3 +1 +1 +22.4 +0 +22.5 +0 +async_socket_for_remote +21.2 +1 +21.3 +0 +21.4 +0 +21.5 +1 +21.6 +1 diff --git a/tests/queries/0_stateless/02324_compatibility_setting.sh b/tests/queries/0_stateless/02324_compatibility_setting.sh new file mode 100755 index 00000000000..043f045d9be --- /dev/null +++ b/tests/queries/0_stateless/02324_compatibility_setting.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +echo "allow_settings_after_format_in_insert" +echo "22.3" +$CLICKHOUSE_CLIENT --compatibility=22.3 -q "select value from system.settings where name='allow_settings_after_format_in_insert'" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&compatibility=22.3" -d "select value from system.settings where name='allow_settings_after_format_in_insert'" +echo "22.4" +$CLICKHOUSE_CLIENT --compatibility=22.4 -q "select value from system.settings where name='allow_settings_after_format_in_insert'" +echo "22.5" +$CLICKHOUSE_CLIENT --compatibility=22.5 -q "select value from system.settings where name='allow_settings_after_format_in_insert'" + + +echo "async_socket_for_remote" +echo "21.2" +$CLICKHOUSE_CLIENT --compatibility=21.2 -q "select value from system.settings where name='async_socket_for_remote'" +echo "21.3" +$CLICKHOUSE_CLIENT --compatibility=21.3 -q "select value from system.settings where name='async_socket_for_remote'" +echo "21.4" +$CLICKHOUSE_CLIENT --compatibility=21.4 -q "select value from system.settings where name='async_socket_for_remote'" +echo "21.5" +$CLICKHOUSE_CLIENT --compatibility=21.5 -q "select value from system.settings where name='async_socket_for_remote'" +echo "21.6" +$CLICKHOUSE_CLIENT --compatibility=21.6 -q "select value from system.settings where name='async_socket_for_remote'" + diff --git a/tests/queries/0_stateless/02325_compatibility_setting_2.reference b/tests/queries/0_stateless/02325_compatibility_setting_2.reference new file mode 100644 index 00000000000..9eed1825cc8 --- /dev/null +++ b/tests/queries/0_stateless/02325_compatibility_setting_2.reference @@ -0,0 +1,8 @@ +0 +1 +0 +1 +0 +1 +1 +1 diff --git a/tests/queries/0_stateless/02325_compatibility_setting_2.sql b/tests/queries/0_stateless/02325_compatibility_setting_2.sql new file mode 100644 index 00000000000..5ce0bf1ef8b --- /dev/null +++ b/tests/queries/0_stateless/02325_compatibility_setting_2.sql @@ -0,0 +1,13 @@ +select value from system.settings where name='allow_settings_after_format_in_insert'; +select value from system.settings where name='allow_settings_after_format_in_insert' settings compatibility='22.3'; +select value from system.settings where name='allow_settings_after_format_in_insert'; +set compatibility = '22.3'; +select value from system.settings where name='allow_settings_after_format_in_insert'; +set compatibility = '22.4'; +select value from system.settings where name='allow_settings_after_format_in_insert'; +set allow_settings_after_format_in_insert=1; +select value from system.settings where name='allow_settings_after_format_in_insert'; +set compatibility = '22.4'; +select value from system.settings where name='allow_settings_after_format_in_insert'; +set compatibility = '22.3'; +select value from system.settings where name='allow_settings_after_format_in_insert'; diff --git a/tests/queries/0_stateless/02326_settings_changes_system_table.reference b/tests/queries/0_stateless/02326_settings_changes_system_table.reference new file mode 100644 index 00000000000..c4a3c71edfd --- /dev/null +++ b/tests/queries/0_stateless/02326_settings_changes_system_table.reference @@ -0,0 +1,3 @@ +version String +changes Array(Tuple(name String, previous_value String, new_value String, reason String)) +22.5 [('memory_overcommit_ratio_denominator','0','1073741824','Enable memory overcommit feature by default'),('memory_overcommit_ratio_denominator_for_user','0','1073741824','Enable memory overcommit feature by default')] diff --git 
a/tests/queries/0_stateless/02326_settings_changes_system_table.sql b/tests/queries/0_stateless/02326_settings_changes_system_table.sql new file mode 100644 index 00000000000..e56cd62ce55 --- /dev/null +++ b/tests/queries/0_stateless/02326_settings_changes_system_table.sql @@ -0,0 +1,2 @@ +DESC system.settings_changes; +SELECT * FROM system.settings_changes WHERE version = '22.5' diff --git a/tests/queries/0_stateless/02343_group_by_use_nulls.reference b/tests/queries/0_stateless/02343_group_by_use_nulls.reference new file mode 100644 index 00000000000..24b7bb5277c --- /dev/null +++ b/tests/queries/0_stateless/02343_group_by_use_nulls.reference @@ -0,0 +1,215 @@ +-- { echoOn } +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; +0 0 0 +0 \N 0 +1 1 1 +1 \N 1 +2 0 2 +2 \N 2 +3 1 3 +3 \N 3 +4 0 4 +4 \N 4 +5 1 5 +5 \N 5 +6 0 6 +6 \N 6 +7 1 7 +7 \N 7 +8 0 8 +8 \N 8 +9 1 9 +9 \N 9 +\N \N 45 +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=0; +0 0 0 +0 0 0 +0 0 45 +1 0 1 +1 1 1 +2 0 2 +2 0 2 +3 0 3 +3 1 3 +4 0 4 +4 0 4 +5 0 5 +5 1 5 +6 0 6 +6 0 6 +7 0 7 +7 1 7 +8 0 8 +8 0 8 +9 0 9 +9 1 9 +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY CUBE(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; +0 0 0 +0 \N 0 +1 1 1 +1 \N 1 +2 0 2 +2 \N 2 +3 1 3 +3 \N 3 +4 0 4 +4 \N 4 +5 1 5 +5 \N 5 +6 0 6 +6 \N 6 +7 1 7 +7 \N 7 +8 0 8 +8 \N 8 +9 1 9 +9 \N 9 +\N 0 20 +\N 1 25 +\N \N 45 +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY CUBE(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=0; +0 0 0 +0 0 0 +0 0 20 +0 0 45 +0 1 25 +1 0 1 +1 1 1 +2 0 2 +2 0 2 +3 0 3 +3 1 3 +4 0 4 +4 0 4 +5 0 5 +5 1 5 +6 0 6 +6 0 6 +7 0 7 +7 1 7 +8 0 8 +8 0 8 +9 0 9 +9 1 9 +SELECT + number, + number % 2, + sum(number) AS val +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls = 1; +0 \N 0 +1 \N 1 +2 \N 2 +3 \N 3 +4 \N 4 +5 \N 5 +6 \N 6 +7 \N 7 +8 \N 8 +9 \N 9 +\N 0 20 +\N 1 25 +SELECT + number, + number % 2, + sum(number) AS val +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls = 0; +0 0 0 +0 0 20 +0 1 25 +1 0 1 +2 0 2 +3 0 3 +4 0 4 +5 0 5 +6 0 6 +7 0 7 +8 0 8 +9 0 9 +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) WITH TOTALS +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; +0 0 0 +0 \N 0 +1 1 1 +1 \N 1 +2 0 2 +2 \N 2 +3 1 3 +3 \N 3 +4 0 4 +4 \N 4 +5 1 5 +5 \N 5 +6 0 6 +6 \N 6 +7 1 7 +7 \N 7 +8 0 8 +8 \N 8 +9 1 9 +9 \N 9 +\N \N 45 + +0 0 45 +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY CUBE(number, number % 2) WITH TOTALS +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; +0 0 0 +0 \N 0 +1 1 1 +1 \N 1 +2 0 2 +2 \N 2 +3 1 3 +3 \N 3 +4 0 4 +4 \N 4 +5 1 5 +5 \N 5 +6 0 6 +6 \N 6 +7 1 7 +7 \N 7 +8 0 8 +8 \N 8 +9 1 9 +9 \N 9 +\N 0 20 +\N 1 25 +\N \N 45 + +0 0 45 diff --git a/tests/queries/0_stateless/02343_group_by_use_nulls.sql b/tests/queries/0_stateless/02343_group_by_use_nulls.sql new file mode 100644 index 00000000000..a14db824013 --- /dev/null +++ 
b/tests/queries/0_stateless/02343_group_by_use_nulls.sql @@ -0,0 +1,62 @@ +-- { echoOn } +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; + +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=0; + +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY CUBE(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; + +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY CUBE(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=0; + +SELECT + number, + number % 2, + sum(number) AS val +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls = 1; + +SELECT + number, + number % 2, + sum(number) AS val +FROM numbers(10) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls = 0; + +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY ROLLUP(number, number % 2) WITH TOTALS +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; + +SELECT number, number % 2, sum(number) AS val +FROM numbers(10) +GROUP BY CUBE(number, number % 2) WITH TOTALS +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; diff --git a/tests/queries/0_stateless/02343_group_by_use_nulls_distributed.reference b/tests/queries/0_stateless/02343_group_by_use_nulls_distributed.reference new file mode 100644 index 00000000000..7a9263e883c --- /dev/null +++ b/tests/queries/0_stateless/02343_group_by_use_nulls_distributed.reference @@ -0,0 +1,157 @@ +-- { echoOn } +SELECT number, number % 2, sum(number) AS val +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; +0 0 0 +0 \N 0 +1 1 2 +1 \N 2 +2 0 4 +2 \N 4 +3 1 6 +3 \N 6 +4 0 8 +4 \N 8 +5 1 10 +5 \N 10 +6 0 12 +6 \N 12 +7 1 14 +7 \N 14 +8 0 16 +8 \N 16 +9 1 18 +9 \N 18 +\N \N 90 +SELECT number, number % 2, sum(number) AS val +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=0; +0 0 0 +0 0 0 +0 0 90 +1 0 2 +1 1 2 +2 0 4 +2 0 4 +3 0 6 +3 1 6 +4 0 8 +4 0 8 +5 0 10 +5 1 10 +6 0 12 +6 0 12 +7 0 14 +7 1 14 +8 0 16 +8 0 16 +9 0 18 +9 1 18 +SELECT number, number % 2, sum(number) AS val +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY CUBE(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; +0 0 0 +0 \N 0 +1 1 2 +1 \N 2 +2 0 4 +2 \N 4 +3 1 6 +3 \N 6 +4 0 8 +4 \N 8 +5 1 10 +5 \N 10 +6 0 12 +6 \N 12 +7 1 14 +7 \N 14 +8 0 16 +8 \N 16 +9 1 18 +9 \N 18 +\N 0 40 +\N 1 50 +\N \N 90 +SELECT number, number % 2, sum(number) AS val +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY CUBE(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=0; +0 0 0 +0 0 0 +0 0 40 +0 0 90 +0 1 50 +1 0 2 +1 1 2 +2 0 4 +2 0 4 +3 0 6 +3 1 6 +4 0 8 +4 0 8 +5 0 10 +5 1 10 +6 0 12 +6 0 12 +7 0 14 +7 1 14 +8 0 16 +8 0 16 +9 0 18 +9 1 18 +SELECT + number, + number % 2, + sum(number) AS val +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY (number, number % 2, val) +SETTINGS 
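Note: as the 02343 reference output above illustrates, with group_by_use_nulls = 1 the ROLLUP/CUBE/GROUPING SETS subtotal rows report NULL for keys that are not part of the current grouping set, instead of the type's default value (0 or an empty string). A distilled sketch of the same behaviour:

    -- with the setting on, the total row shows parity = NULL rather than 0
    SELECT number % 2 AS parity, sum(number) AS val
    FROM numbers(10)
    GROUP BY ROLLUP(number % 2)
    SETTINGS group_by_use_nulls = 1;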
group_by_use_nulls = 1; +0 \N 0 +1 \N 2 +2 \N 4 +3 \N 6 +4 \N 8 +5 \N 10 +6 \N 12 +7 \N 14 +8 \N 16 +9 \N 18 +\N 0 40 +\N 1 50 +SELECT + number, + number % 2, + sum(number) AS val +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls = 0; +0 0 0 +0 0 40 +0 1 50 +1 0 2 +2 0 4 +3 0 6 +4 0 8 +5 0 10 +6 0 12 +7 0 14 +8 0 16 +9 0 18 diff --git a/tests/queries/0_stateless/02343_group_by_use_nulls_distributed.sql b/tests/queries/0_stateless/02343_group_by_use_nulls_distributed.sql new file mode 100644 index 00000000000..15ac1127de7 --- /dev/null +++ b/tests/queries/0_stateless/02343_group_by_use_nulls_distributed.sql @@ -0,0 +1,51 @@ +-- { echoOn } +SELECT number, number % 2, sum(number) AS val +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; + +SELECT number, number % 2, sum(number) AS val +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY ROLLUP(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=0; + +SELECT number, number % 2, sum(number) AS val +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY CUBE(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=1; + +SELECT number, number % 2, sum(number) AS val +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY CUBE(number, number % 2) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls=0; + +SELECT + number, + number % 2, + sum(number) AS val +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls = 1; + +SELECT + number, + number % 2, + sum(number) AS val +FROM remote('127.0.0.{2,3}', numbers(10)) +GROUP BY + GROUPING SETS ( + (number), + (number % 2) + ) +ORDER BY (number, number % 2, val) +SETTINGS group_by_use_nulls = 0; + diff --git a/tests/queries/0_stateless/02346_additional_filters.reference b/tests/queries/0_stateless/02346_additional_filters.reference new file mode 100644 index 00000000000..22d53173e71 --- /dev/null +++ b/tests/queries/0_stateless/02346_additional_filters.reference @@ -0,0 +1,263 @@ +-- { echoOn } + +select * from table_1; +1 a +2 bb +3 ccc +4 dddd +select * from table_1 settings additional_table_filters={'table_1' : 'x != 2'}; +1 a +3 ccc +4 dddd +select * from table_1 settings additional_table_filters={'table_1' : 'x != 2 and x != 3'}; +1 a +4 dddd +select x from table_1 settings additional_table_filters={'table_1' : 'x != 2'}; +1 +3 +4 +select y from table_1 settings additional_table_filters={'table_1' : 'x != 2'}; +a +ccc +dddd +select * from table_1 where x != 3 settings additional_table_filters={'table_1' : 'x != 2'}; +1 a +4 dddd +select * from table_1 prewhere x != 4 settings additional_table_filters={'table_1' : 'x != 2'}; +1 a +3 ccc +select * from table_1 prewhere x != 4 where x != 3 settings additional_table_filters={'table_1' : 'x != 2'}; +1 a +select x from table_1 where x != 3 settings additional_table_filters={'table_1' : 'x != 2'}; +1 +4 +select x from table_1 prewhere x != 4 settings additional_table_filters={'table_1' : 'x != 2'}; +1 +3 +select x from table_1 prewhere x != 4 where x != 3 settings additional_table_filters={'table_1' : 'x != 2'}; +1 +select y from table_1 where x != 3 settings additional_table_filters={'table_1' : 'x != 2'}; +a +dddd +select y from table_1 prewhere x != 4 settings 
additional_table_filters={'table_1' : 'x != 2'}; +a +ccc +select y from table_1 prewhere x != 4 where x != 3 settings additional_table_filters={'table_1' : 'x != 2'}; +a +select x from table_1 where x != 2 settings additional_table_filters={'table_1' : 'x != 2'}; +1 +3 +4 +select x from table_1 prewhere x != 2 settings additional_table_filters={'table_1' : 'x != 2'}; +1 +3 +4 +select x from table_1 prewhere x != 2 where x != 2 settings additional_table_filters={'table_1' : 'x != 2'}; +1 +3 +4 +select * from remote('127.0.0.{1,2}', system.one) settings additional_table_filters={'system.one' : 'dummy = 0'}; +0 +0 +select * from remote('127.0.0.{1,2}', system.one) settings additional_table_filters={'system.one' : 'dummy != 0'}; +select * from system.numbers limit 5; +0 +1 +2 +3 +4 +select * from system.numbers as t limit 5 settings additional_table_filters={'t' : 'number % 2 != 0'}; +1 +3 +5 +7 +9 +select * from system.numbers limit 5 settings additional_table_filters={'system.numbers' : 'number != 3'}; +0 +1 +2 +4 +5 +select * from system.numbers limit 5 settings additional_table_filters={'system.numbers':'number != 3','table_1':'x!=2'}; +0 +1 +2 +4 +5 +select * from (select number from system.numbers limit 5 union all select x from table_1) order by number settings additional_table_filters={'system.numbers':'number != 3','table_1':'x!=2'}; +0 +1 +1 +2 +3 +4 +4 +5 +select number, x, y from (select number from system.numbers limit 5) f any left join (select x, y from table_1) s on f.number = s.x settings additional_table_filters={'system.numbers' : 'number != 3', 'table_1' : 'x != 2'}; +0 0 +1 1 a +2 0 +4 4 dddd +5 0 +select b + 1 as c from (select a + 1 as b from (select x + 1 as a from table_1)) settings additional_table_filters={'table_1' : 'x != 2 and x != 3'}; +4 +7 +-- { echoOn } +select * from v_numbers; +1 +2 +3 +4 +5 +select * from v_numbers settings additional_table_filters={'system.numbers' : 'number != 3'}; +1 +2 +3 +5 +6 +select * from v_numbers settings additional_table_filters={'v_numbers' : 'x != 3'}; +1 +2 +4 +5 +select * from v_numbers settings additional_table_filters={'system.numbers' : 'number != 3', 'v_numbers' : 'x != 3'}; +1 +2 +5 +6 +-- additional filter for inner tables for Materialized View does not work because it does not create internal interpreter +-- probably it is expected +-- { echoOn } +select * from mv_table; +4 dddd +5 eeeee +6 ffffff +7 ggggggg +select * from mv_table settings additional_table_filters={'mv_table' : 'x != 5'}; +4 dddd +6 ffffff +7 ggggggg +select * from mv_table settings additional_table_filters={'table_1' : 'x != 5'}; +4 dddd +5 eeeee +6 ffffff +7 ggggggg +select * from mv_table settings additional_table_filters={'table_2' : 'x != 5'}; +4 dddd +5 eeeee +6 ffffff +7 ggggggg +-- additional filter for inner tables for Merge does not work because it does not create internal interpreter +-- probably it is expected +-- { echoOn } +select * from m_table order by x; +1 a +2 bb +3 ccc +4 dddd +4 dddd +5 eeeee +6 ffffff +7 ggggggg +select * from m_table order by x settings additional_table_filters={'table_1' : 'x != 2'}; +1 a +2 bb +3 ccc +4 dddd +4 dddd +5 eeeee +6 ffffff +7 ggggggg +select * from m_table order by x settings additional_table_filters={'table_2' : 'x != 5'}; +1 a +2 bb +3 ccc +4 dddd +4 dddd +5 eeeee +6 ffffff +7 ggggggg +select * from m_table order by x settings additional_table_filters={'table_1' : 'x != 2', 'table_2' : 'x != 5'}; +1 a +2 bb +3 ccc +4 dddd +4 dddd +5 eeeee +6 ffffff +7 ggggggg +select * from m_table order by x 
settings additional_table_filters={'table_1' : 'x != 4'}; +1 a +2 bb +3 ccc +4 dddd +4 dddd +5 eeeee +6 ffffff +7 ggggggg +select * from m_table order by x settings additional_table_filters={'table_2' : 'x != 4'}; +1 a +2 bb +3 ccc +4 dddd +4 dddd +5 eeeee +6 ffffff +7 ggggggg +select * from m_table order by x settings additional_table_filters={'table_1' : 'x != 4', 'table_2' : 'x != 4'}; +1 a +2 bb +3 ccc +4 dddd +4 dddd +5 eeeee +6 ffffff +7 ggggggg +select * from m_table order by x settings additional_table_filters={'m_table' : 'x != 4'}; +1 a +2 bb +3 ccc +5 eeeee +6 ffffff +7 ggggggg +select * from m_table order by x settings additional_table_filters={'m_table' : 'x != 4', 'table_1' : 'x != 2', 'table_2' : 'x != 5'}; +1 a +2 bb +3 ccc +5 eeeee +6 ffffff +7 ggggggg +-- additional_result_filter + +select * from table_1 settings additional_result_filter='x != 2'; +1 a +3 ccc +4 dddd +select *, x != 2 from table_1 settings additional_result_filter='x != 2'; +1 a 1 +3 ccc 1 +4 dddd 1 +select * from table_1 where x != 1 settings additional_result_filter='x != 2'; +3 ccc +4 dddd +select * from table_1 where x != 1 settings additional_result_filter='x != 2 and x != 3'; +4 dddd +select * from table_1 prewhere x != 3 where x != 1 settings additional_result_filter='x != 2'; +4 dddd +select * from table_1 limit 3 settings additional_result_filter='x != 2'; +1 a +3 ccc +select x + 1 from table_1 settings additional_result_filter='`plus(x, 1)` != 2'; +3 +4 +5 +select * from (select x + 1 as a, y from table_1 union all select x as a, y from table_1) order by a, y settings additional_result_filter='a = 3'; +3 bb +3 ccc +select * from (select x + 1 as a, y from table_1 union all select x as a, y from table_1) order by a, y settings additional_result_filter='a != 3'; +1 a +2 a +2 bb +4 ccc +4 dddd +5 dddd diff --git a/tests/queries/0_stateless/02346_additional_filters.sql b/tests/queries/0_stateless/02346_additional_filters.sql new file mode 100644 index 00000000000..9e0bee4549b --- /dev/null +++ b/tests/queries/0_stateless/02346_additional_filters.sql @@ -0,0 +1,95 @@ +drop table if exists table_1; +drop table if exists table_2; +drop table if exists v_numbers; +drop table if exists mv_table; + +create table table_1 (x UInt32, y String) engine = MergeTree order by x; +insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); + +-- { echoOn } + +select * from table_1; +select * from table_1 settings additional_table_filters={'table_1' : 'x != 2'}; +select * from table_1 settings additional_table_filters={'table_1' : 'x != 2 and x != 3'}; +select x from table_1 settings additional_table_filters={'table_1' : 'x != 2'}; +select y from table_1 settings additional_table_filters={'table_1' : 'x != 2'}; +select * from table_1 where x != 3 settings additional_table_filters={'table_1' : 'x != 2'}; +select * from table_1 prewhere x != 4 settings additional_table_filters={'table_1' : 'x != 2'}; +select * from table_1 prewhere x != 4 where x != 3 settings additional_table_filters={'table_1' : 'x != 2'}; +select x from table_1 where x != 3 settings additional_table_filters={'table_1' : 'x != 2'}; +select x from table_1 prewhere x != 4 settings additional_table_filters={'table_1' : 'x != 2'}; +select x from table_1 prewhere x != 4 where x != 3 settings additional_table_filters={'table_1' : 'x != 2'}; +select y from table_1 where x != 3 settings additional_table_filters={'table_1' : 'x != 2'}; +select y from table_1 prewhere x != 4 settings additional_table_filters={'table_1' : 'x != 2'}; +select y 
from table_1 prewhere x != 4 where x != 3 settings additional_table_filters={'table_1' : 'x != 2'}; +select x from table_1 where x != 2 settings additional_table_filters={'table_1' : 'x != 2'}; +select x from table_1 prewhere x != 2 settings additional_table_filters={'table_1' : 'x != 2'}; +select x from table_1 prewhere x != 2 where x != 2 settings additional_table_filters={'table_1' : 'x != 2'}; + +select * from remote('127.0.0.{1,2}', system.one) settings additional_table_filters={'system.one' : 'dummy = 0'}; +select * from remote('127.0.0.{1,2}', system.one) settings additional_table_filters={'system.one' : 'dummy != 0'}; + +select * from system.numbers limit 5; +select * from system.numbers as t limit 5 settings additional_table_filters={'t' : 'number % 2 != 0'}; +select * from system.numbers limit 5 settings additional_table_filters={'system.numbers' : 'number != 3'}; +select * from system.numbers limit 5 settings additional_table_filters={'system.numbers':'number != 3','table_1':'x!=2'}; +select * from (select number from system.numbers limit 5 union all select x from table_1) order by number settings additional_table_filters={'system.numbers':'number != 3','table_1':'x!=2'}; +select number, x, y from (select number from system.numbers limit 5) f any left join (select x, y from table_1) s on f.number = s.x settings additional_table_filters={'system.numbers' : 'number != 3', 'table_1' : 'x != 2'}; +select b + 1 as c from (select a + 1 as b from (select x + 1 as a from table_1)) settings additional_table_filters={'table_1' : 'x != 2 and x != 3'}; + +-- { echoOff } + +create view v_numbers as select number + 1 as x from system.numbers limit 5; + +-- { echoOn } +select * from v_numbers; +select * from v_numbers settings additional_table_filters={'system.numbers' : 'number != 3'}; +select * from v_numbers settings additional_table_filters={'v_numbers' : 'x != 3'}; +select * from v_numbers settings additional_table_filters={'system.numbers' : 'number != 3', 'v_numbers' : 'x != 3'}; + +-- { echoOff } + +create table table_2 (x UInt32, y String) engine = MergeTree order by x; +insert into table_2 values (4, 'dddd'), (5, 'eeeee'), (6, 'ffffff'), (7, 'ggggggg'); + +create materialized view mv_table to table_2 (x UInt32, y String) as select * from table_1; + +-- additional filter for inner tables for Materialized View does not work because it does not create internal interpreter +-- probably it is expected +-- { echoOn } +select * from mv_table; +select * from mv_table settings additional_table_filters={'mv_table' : 'x != 5'}; +select * from mv_table settings additional_table_filters={'table_1' : 'x != 5'}; +select * from mv_table settings additional_table_filters={'table_2' : 'x != 5'}; + +-- { echoOff } + +create table m_table (x UInt32, y String) engine = Merge(currentDatabase(), '^table_'); + +-- additional filter for inner tables for Merge does not work because it does not create internal interpreter +-- probably it is expected +-- { echoOn } +select * from m_table order by x; +select * from m_table order by x settings additional_table_filters={'table_1' : 'x != 2'}; +select * from m_table order by x settings additional_table_filters={'table_2' : 'x != 5'}; +select * from m_table order by x settings additional_table_filters={'table_1' : 'x != 2', 'table_2' : 'x != 5'}; +select * from m_table order by x settings additional_table_filters={'table_1' : 'x != 4'}; +select * from m_table order by x settings additional_table_filters={'table_2' : 'x != 4'}; +select * from m_table order by x 
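Note: the 02346 test covers the new additional_table_filters and additional_result_filter settings: the per-table map is applied right after reading the named table, while additional_result_filter is applied to the final query result. Minimal examples against the test table, taken from the test itself:

    SELECT * FROM table_1
    SETTINGS additional_table_filters = {'table_1' : 'x != 2'};

    SELECT * FROM table_1
    SETTINGS additional_result_filter = 'x != 2';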
settings additional_table_filters={'table_1' : 'x != 4', 'table_2' : 'x != 4'}; +select * from m_table order by x settings additional_table_filters={'m_table' : 'x != 4'}; +select * from m_table order by x settings additional_table_filters={'m_table' : 'x != 4', 'table_1' : 'x != 2', 'table_2' : 'x != 5'}; + +-- additional_result_filter + +select * from table_1 settings additional_result_filter='x != 2'; +select *, x != 2 from table_1 settings additional_result_filter='x != 2'; +select * from table_1 where x != 1 settings additional_result_filter='x != 2'; +select * from table_1 where x != 1 settings additional_result_filter='x != 2 and x != 3'; +select * from table_1 prewhere x != 3 where x != 1 settings additional_result_filter='x != 2'; + +select * from table_1 limit 3 settings additional_result_filter='x != 2'; + +select x + 1 from table_1 settings additional_result_filter='`plus(x, 1)` != 2'; + +select * from (select x + 1 as a, y from table_1 union all select x as a, y from table_1) order by a, y settings additional_result_filter='a = 3'; +select * from (select x + 1 as a, y from table_1 union all select x as a, y from table_1) order by a, y settings additional_result_filter='a != 3'; diff --git a/tests/queries/0_stateless/02353_explain_ast_optimize.reference b/tests/queries/0_stateless/02353_explain_ast_optimize.reference new file mode 100644 index 00000000000..f4e0de5ca98 --- /dev/null +++ b/tests/queries/0_stateless/02353_explain_ast_optimize.reference @@ -0,0 +1,53 @@ +-- { echoOn } +EXPLAIN AST optimize=0 SELECT * FROM numbers(0); +SelectWithUnionQuery (children 1) + ExpressionList (children 1) + SelectQuery (children 2) + ExpressionList (children 1) + Asterisk + TablesInSelectQuery (children 1) + TablesInSelectQueryElement (children 1) + TableExpression (children 1) + Function numbers (children 1) + ExpressionList (children 1) + Literal UInt64_0 +EXPLAIN AST optimize=1 SELECT * FROM numbers(0); +SelectWithUnionQuery (children 1) + ExpressionList (children 1) + SelectQuery (children 2) + ExpressionList (children 1) + Identifier number + TablesInSelectQuery (children 1) + TablesInSelectQueryElement (children 1) + TableExpression (children 1) + Function numbers (children 1) + ExpressionList (children 1) + Literal UInt64_0 +EXPLAIN AST optimize=0 SELECT countDistinct(number) FROM numbers(0); +SelectWithUnionQuery (children 1) + ExpressionList (children 1) + SelectQuery (children 2) + ExpressionList (children 1) + Function countDistinct (children 1) + ExpressionList (children 1) + Identifier number + TablesInSelectQuery (children 1) + TablesInSelectQueryElement (children 1) + TableExpression (children 1) + Function numbers (children 1) + ExpressionList (children 1) + Literal UInt64_0 +EXPLAIN AST optimize=1 SELECT countDistinct(number) FROM numbers(0); +SelectWithUnionQuery (children 1) + ExpressionList (children 1) + SelectQuery (children 2) + ExpressionList (children 1) + Function uniqExact (children 1) + ExpressionList (children 1) + Identifier number + TablesInSelectQuery (children 1) + TablesInSelectQueryElement (children 1) + TableExpression (children 1) + Function numbers (children 1) + ExpressionList (children 1) + Literal UInt64_0 diff --git a/tests/queries/0_stateless/02353_explain_ast_optimize.sql b/tests/queries/0_stateless/02353_explain_ast_optimize.sql new file mode 100644 index 00000000000..a46a47a2e64 --- /dev/null +++ b/tests/queries/0_stateless/02353_explain_ast_optimize.sql @@ -0,0 +1,6 @@ +-- { echoOn } +EXPLAIN AST optimize=0 SELECT * FROM numbers(0); +EXPLAIN 
AST optimize=1 SELECT * FROM numbers(0); +EXPLAIN AST optimize=0 SELECT countDistinct(number) FROM numbers(0); +EXPLAIN AST optimize=1 SELECT countDistinct(number) FROM numbers(0); +-- { echoOff } diff --git a/tests/queries/0_stateless/02353_explain_ast_rewrite.reference b/tests/queries/0_stateless/02353_explain_ast_rewrite.reference deleted file mode 100644 index 5ee3e0d126c..00000000000 --- a/tests/queries/0_stateless/02353_explain_ast_rewrite.reference +++ /dev/null @@ -1,25 +0,0 @@ --- { echoOn } -EXPLAIN AST rewrite=0 SELECT * FROM numbers(0); -SelectWithUnionQuery (children 1) - ExpressionList (children 1) - SelectQuery (children 2) - ExpressionList (children 1) - Asterisk - TablesInSelectQuery (children 1) - TablesInSelectQueryElement (children 1) - TableExpression (children 1) - Function numbers (children 1) - ExpressionList (children 1) - Literal UInt64_0 -EXPLAIN AST rewrite=1 SELECT * FROM numbers(0); -SelectWithUnionQuery (children 1) - ExpressionList (children 1) - SelectQuery (children 2) - ExpressionList (children 1) - Identifier number - TablesInSelectQuery (children 1) - TablesInSelectQueryElement (children 1) - TableExpression (children 1) - Function numbers (children 1) - ExpressionList (children 1) - Literal UInt64_0 diff --git a/tests/queries/0_stateless/02353_explain_ast_rewrite.sql b/tests/queries/0_stateless/02353_explain_ast_rewrite.sql deleted file mode 100644 index 7310aa62704..00000000000 --- a/tests/queries/0_stateless/02353_explain_ast_rewrite.sql +++ /dev/null @@ -1,4 +0,0 @@ --- { echoOn } -EXPLAIN AST rewrite=0 SELECT * FROM numbers(0); -EXPLAIN AST rewrite=1 SELECT * FROM numbers(0); --- { echoOff } diff --git a/tests/queries/0_stateless/02354_parse_timedelta.reference b/tests/queries/0_stateless/02354_parse_timedelta.reference index 9bc208ada9d..f9dd7879057 100644 --- a/tests/queries/0_stateless/02354_parse_timedelta.reference +++ b/tests/queries/0_stateless/02354_parse_timedelta.reference @@ -5,3 +5,7 @@ 36806400 1331 40273293 +1.001001001 +1.001001001 +1.001001001 +1.11111111111 diff --git a/tests/queries/0_stateless/02354_parse_timedelta.sql b/tests/queries/0_stateless/02354_parse_timedelta.sql index 3cf282d715a..29f2bf9fdfc 100644 --- a/tests/queries/0_stateless/02354_parse_timedelta.sql +++ b/tests/queries/0_stateless/02354_parse_timedelta.sql @@ -5,6 +5,10 @@ SELECT parseTimeDelta('0.00123 seconds'); SELECT parseTimeDelta('1yr2mo'); SELECT parseTimeDelta('11s+22min'); SELECT parseTimeDelta('1yr-2mo-4w + 12 days, 3 hours : 1 minute ; 33 seconds'); +SELECT parseTimeDelta('1s1ms1us1ns'); +SELECT parseTimeDelta('1s1ms1μs1ns'); +SELECT parseTimeDelta('1s - 1ms : 1μs ; 1ns'); +SELECT parseTimeDelta('1.11s1.11ms1.11us1.11ns'); -- invalid expressions SELECT parseTimeDelta(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} diff --git a/tests/queries/0_stateless/02354_tuple_element_with_default.reference b/tests/queries/0_stateless/02354_tuple_element_with_default.reference new file mode 100644 index 00000000000..d5dfff17ef1 --- /dev/null +++ b/tests/queries/0_stateless/02354_tuple_element_with_default.reference @@ -0,0 +1,26 @@ +z +SELECT tupleElement(t1, \'z\', \'z\') +FROM t_tuple_element_default +0 +SELECT tupleElement(t1, \'z\', 0) +FROM t_tuple_element_default +z +SELECT tupleElement(t2, \'z\', \'z\') +FROM t_tuple_element_default +-------------------- +[(3,4)] +SELECT tupleElement([(1, 2)], \'a\', [(3, 4)]) +-------------------- +SELECT tupleElement(t1, \'a\', [tuple(1)]) +FROM t_tuple_element_default +-------------------- +[(0)] +SELECT 
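Note: the deleted 02353_explain_ast_rewrite test together with the new 02353_explain_ast_optimize test suggests the EXPLAIN AST flag was renamed from rewrite to optimize; with optimize=1 the dumped AST is shown after query rewriting (countDistinct becomes uniqExact in the reference above). The same hunk set also extends parseTimeDelta to sub-second units. Two one-liners grounded in the new tests:

    EXPLAIN AST optimize=1 SELECT countDistinct(number) FROM numbers(0);
    SELECT parseTimeDelta('1s1ms1us1ns');  -- 1.001001001 per the new reference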
tupleElement(t1, \'a\', [tuple(0)]) +FROM t_tuple_element_default +[0] +SELECT tupleElement(t1, \'a\', [0]) +FROM t_tuple_element_default +[0] +[0] +SELECT tupleElement(t1, \'a\', [0]) +FROM t_tuple_element_default diff --git a/tests/queries/0_stateless/02354_tuple_element_with_default.sql b/tests/queries/0_stateless/02354_tuple_element_with_default.sql new file mode 100644 index 00000000000..908a869885b --- /dev/null +++ b/tests/queries/0_stateless/02354_tuple_element_with_default.sql @@ -0,0 +1,50 @@ +DROP TABLE IF EXISTS t_tuple_element_default; + +CREATE TABLE t_tuple_element_default(t1 Tuple(a UInt32, s String), t2 Tuple(UInt32, String)) ENGINE = Memory; +INSERT INTO t_tuple_element_default VALUES ((1, 'a'), (2, 'b')); + +SELECT tupleElement(t1, 'z', 'z') FROM t_tuple_element_default; +EXPLAIN SYNTAX SELECT tupleElement(t1, 'z', 'z') FROM t_tuple_element_default; +SELECT tupleElement(t1, 'z', 0) FROM t_tuple_element_default; +EXPLAIN SYNTAX SELECT tupleElement(t1, 'z', 0) FROM t_tuple_element_default; +SELECT tupleElement(t2, 'z', 'z') FROM t_tuple_element_default; +EXPLAIN SYNTAX SELECT tupleElement(t2, 'z', 'z') FROM t_tuple_element_default; + +SELECT tupleElement(t1, 3, 'z') FROM t_tuple_element_default; -- { serverError 127 } +SELECT tupleElement(t1, 0, 'z') FROM t_tuple_element_default; -- { serverError 127 } + +DROP TABLE t_tuple_element_default; + +SELECT '--------------------'; + +SELECT tupleElement(array(tuple(1, 2)), 'a', 0); -- { serverError 645 } +SELECT tupleElement(array(tuple(1, 2)), 'a', array(tuple(1, 2), tuple(3, 4))); -- { serverError 190 } +SELECT tupleElement(array(array(tuple(1))), 'a', array(array(1, 2, 3))); -- { serverError 190 } + +SELECT tupleElement(array(tuple(1, 2)), 'a', array(tuple(3, 4))); +EXPLAIN SYNTAX SELECT tupleElement(array(tuple(1, 2)), 'a', array(tuple(3, 4))); + +SELECT '--------------------'; + +CREATE TABLE t_tuple_element_default(t1 Array(Tuple(UInt32)), t2 UInt32) ENGINE = Memory; + +SELECT tupleElement(t1, 'a', array(tuple(1))) FROM t_tuple_element_default; +EXPLAIN SYNTAX SELECT tupleElement(t1, 'a', array(tuple(1))) FROM t_tuple_element_default; + +SELECT '--------------------'; + +INSERT INTO t_tuple_element_default VALUES ([(1)], 100); + +SELECT tupleElement(t1, 'a', array(tuple(0))) FROM t_tuple_element_default; +EXPLAIN SYNTAX SELECT tupleElement(t1, 'a', array(tuple(0))) FROM t_tuple_element_default; + +SELECT tupleElement(t1, 'a', array(0)) FROM t_tuple_element_default; +EXPLAIN SYNTAX SELECT tupleElement(t1, 'a', array(0)) FROM t_tuple_element_default; + +INSERT INTO t_tuple_element_default VALUES ([(2)], 200); + +SELECT tupleElement(t1, 'a', array(0)) FROM t_tuple_element_default; +EXPLAIN SYNTAX SELECT tupleElement(t1, 'a', array(0)) FROM t_tuple_element_default; + +DROP TABLE t_tuple_element_default; + diff --git a/tests/queries/0_stateless/02364_dictionary_datetime_64_attribute_crash.reference b/tests/queries/0_stateless/02364_dictionary_datetime_64_attribute_crash.reference new file mode 100644 index 00000000000..cd97db4debd --- /dev/null +++ b/tests/queries/0_stateless/02364_dictionary_datetime_64_attribute_crash.reference @@ -0,0 +1,2 @@ +2022-01-24 02:30:00.008122000 +1 diff --git a/tests/queries/0_stateless/02364_dictionary_datetime_64_attribute_crash.sql b/tests/queries/0_stateless/02364_dictionary_datetime_64_attribute_crash.sql new file mode 100644 index 00000000000..77fc9e1183b --- /dev/null +++ b/tests/queries/0_stateless/02364_dictionary_datetime_64_attribute_crash.sql @@ -0,0 +1,15 @@ +create table dat (blockNum 
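Note: 02354_tuple_element_with_default covers the third argument of tupleElement: when the requested element does not exist in the tuple, the provided default is returned instead of raising an error. A minimal sketch reusing the test table, where t1 is Tuple(a UInt32, s String):

    -- there is no element 'z' in t1, so the default 'z' is returned
    SELECT tupleElement(t1, 'z', 'z') FROM t_tuple_element_default;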
Decimal(10,0), eventTimestamp DateTime64(9)) Engine=MergeTree() primary key eventTimestamp; +insert into dat values (1, '2022-01-24 02:30:00.008122000'); + +CREATE DICTIONARY datDictionary +( + `blockNum` Decimal(10, 0), + `eventTimestamp` DateTime64(9) +) +PRIMARY KEY blockNum +SOURCE(CLICKHOUSE(TABLE 'dat')) +LIFETIME(MIN 0 MAX 1000) +LAYOUT(FLAT()); + +select (select eventTimestamp from datDictionary); +select count(*) from dat where eventTimestamp >= (select eventTimestamp from datDictionary); diff --git a/tests/queries/0_stateless/02364_setting_cross_to_inner_rewrite.reference b/tests/queries/0_stateless/02364_setting_cross_to_inner_rewrite.reference new file mode 100644 index 00000000000..fcb49fa9945 --- /dev/null +++ b/tests/queries/0_stateless/02364_setting_cross_to_inner_rewrite.reference @@ -0,0 +1,7 @@ +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02364_setting_cross_to_inner_rewrite.sql b/tests/queries/0_stateless/02364_setting_cross_to_inner_rewrite.sql new file mode 100644 index 00000000000..cdbac93937e --- /dev/null +++ b/tests/queries/0_stateless/02364_setting_cross_to_inner_rewrite.sql @@ -0,0 +1,22 @@ + + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 ( x Int ) Engine = Memory; +INSERT INTO t1 VALUES ( 1 ), ( 2 ), ( 3 ); + +CREATE TABLE t2 ( x Int ) Engine = Memory; +INSERT INTO t2 VALUES ( 2 ), ( 3 ), ( 4 ); + +SET cross_to_inner_join_rewrite = 1; +SELECT count() = 1 FROM t1, t2 WHERE t1.x > t2.x; +SELECT count() = 2 FROM t1, t2 WHERE t1.x = t2.x; +SELECT count() = 2 FROM t1 CROSS JOIN t2 WHERE t1.x = t2.x; +SELECT count() = 1 FROM t1 CROSS JOIN t2 WHERE t1.x > t2.x; + +SET cross_to_inner_join_rewrite = 2; +SELECT count() = 1 FROM t1, t2 WHERE t1.x > t2.x; -- { serverError INCORRECT_QUERY } +SELECT count() = 2 FROM t1, t2 WHERE t1.x = t2.x; +SELECT count() = 2 FROM t1 CROSS JOIN t2 WHERE t1.x = t2.x; +SELECT count() = 1 FROM t1 CROSS JOIN t2 WHERE t1.x > t2.x; -- do not force rewrite explicit CROSS diff --git a/tests/queries/0_stateless/02370_extractAll_regress.reference b/tests/queries/0_stateless/02370_extractAll_regress.reference new file mode 100644 index 00000000000..aad46128e52 --- /dev/null +++ b/tests/queries/0_stateless/02370_extractAll_regress.reference @@ -0,0 +1 @@ +{"a":"1","b":"2","c":"","d":"4"}{"a":"1","b":"2","c":"","d":"4"}{"a":"1","b":"2","c":"","d":"4"}{"a":"1","b":"2","c":"","d":"4"} ['a','b','c','d','a','b','c','d','a','b','c','d','a','b','c','d'] [':"',':"',':"',':"',':"',':"',':"',':"',':"',':"',':"',':"',':"',':"',':"',':"'] diff --git a/tests/queries/0_stateless/02370_extractAll_regress.sql b/tests/queries/0_stateless/02370_extractAll_regress.sql new file mode 100644 index 00000000000..6d255124948 --- /dev/null +++ b/tests/queries/0_stateless/02370_extractAll_regress.sql @@ -0,0 +1,5 @@ +-- Regression for UB (stack-use-after-scope) in extactAll() +SELECT + '{"a":"1","b":"2","c":"","d":"4"}{"a":"1","b":"2","c":"","d":"4"}{"a":"1","b":"2","c":"","d":"4"}{"a":"1","b":"2","c":"","d":"4"}' AS json, + extractAll(json, '"([^"]*)":') AS keys, + extractAll(json, ':"\0[^"]*)"') AS values; diff --git a/tests/queries/1_stateful/00173_group_by_use_nulls.reference b/tests/queries/1_stateful/00173_group_by_use_nulls.reference new file mode 100644 index 00000000000..02723bf14dd --- /dev/null +++ b/tests/queries/1_stateful/00173_group_by_use_nulls.reference @@ -0,0 +1,10 @@ +1704509 1384 +732797 1336 +598875 1384 +792887 1336 +3807842 1336 +25703952 1336 +716829 1384 +59183 1336 +33010362 1336 +800784 1336 diff --git 
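Note: 02364_setting_cross_to_inner_rewrite documents the behaviour of cross_to_inner_join_rewrite = 2: an implicit comma join whose condition cannot be turned into an equi-join is rejected with INCORRECT_QUERY, while an explicit CROSS JOIN is left as is. A sketch using the t1/t2 tables from that test:

    SET cross_to_inner_join_rewrite = 2;
    SELECT count() FROM t1, t2 WHERE t1.x > t2.x;            -- { serverError INCORRECT_QUERY }
    SELECT count() FROM t1 CROSS JOIN t2 WHERE t1.x > t2.x;  -- explicit CROSS JOIN is not forced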
a/tests/queries/1_stateful/00173_group_by_use_nulls.sql b/tests/queries/1_stateful/00173_group_by_use_nulls.sql new file mode 100644 index 00000000000..7acacc4e579 --- /dev/null +++ b/tests/queries/1_stateful/00173_group_by_use_nulls.sql @@ -0,0 +1,10 @@ +SELECT + CounterID AS k, + quantileBFloat16(0.5)(ResolutionWidth) +FROM remote('127.0.0.{1,2}', test, hits) +GROUP BY k +ORDER BY + count() DESC, + CounterID ASC +LIMIT 10 +SETTINGS group_by_use_nulls = 1; diff --git a/utils/security-generator/SECURITY.md.sh b/utils/security-generator/SECURITY.md.sh index 381f5b4eaa6..15933da7942 100755 --- a/utils/security-generator/SECURITY.md.sh +++ b/utils/security-generator/SECURITY.md.sh @@ -33,7 +33,7 @@ FROM FROM ( WITH - extractGroups(version, 'v(\\d+).(\\d+)') AS v, + extractGroups(version, 'v(\\d+)\\.(\\d+)') AS v, v[1]::UInt8 AS y, v[2]::UInt8 AS m SELECT