diff --git a/.github/ISSUE_TEMPLATE/85_bug-report.md b/.github/ISSUE_TEMPLATE/85_bug-report.md index 08d03c284ca..fde5917a8a7 100644 --- a/.github/ISSUE_TEMPLATE/85_bug-report.md +++ b/.github/ISSUE_TEMPLATE/85_bug-report.md @@ -21,8 +21,7 @@ assignees: '' **Enable crash reporting** -> If possible, change "enabled" to true in "send_crash_reports" section in `config.xml`: - +> Change "enabled" to true in "send_crash_reports" section in `config.xml`: ``` diff --git a/.gitmodules b/.gitmodules index 70e034b0742..4ebccaec4e9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,7 +13,6 @@ [submodule "contrib/zlib-ng"] path = contrib/zlib-ng url = https://github.com/ClickHouse/zlib-ng - branch = clickhouse-2.0.x [submodule "contrib/googletest"] path = contrib/googletest url = https://github.com/google/googletest @@ -47,7 +46,6 @@ [submodule "contrib/arrow"] path = contrib/arrow url = https://github.com/ClickHouse/arrow - branch = blessed/release-6.0.1 [submodule "contrib/thrift"] path = contrib/thrift url = https://github.com/apache/thrift @@ -93,7 +91,6 @@ [submodule "contrib/grpc"] path = contrib/grpc url = https://github.com/ClickHouse/grpc - branch = v1.33.2 [submodule "contrib/aws"] path = contrib/aws url = https://github.com/ClickHouse/aws-sdk-cpp @@ -140,11 +137,9 @@ [submodule "contrib/cassandra"] path = contrib/cassandra url = https://github.com/ClickHouse/cpp-driver - branch = clickhouse [submodule "contrib/libuv"] path = contrib/libuv url = https://github.com/ClickHouse/libuv - branch = clickhouse [submodule "contrib/fmtlib"] path = contrib/fmtlib url = https://github.com/fmtlib/fmt @@ -157,11 +152,9 @@ [submodule "contrib/cyrus-sasl"] path = contrib/cyrus-sasl url = https://github.com/ClickHouse/cyrus-sasl - branch = cyrus-sasl-2.1 [submodule "contrib/croaring"] path = contrib/croaring url = https://github.com/RoaringBitmap/CRoaring - branch = v0.2.66 [submodule "contrib/miniselect"] path = contrib/miniselect url = https://github.com/danlark1/miniselect @@ -174,7 +167,6 @@ [submodule "contrib/abseil-cpp"] path = contrib/abseil-cpp url = https://github.com/abseil/abseil-cpp - branch = lts_2021_11_02 [submodule "contrib/dragonbox"] path = contrib/dragonbox url = https://github.com/ClickHouse/dragonbox @@ -187,7 +179,6 @@ [submodule "contrib/boringssl"] path = contrib/boringssl url = https://github.com/ClickHouse/boringssl - branch = unknown_branch_from_artur [submodule "contrib/NuRaft"] path = contrib/NuRaft url = https://github.com/ClickHouse/NuRaft @@ -248,7 +239,6 @@ [submodule "contrib/annoy"] path = contrib/annoy url = https://github.com/ClickHouse/annoy - branch = ClickHouse-master [submodule "contrib/qpl"] path = contrib/qpl url = https://github.com/intel/qpl @@ -282,7 +272,6 @@ [submodule "contrib/openssl"] path = contrib/openssl url = https://github.com/openssl/openssl - branch = openssl-3.0 [submodule "contrib/google-benchmark"] path = contrib/google-benchmark url = https://github.com/google/benchmark diff --git a/CHANGELOG.md b/CHANGELOG.md index ce57affb161..aa3b998570a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v23.8 LTS, 2023-08-31](#238)**
**[ClickHouse release v23.7, 2023-07-27](#237)**
**[ClickHouse release v23.6, 2023-06-30](#236)**
**[ClickHouse release v23.5, 2023-06-08](#235)**
@@ -10,6 +11,228 @@ # 2023 Changelog +### ClickHouse release 23.8 LTS, 2023-08-31 + +#### Backward Incompatible Change +* If a dynamic disk contains a name, it should be specified as `disk = disk(name = 'disk_name', ...)` in disk function arguments. In previous versions it could be specified as `disk = disk_(...)`, which is no longer supported. [#52820](https://github.com/ClickHouse/ClickHouse/pull/52820) ([Kseniia Sumarokova](https://github.com/kssenii)). +* `clickhouse-benchmark` will establish connections in parallel when invoked with `--concurrency` greater than one. Previously it was unusable if you ran it with 1000 concurrent connections from Europe to the US. Correct calculation of QPS for connections with high latency. Backward incompatible change: the option for JSON output of `clickhouse-benchmark` is removed. If you've used this option, you can also extract data from the `system.query_log` in JSON format as a workaround. [#53293](https://github.com/ClickHouse/ClickHouse/pull/53293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The `microseconds` column is removed from the `system.text_log`, and the `milliseconds` column is removed from the `system.metric_log`, because they are redundant in the presence of the `event_time_microseconds` column. [#53601](https://github.com/ClickHouse/ClickHouse/pull/53601) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Deprecate the metadata cache feature. It is experimental and we have never used it. The feature is dangerous: [#51182](https://github.com/ClickHouse/ClickHouse/issues/51182). Remove the `system.merge_tree_metadata_cache` system table. The metadata cache is still available in this version but will be removed soon. This closes [#39197](https://github.com/ClickHouse/ClickHouse/issues/39197). [#51303](https://github.com/ClickHouse/ClickHouse/pull/51303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable support for 3DES in TLS connections. [#52893](https://github.com/ClickHouse/ClickHouse/pull/52893) ([Kenji Noguchi](https://github.com/knoguchi)). + +#### New Feature +* Direct import from zip/7z/tar archives. Example: `file('*.zip :: *.csv')`. [#50321](https://github.com/ClickHouse/ClickHouse/pull/50321) ([nikitakeba](https://github.com/nikitakeba)). +* Add column `ptr` to `system.trace_log` for `trace_type = 'MemorySample'`. This column contains the address of the allocation. Added function `flameGraph` which can build a flamegraph containing allocated and not released memory. Reworking of [#38391](https://github.com/ClickHouse/ClickHouse/issues/38391). [#45322](https://github.com/ClickHouse/ClickHouse/pull/45322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Added table function `azureBlobStorageCluster`. The supported set of features is very similar to table function `s3Cluster`. [#50795](https://github.com/ClickHouse/ClickHouse/pull/50795) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Allow using `cluster`, `clusterAllReplicas`, `remote`, and `remoteSecure` without a table name. Closes [#50808](https://github.com/ClickHouse/ClickHouse/issues/50808). [#50848](https://github.com/ClickHouse/ClickHouse/pull/50848) ([Yangkuan Liu](https://github.com/LiuYangkuan)). +* A system table to monitor Kafka consumers. [#50999](https://github.com/ClickHouse/ClickHouse/pull/50999) ([Ilya Golshtein](https://github.com/ilejn)). +* Added `max_sessions_for_user` setting. [#51724](https://github.com/ClickHouse/ClickHouse/pull/51724) ([Alexey Gerasimchuck](https://github.com/Demilivor)). 
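As an illustration of the new `max_sessions_for_user` setting, a minimal sketch (the profile and user names are hypothetical, and it assumes the setting can be attached to a settings profile like other per-user limits):

```sql
-- Hypothetical sketch: cap a user at two concurrent sessions.
CREATE SETTINGS PROFILE limited_sessions SETTINGS max_sessions_for_user = 2;
CREATE USER report_user IDENTIFIED WITH sha256_password BY 'secret' SETTINGS PROFILE 'limited_sessions';
-- A third simultaneous connection for report_user would then be rejected.
```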
+* New functions `toUTCTimestamp/fromUTCTimestamp` that act the same as Spark's `to_utc_timestamp/from_utc_timestamp`. [#52117](https://github.com/ClickHouse/ClickHouse/pull/52117) ([KevinyhZou](https://github.com/KevinyhZou)). +* Add new functions `structureToCapnProtoSchema`/`structureToProtobufSchema` that convert a ClickHouse table structure to a CapnProto/Protobuf format schema. Allow input/output of data in CapnProto/Protobuf format without an external format schema, using the autogenerated schema from the table structure (controlled by settings `format_capn_proto_use_autogenerated_schema`/`format_protobuf_use_autogenerated_schema`). Allow exporting the autogenerated schema during input/output using the setting `output_format_schema`. [#52278](https://github.com/ClickHouse/ClickHouse/pull/52278) ([Kruglov Pavel](https://github.com/Avogar)). +* A new field `query_cache_usage` in `system.query_log` now shows if and how the query cache was used. [#52384](https://github.com/ClickHouse/ClickHouse/pull/52384) ([Robert Schulze](https://github.com/rschu1ze)). +* Add new functions `startsWithUTF8` and `endsWithUTF8`. [#52555](https://github.com/ClickHouse/ClickHouse/pull/52555) ([李扬](https://github.com/taiyang-li)). +* Allow a variable number of columns in TSV/CustomSeparated/JSONCompactEachRow, and make schema inference work with a variable number of columns. Add settings `input_format_tsv_allow_variable_number_of_columns`, `input_format_custom_allow_variable_number_of_columns`, `input_format_json_compact_allow_variable_number_of_columns`. [#52692](https://github.com/ClickHouse/ClickHouse/pull/52692) ([Kruglov Pavel](https://github.com/Avogar)). +* Added `SYSTEM STOP/START PULLING REPLICATION LOG` queries (for testing `ReplicatedMergeTree`). [#52881](https://github.com/ClickHouse/ClickHouse/pull/52881) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Allow executing constant non-deterministic functions in mutations on the initiator. [#53129](https://github.com/ClickHouse/ClickHouse/pull/53129) ([Anton Popov](https://github.com/CurtizJ)). +* Add input format `One` that doesn't read any data and always returns a single row with column `dummy` of type `UInt8` and value `0`, like `system.one`. It can be used together with the `_file/_path` virtual columns to list files in file/s3/url/hdfs/etc table functions without reading any data. [#53209](https://github.com/ClickHouse/ClickHouse/pull/53209) ([Kruglov Pavel](https://github.com/Avogar)). +* Add `tupleConcat` function. Closes [#52759](https://github.com/ClickHouse/ClickHouse/issues/52759). [#53239](https://github.com/ClickHouse/ClickHouse/pull/53239) ([Nikolay Degterinsky](https://github.com/evillique)). +* Support `TRUNCATE DATABASE` operation. [#53261](https://github.com/ClickHouse/ClickHouse/pull/53261) ([Bharat Nallan](https://github.com/bharatnc)). +* Add `max_threads_for_indexes` setting to limit the number of threads used for primary key processing. [#53313](https://github.com/ClickHouse/ClickHouse/pull/53313) ([jorisgio](https://github.com/jorisgio)). +* Re-add SipHash keyed functions. [#53525](https://github.com/ClickHouse/ClickHouse/pull/53525) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Added functions `arrayRotateLeft`, `arrayRotateRight`, `arrayShiftLeft`, `arrayShiftRight`. Closes [#52755](https://github.com/ClickHouse/ClickHouse/issues/52755), [#52895](https://github.com/ClickHouse/ClickHouse/issues/52895). [#53557](https://github.com/ClickHouse/ClickHouse/pull/53557) ([Mikhail Koviazin](https://github.com/mkmkme)). 
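A quick sketch of the new array rotation/shift functions (illustrative; the exact fill value used by the shift functions is assumed to be the element type's default):

```sql
SELECT
    arrayRotateLeft([1, 2, 3, 4, 5], 2),   -- [3, 4, 5, 1, 2]
    arrayRotateRight([1, 2, 3, 4, 5], 2),  -- [4, 5, 1, 2, 3]
    arrayShiftLeft([1, 2, 3, 4, 5], 2),    -- [3, 4, 5, 0, 0]
    arrayShiftRight([1, 2, 3, 4, 5], 2);   -- [0, 0, 1, 2, 3]
```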
+* Add column `name` to `system.clusters` as an alias to the `cluster` column. [#53605](https://github.com/ClickHouse/ClickHouse/pull/53605) ([irenjj](https://github.com/irenjj)). +* The advanced dashboard now allows mass editing (save/load). [#53608](https://github.com/ClickHouse/ClickHouse/pull/53608) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The advanced dashboard now has an option to maximize charts and move them around. [#53622](https://github.com/ClickHouse/ClickHouse/pull/53622) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add support for plural units. [#53641](https://github.com/ClickHouse/ClickHouse/pull/53641) ([irenjj](https://github.com/irenjj)). +* Added server setting `validate_tcp_client_information` that determines whether validation of client information is enabled when a query packet is received. [#53907](https://github.com/ClickHouse/ClickHouse/pull/53907) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Added support for adding and subtracting arrays: `[5,2] + [1,7]`. Division and multiplication were not implemented due to confusion between pointwise multiplication and the scalar product of arguments. Closes [#49939](https://github.com/ClickHouse/ClickHouse/issues/49939). [#52625](https://github.com/ClickHouse/ClickHouse/pull/52625) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add support for string literals as table names. Closes [#52178](https://github.com/ClickHouse/ClickHouse/issues/52178). [#52635](https://github.com/ClickHouse/ClickHouse/pull/52635) ([hendrik-m](https://github.com/hendrik-m)). + +#### Experimental Feature +* Add new table engine `S3Queue` for streaming data import from S3. Closes [#37012](https://github.com/ClickHouse/ClickHouse/issues/37012). [#49086](https://github.com/ClickHouse/ClickHouse/pull/49086) ([s-kat](https://github.com/s-kat)). It is not ready to use. Do not use it. +* Enable parallel reading from replicas over a distributed table. Related to [#49708](https://github.com/ClickHouse/ClickHouse/issues/49708). [#53005](https://github.com/ClickHouse/ClickHouse/pull/53005) ([Igor Nikonov](https://github.com/devcrafter)). +* Add experimental support for HNSW as an approximate nearest-neighbor search method. [#53447](https://github.com/ClickHouse/ClickHouse/pull/53447) ([Davit Vardanyan](https://github.com/davvard)). This is currently intended for those who continue working on the implementation. Do not use it. + +#### Performance Improvement +* Parquet filter pushdown. I.e. when reading Parquet files, row groups (chunks of the file) are skipped based on the WHERE condition and the min/max values in each column. In particular, if the file is roughly sorted by some column, queries that filter by a short range of that column will be much faster. [#52951](https://github.com/ClickHouse/ClickHouse/pull/52951) ([Michael Kolupaev](https://github.com/al13n321)). +* Optimize reading small row groups by batching them together in Parquet. Closes [#53069](https://github.com/ClickHouse/ClickHouse/issues/53069). [#53281](https://github.com/ClickHouse/ClickHouse/pull/53281) ([Kruglov Pavel](https://github.com/Avogar)). +* Optimize count from files in most input formats. Closes [#44334](https://github.com/ClickHouse/ClickHouse/issues/44334). [#53637](https://github.com/ClickHouse/ClickHouse/pull/53637) ([Kruglov Pavel](https://github.com/Avogar)). +* Use filter by file/path before reading in `url`/`file`/`hdfs` table functions. 
[#53529](https://github.com/ClickHouse/ClickHouse/pull/53529) ([Kruglov Pavel](https://github.com/Avogar)). +* Enable JIT compilation for AArch64, PowerPC, SystemZ, RISC-V. [#38217](https://github.com/ClickHouse/ClickHouse/pull/38217) ([Maksim Kita](https://github.com/kitaisreal)). +* Add setting `rewrite_count_distinct_if_with_count_distinct_implementation` to rewrite `countDistinctIf` with `count_distinct_implementation`. Closes [#30642](https://github.com/ClickHouse/ClickHouse/issues/30642). [#46051](https://github.com/ClickHouse/ClickHouse/pull/46051) ([flynn](https://github.com/ucasfl)). +* Process all the hash sets in parallel before the merge stage of aggregation. [#50748](https://github.com/ClickHouse/ClickHouse/pull/50748) ([Jiebin Sun](https://github.com/jiebinn)). +* Optimize aggregation performance of nullable string key when using a large number of variable length keys. [#51399](https://github.com/ClickHouse/ClickHouse/pull/51399) ([LiuNeng](https://github.com/liuneng1994)). +* Add a pass in Analyzer for time filter optimization with preimage. The performance experiments of SSB on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of 8.5% to the geomean QPS when the experimental analyzer is enabled. [#52091](https://github.com/ClickHouse/ClickHouse/pull/52091) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Optimize the merge if all hash sets are single-level in the `uniqExact` (COUNT DISTINCT) function. [#52973](https://github.com/ClickHouse/ClickHouse/pull/52973) ([Jiebin Sun](https://github.com/jiebinn)). +* `Join` table engine: do not clone hash join data structure with all columns. [#53046](https://github.com/ClickHouse/ClickHouse/pull/53046) ([Duc Canh Le](https://github.com/canhld94)). +* Implement native `ORC` input format without the Apache Arrow library to improve performance. [#53324](https://github.com/ClickHouse/ClickHouse/pull/53324) ([李扬](https://github.com/taiyang-li)). +* The dashboard will tell the server to compress the data, which is useful for large time frames over slow internet connections. For example, one chart with 86400 points can be 1.5 MB uncompressed and 60 KB compressed with `br`. [#53569](https://github.com/ClickHouse/ClickHouse/pull/53569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better utilization of thread pool for BACKUPs and RESTOREs. [#53649](https://github.com/ClickHouse/ClickHouse/pull/53649) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Load filesystem cache metadata on startup in parallel. Configured by the `load_metadata_threads` (default: 1) cache config setting. Related to [#52037](https://github.com/ClickHouse/ClickHouse/issues/52037). [#52943](https://github.com/ClickHouse/ClickHouse/pull/52943) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Improve `move_primary_key_columns_to_end_of_prewhere`. [#53337](https://github.com/ClickHouse/ClickHouse/pull/53337) ([Han Fei](https://github.com/hanfei1991)). +* This optimizes the interaction with ClickHouse Keeper. Previously the caller could register the same watch callback multiple times. In that case each entry was consuming memory and the same callback was called multiple times, which didn't make much sense. To avoid this, the caller could have some logic to not add the same watch multiple times. With this change, this deduplication is done internally if the watch callback is passed via `shared_ptr`. 
[#53452](https://github.com/ClickHouse/ClickHouse/pull/53452) ([Alexander Gololobov](https://github.com/davenger)). +* Cache the number of rows in files for count in file/s3/url/hdfs/azure functions. The cache can be enabled/disabled by the setting `use_cache_for_count_from_files` (enabled by default). Continuation of https://github.com/ClickHouse/ClickHouse/pull/53637. [#53692](https://github.com/ClickHouse/ClickHouse/pull/53692) ([Kruglov Pavel](https://github.com/Avogar)). +* More careful thread management improves the speed of the S3 table function over a large number of files by more than ~25%. [#53668](https://github.com/ClickHouse/ClickHouse/pull/53668) ([pufit](https://github.com/pufit)). + +#### Improvement +* Add `stderr_reaction` configuration/setting to control the reaction (none, log or throw) when external command stderr has data. This helps make debugging external commands easier. [#43210](https://github.com/ClickHouse/ClickHouse/pull/43210) ([Amos Bird](https://github.com/amosbird)). +* Add a `partition` column to `system.part_log` and the merge table (example query below). [#48990](https://github.com/ClickHouse/ClickHouse/pull/48990) ([Jianfei Hu](https://github.com/incfly)). +* The sizes of the (index) uncompressed/mark, mmap and query caches can now be configured dynamically at runtime (without server restart). [#51446](https://github.com/ClickHouse/ClickHouse/pull/51446) ([Robert Schulze](https://github.com/rschu1ze)). +* If a dictionary is created with a complex key, automatically choose the "complex key" layout variant. [#49587](https://github.com/ClickHouse/ClickHouse/pull/49587) ([xiebin](https://github.com/xbthink)). +* Add setting `use_concurrency_control` for better testing of the new concurrency control feature. [#49618](https://github.com/ClickHouse/ClickHouse/pull/49618) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added suggestions for mistyped names for databases and tables. [#49801](https://github.com/ClickHouse/ClickHouse/pull/49801) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Improve performance when reading small files from HDFS (e.g. via Gluten), which was previously noticeably slower than querying the same data directly with Spark. [#50063](https://github.com/ClickHouse/ClickHouse/pull/50063) ([KevinyhZou](https://github.com/KevinyhZou)). +* Reduce the amount of useless error logging after session expiration. [#50171](https://github.com/ClickHouse/ClickHouse/pull/50171) ([helifu](https://github.com/helifu)). +* Introduce fallback ZooKeeper sessions which are time-bound. Fixed the `index` column in `system.zookeeper_connection` for DNS addresses. [#50424](https://github.com/ClickHouse/ClickHouse/pull/50424) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Add the ability to log when `max_partitions_per_insert_block` is reached. [#50948](https://github.com/ClickHouse/ClickHouse/pull/50948) ([Sean Haynes](https://github.com/seandhaynes)). +* Added a bunch of custom commands to clickhouse-keeper-client (mostly to make ClickHouse debugging easier). [#51117](https://github.com/ClickHouse/ClickHouse/pull/51117) ([pufit](https://github.com/pufit)). +* Updated the check for the connection string in the `azureBlobStorage` table function, as a connection string with "sas" does not always begin with the default endpoint, and updated the connection URL to include the "sas" token after adding Azure's container to the URL. [#51141](https://github.com/ClickHouse/ClickHouse/pull/51141) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). 
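To illustrate the new `partition` column in `system.part_log` mentioned above, a hypothetical query (the database and table names are placeholders, and `part_log` must be enabled in the server config):

```sql
-- Count recently created parts per partition for one table (illustrative).
SELECT partition, count() AS parts_created
FROM system.part_log
WHERE database = 'default' AND table = 'hits' AND event_type = 'NewPart'
GROUP BY partition
ORDER BY parts_created DESC
LIMIT 10;
```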
+* Fix the description of filtering sets in the `full_sorting_merge` JOIN algorithm. [#51329](https://github.com/ClickHouse/ClickHouse/pull/51329) ([Tanay Tummalapalli](https://github.com/ttanay)). +* Fixed memory consumption in `Aggregator` when `max_block_size` is huge. [#51566](https://github.com/ClickHouse/ClickHouse/pull/51566) ([Nikita Taranov](https://github.com/nickitat)). +* Add `SYSTEM SYNC FILESYSTEM CACHE` command. It compares the in-memory state of the filesystem cache with what is on disk and fixes the in-memory state if needed. This is only needed if you are making manual interventions in on-disk data, which is highly discouraged. [#51622](https://github.com/ClickHouse/ClickHouse/pull/51622) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Attempt to create a generic proxy resolver for ClickHouse while keeping backwards compatibility with the existing S3 storage configuration proxy resolver. [#51749](https://github.com/ClickHouse/ClickHouse/pull/51749) ([Arthur Passos](https://github.com/arthurpassos)). +* Support reading tuple subcolumns from file/s3/hdfs/url/azureBlobStorage table functions. [#51806](https://github.com/ClickHouse/ClickHouse/pull/51806) ([Kruglov Pavel](https://github.com/Avogar)). +* Function `arrayIntersect` now returns the values in the order corresponding to the first argument. Closes [#27622](https://github.com/ClickHouse/ClickHouse/issues/27622). [#51850](https://github.com/ClickHouse/ClickHouse/pull/51850) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add new queries that allow creating/dropping access entities in a specified access storage, or moving access entities from one access storage to another. [#51912](https://github.com/ClickHouse/ClickHouse/pull/51912) ([pufit](https://github.com/pufit)). +* Make `ALTER TABLE FREEZE` queries not replicated in the Replicated database engine. [#52064](https://github.com/ClickHouse/ClickHouse/pull/52064) ([Mike Kot](https://github.com/myrrc)). +* Added possibility to flush system tables on unexpected shutdown. [#52174](https://github.com/ClickHouse/ClickHouse/pull/52174) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix the case when the `s3` table function refused to work with pre-signed URLs. Closes [#50846](https://github.com/ClickHouse/ClickHouse/issues/50846). [#52310](https://github.com/ClickHouse/ClickHouse/pull/52310) ([chen](https://github.com/xiedeyantu)). +* Add column `name` as an alias to `event` and `metric` in the `system.events` and `system.metrics` tables. Closes [#51257](https://github.com/ClickHouse/ClickHouse/issues/51257). [#52315](https://github.com/ClickHouse/ClickHouse/pull/52315) ([chen](https://github.com/xiedeyantu)). +* Added support for the syntax `CREATE UNIQUE INDEX` in the parser as a no-op for better SQL compatibility. `UNIQUE` index is not supported. Set `create_index_ignore_unique = 1` to ignore the UNIQUE keyword in queries. [#52320](https://github.com/ClickHouse/ClickHouse/pull/52320) ([Ilya Yatsishin](https://github.com/qoega)). +* Add support for predefined macros (`{database}` and `{table}`) in some Kafka engine settings: topic, consumer, client_id, etc. [#52386](https://github.com/ClickHouse/ClickHouse/pull/52386) ([Yury Bogomolov](https://github.com/ybogo)). +* Disable updating the filesystem cache during backup/restore. The filesystem cache must not be updated during backup/restore; it seems to just slow down the process without any benefit (the BACKUP command can read a lot of data, and there is no point in putting it all into the filesystem cache only to immediately evict it). 
[#52402](https://github.com/ClickHouse/ClickHouse/pull/52402) ([Vitaly Baranov](https://github.com/vitlibar)). +* The S3 endpoint configuration now allows using it from the root, and '/' is appended automatically if needed. [#47809](https://github.com/ClickHouse/ClickHouse/issues/47809). [#52600](https://github.com/ClickHouse/ClickHouse/pull/52600) ([xiaolei565](https://github.com/xiaolei565)). +* Allow positional options for clickhouse-local and populate the global UDF settings (`user_scripts_path` and `user_defined_executable_functions_config`). [#52643](https://github.com/ClickHouse/ClickHouse/pull/52643) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* `system.asynchronous_metrics` now includes metrics "QueryCacheEntries" and "QueryCacheBytes" to inspect the query cache. [#52650](https://github.com/ClickHouse/ClickHouse/pull/52650) ([Robert Schulze](https://github.com/rschu1ze)). +* Added the possibility to use the `s3_storage_class` parameter in the `SETTINGS` clause of the `BACKUP` statement for backups to S3. [#52658](https://github.com/ClickHouse/ClickHouse/pull/52658) ([Roman Vasin](https://github.com/rvasin)). +* Add utility `print-backup-info.py` which parses a backup metadata file and prints information about the backup. [#52690](https://github.com/ClickHouse/ClickHouse/pull/52690) ([Vitaly Baranov](https://github.com/vitlibar)). +* Closes [#49510](https://github.com/ClickHouse/ClickHouse/issues/49510). Currently, database and table names are case-sensitive, but BI tools query `information_schema` sometimes in lowercase, sometimes in uppercase. For this reason we have an `information_schema` database containing lowercase tables, such as `information_schema.tables`, and an `INFORMATION_SCHEMA` database containing uppercase tables, such as `INFORMATION_SCHEMA.TABLES`. But some tools query `INFORMATION_SCHEMA.tables` and `information_schema.TABLES`. The proposed solution is to duplicate both lowercase and uppercase tables in the lowercase and uppercase `information_schema` databases. [#52695](https://github.com/ClickHouse/ClickHouse/pull/52695) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* The `CHECK TABLE` query has better performance and usability (sends progress updates, is cancellable). [#52745](https://github.com/ClickHouse/ClickHouse/pull/52745) ([vdimir](https://github.com/vdimir)). +* Add support for `modulo`, `intDiv`, `intDivOrZero` for tuples by distributing them across the tuple's elements. [#52758](https://github.com/ClickHouse/ClickHouse/pull/52758) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Search for default `yaml` and `yml` configs in clickhouse-client after `xml`. [#52767](https://github.com/ClickHouse/ClickHouse/pull/52767) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* When merging into a non-'clickhouse' rooted configuration, configs with a different root node name are now bypassed without an exception. [#52770](https://github.com/ClickHouse/ClickHouse/pull/52770) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Now it's possible to specify the min (`memory_profiler_sample_min_allocation_size`) and max (`memory_profiler_sample_max_allocation_size`) size for allocations to be tracked with the sampling memory profiler. [#52779](https://github.com/ClickHouse/ClickHouse/pull/52779) ([alesapin](https://github.com/alesapin)). +* Add `precise_float_parsing` setting to switch float parsing methods (fast/precise). [#52791](https://github.com/ClickHouse/ClickHouse/pull/52791) ([Andrey Zvonov](https://github.com/zvonand)). 
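A sketch of how the new `precise_float_parsing` setting could be compared (illustrative value; the two parsing methods are expected to differ only in the last bits of the result):

```sql
SELECT toFloat64('1.7091') SETTINGS precise_float_parsing = 0;  -- fast method (default)
SELECT toFloat64('1.7091') SETTINGS precise_float_parsing = 1;  -- precise method; may round differently in the last digits
```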
+* Use the same default paths for `clickhouse-keeper` (symlink) as for `clickhouse-keeper` (executable). [#52861](https://github.com/ClickHouse/ClickHouse/pull/52861) ([Vitaly Baranov](https://github.com/vitlibar)). +* Improve error message for table function `remote`. Closes [#40220](https://github.com/ClickHouse/ClickHouse/issues/40220). [#52959](https://github.com/ClickHouse/ClickHouse/pull/52959) ([jiyoungyoooo](https://github.com/jiyoungyoooo)). +* Added the possibility to specify custom storage policy in the `SETTINGS` clause of `RESTORE` queries. [#52970](https://github.com/ClickHouse/ClickHouse/pull/52970) ([Victor Krasnov](https://github.com/sirvickr)). +* Add the ability to throttle the S3 requests on backup operations (`BACKUP` and `RESTORE` commands now honor `s3_max_[get/put]_[rps/burst]`). [#52974](https://github.com/ClickHouse/ClickHouse/pull/52974) ([Daniel Pozo Escalona](https://github.com/danipozo)). +* Add settings to ignore ON CLUSTER clause in queries for management of replicated user-defined functions or access control entities with replicated storage. [#52975](https://github.com/ClickHouse/ClickHouse/pull/52975) ([Aleksei Filatov](https://github.com/aalexfvk)). +* EXPLAIN actions for JOIN step. [#53006](https://github.com/ClickHouse/ClickHouse/pull/53006) ([Maksim Kita](https://github.com/kitaisreal)). +* Make `hasTokenOrNull` and `hasTokenCaseInsensitiveOrNull` return null for empty needles. [#53059](https://github.com/ClickHouse/ClickHouse/pull/53059) ([ltrk2](https://github.com/ltrk2)). +* Allow to restrict allowed paths for filesystem caches. Mainly useful for dynamic disks. If in server config `filesystem_caches_path` is specified, all filesystem caches' paths will be restricted to this directory. E.g. if the `path` in cache config is relative - it will be put in `filesystem_caches_path`; if `path` in cache config is absolute, it will be required to lie inside `filesystem_caches_path`. If `filesystem_caches_path` is not specified in config, then behaviour will be the same as in earlier versions. [#53124](https://github.com/ClickHouse/ClickHouse/pull/53124) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Added a bunch of custom commands (mostly to make ClickHouse debugging easier). [#53127](https://github.com/ClickHouse/ClickHouse/pull/53127) ([pufit](https://github.com/pufit)). +* Add diagnostic info about file name during schema inference - it helps when you process multiple files with globs. [#53135](https://github.com/ClickHouse/ClickHouse/pull/53135) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Client will load suggestions using the main connection if the second connection is not allowed to create a session. [#53177](https://github.com/ClickHouse/ClickHouse/pull/53177) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Add EXCEPT clause to `SYSTEM STOP/START LISTEN QUERIES [ALL/DEFAULT/CUSTOM]` query, for example `SYSTEM STOP LISTEN QUERIES ALL EXCEPT TCP, HTTP`. [#53280](https://github.com/ClickHouse/ClickHouse/pull/53280) ([Nikolay Degterinsky](https://github.com/evillique)). +* Change the default of `max_concurrent_queries` from 100 to 1000. It's ok to have many concurrent queries if they are not heavy, and mostly waiting for the network. Note: don't confuse concurrent queries and QPS: for example, ClickHouse server can do tens of thousands of QPS with less than 100 concurrent queries. [#53285](https://github.com/ClickHouse/ClickHouse/pull/53285) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Limit number of concurrent background partition optimize merges. [#53405](https://github.com/ClickHouse/ClickHouse/pull/53405) ([Duc Canh Le](https://github.com/canhld94)). +* Added a setting `allow_moving_table_directory_to_trash` that allows to ignore `Directory for table data already exists` error when replicating/recovering a `Replicated` database. [#53425](https://github.com/ClickHouse/ClickHouse/pull/53425) ([Alexander Tokmakov](https://github.com/tavplubix)). +* If server settings `asynchronous_metrics_update_period_s` and `asynchronous_heavy_metrics_update_period_s` are misconfigured to 0, it will now fail gracefully instead of terminating the application. [#53428](https://github.com/ClickHouse/ClickHouse/pull/53428) ([Robert Schulze](https://github.com/rschu1ze)). +* The ClickHouse server now respects memory limits changed via cgroups when reloading its configuration. [#53455](https://github.com/ClickHouse/ClickHouse/pull/53455) ([Robert Schulze](https://github.com/rschu1ze)). +* Add ability to turn off flush of Distributed tables on `DETACH`, `DROP`, or server shutdown. [#53501](https://github.com/ClickHouse/ClickHouse/pull/53501) ([Azat Khuzhin](https://github.com/azat)). +* The `domainRFC` function now supports IPv6 in square brackets. [#53506](https://github.com/ClickHouse/ClickHouse/pull/53506) ([Chen768959](https://github.com/Chen768959)). +* Use longer timeout for S3 CopyObject requests, which are used in backups. [#53533](https://github.com/ClickHouse/ClickHouse/pull/53533) ([Michael Kolupaev](https://github.com/al13n321)). +* Added server setting `aggregate_function_group_array_max_element_size`. This setting is used to limit array size for `groupArray` function at serialization. The default value is `16777215`. [#53550](https://github.com/ClickHouse/ClickHouse/pull/53550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* `SCHEMA()` was added as alias for `DATABASE()` to improve MySQL compatibility. [#53587](https://github.com/ClickHouse/ClickHouse/pull/53587) ([Daniël van Eeden](https://github.com/dveeden)). +* Add asynchronous metrics about tables in the system database. For example, `TotalBytesOfMergeTreeTablesSystem`. This closes [#53603](https://github.com/ClickHouse/ClickHouse/issues/53603). [#53604](https://github.com/ClickHouse/ClickHouse/pull/53604) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* SQL editor in the Play UI and Dashboard will not use Grammarly. [#53614](https://github.com/ClickHouse/ClickHouse/pull/53614) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* As expert-level settings, it is now possible to (1) configure the size_ratio (i.e. the relative size of the protected queue) of the [index] mark/uncompressed caches, (2) configure the cache policy of the index mark and index uncompressed caches. [#53657](https://github.com/ClickHouse/ClickHouse/pull/53657) ([Robert Schulze](https://github.com/rschu1ze)). +* Added client info validation to the query packet in TCPHandler. [#53673](https://github.com/ClickHouse/ClickHouse/pull/53673) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Retry loading parts in case of network errors while interaction with Microsoft Azure. [#53750](https://github.com/ClickHouse/ClickHouse/pull/53750) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Stacktrace for exceptions, Materailized view exceptions are propagated. [#53766](https://github.com/ClickHouse/ClickHouse/pull/53766) ([Ilya Golshtein](https://github.com/ilejn)). 
+* If no hostname or port is specified, keeper-client will try to find a connection string in ClickHouse's config.xml. [#53769](https://github.com/ClickHouse/ClickHouse/pull/53769) ([pufit](https://github.com/pufit)). +* Add profile event `PartsLockMicroseconds` which shows the number of microseconds we hold the data parts lock in the MergeTree table engine family. [#53797](https://github.com/ClickHouse/ClickHouse/pull/53797) ([alesapin](https://github.com/alesapin)). +* Make the reconnect limit in RAFT limits configurable for Keeper. This configuration can help Keeper rebuild connections with peers more quickly if the current connection is broken. [#53817](https://github.com/ClickHouse/ClickHouse/pull/53817) ([Pengyuan Bian](https://github.com/bianpengyuan)). +* Ignore foreign keys in table definitions to improve compatibility with MySQL, so users don't need to rewrite the foreign-key part of their SQL, ref [#53380](https://github.com/ClickHouse/ClickHouse/issues/53380). [#53864](https://github.com/ClickHouse/ClickHouse/pull/53864) ([jsc0218](https://github.com/jsc0218)). + +#### Build/Testing/Packaging Improvement +* Don't expose symbols from the ClickHouse binary to the dynamic linker. It might fix [#43933](https://github.com/ClickHouse/ClickHouse/issues/43933). [#47475](https://github.com/ClickHouse/ClickHouse/pull/47475) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add https://github.com/elliotchance/sqltest to CI to report the SQL 2016 conformance. [#52293](https://github.com/ClickHouse/ClickHouse/pull/52293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Upgrade PRQL to 0.9.3. [#53060](https://github.com/ClickHouse/ClickHouse/pull/53060) ([Maximilian Roos](https://github.com/max-sixty)). +* System tables from CI checks are exported to ClickHouse Cloud. [#53086](https://github.com/ClickHouse/ClickHouse/pull/53086) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud. [#53100](https://github.com/ClickHouse/ClickHouse/pull/53100) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up Debug and Tidy builds. [#53178](https://github.com/ClickHouse/ClickHouse/pull/53178) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up the build by removing tons and tonnes of garbage. One of the frequently included headers was poisoned by boost. [#53180](https://github.com/ClickHouse/ClickHouse/pull/53180) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove even more garbage. [#53182](https://github.com/ClickHouse/ClickHouse/pull/53182) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The function `arrayAUC` was using heavy C++ templates - ditched them. [#53183](https://github.com/ClickHouse/ClickHouse/pull/53183) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Some translation units were always rebuilt regardless of ccache. The culprit was found and fixed. [#53184](https://github.com/ClickHouse/ClickHouse/pull/53184) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud; the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). 
[#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Export logs from CI in stateful tests to ClickHouse Cloud. [#53351](https://github.com/ClickHouse/ClickHouse/pull/53351) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Export logs from CI in stress tests. [#53353](https://github.com/ClickHouse/ClickHouse/pull/53353) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Export logs from CI in fuzzer. [#53354](https://github.com/ClickHouse/ClickHouse/pull/53354) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Follow up for [#53418](https://github.com/ClickHouse/ClickHouse/issues/53418). Small improvements for install_check.py, adding tests for proper ENV parameters passing to the main process on `init.d start`. [#53457](https://github.com/ClickHouse/ClickHouse/pull/53457) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Reorganize file management in CMake to prevent potential duplications. For instance, `indexHint.cpp` is duplicated in both `dbms_sources` and `clickhouse_functions_sources`. [#53621](https://github.com/ClickHouse/ClickHouse/pull/53621) ([Amos Bird](https://github.com/amosbird)). +* Upgrade snappy to 1.1.10. [#53672](https://github.com/ClickHouse/ClickHouse/pull/53672) ([李扬](https://github.com/taiyang-li)). +* Slightly improve cmake build by sanitizing some dependencies and removing some duplicates. Each commit includes a short description of the changes made. [#53759](https://github.com/ClickHouse/ClickHouse/pull/53759) ([Amos Bird](https://github.com/amosbird)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Do not reset (experimental) Annoy index during build-up with more than one mark [#51325](https://github.com/ClickHouse/ClickHouse/pull/51325) ([Tian Xinhui](https://github.com/xinhuitian)). +* Fix usage of temporary directories during RESTORE [#51493](https://github.com/ClickHouse/ClickHouse/pull/51493) ([Azat Khuzhin](https://github.com/azat)). +* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support IPv4 and IPv6 data types as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* A fix for checksum of compress marks [#51777](https://github.com/ClickHouse/ClickHouse/pull/51777) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix mistakenly comma parsing as part of datetime in CSV best effort parsing [#51950](https://github.com/ClickHouse/ClickHouse/pull/51950) ([Kruglov Pavel](https://github.com/Avogar)). +* Don't throw exception when executable UDF has parameters [#51961](https://github.com/ClickHouse/ClickHouse/pull/51961) ([Nikita Taranov](https://github.com/nickitat)). +* Fix recalculation of skip indexes and projections in `ALTER DELETE` queries [#52530](https://github.com/ClickHouse/ClickHouse/pull/52530) ([Anton Popov](https://github.com/CurtizJ)). +* MaterializedMySQL: Fix the infinite loop in ReadBuffer::read [#52621](https://github.com/ClickHouse/ClickHouse/pull/52621) ([Val Doroshchuk](https://github.com/valbok)). 
+* Load suggestion only with `clickhouse` dialect [#52628](https://github.com/ClickHouse/ClickHouse/pull/52628) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Init and destroy ares channel on demand. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix filtering by virtual columns with OR expression [#52653](https://github.com/ClickHouse/ClickHouse/pull/52653) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). +* Fix named collections on cluster [#52687](https://github.com/ClickHouse/ClickHouse/pull/52687) ([Al Korgun](https://github.com/alkorgun)). +* Fix reading of unnecessary column in case of multistage `PREWHERE` [#52689](https://github.com/ClickHouse/ClickHouse/pull/52689) ([Anton Popov](https://github.com/CurtizJ)). +* Fix unexpected sort result on multi columns with nulls first direction [#52761](https://github.com/ClickHouse/ClickHouse/pull/52761) ([copperybean](https://github.com/copperybean)). +* Fix data race in Keeper reconfiguration [#52804](https://github.com/ClickHouse/ClickHouse/pull/52804) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix sorting of sparse columns with large limit [#52827](https://github.com/ClickHouse/ClickHouse/pull/52827) ([Anton Popov](https://github.com/CurtizJ)). +* clickhouse-keeper: fix implementation of server with poll. [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). +* Make regexp analyzer recognize named capturing groups [#52840](https://github.com/ClickHouse/ClickHouse/pull/52840) ([Han Fei](https://github.com/hanfei1991)). +* Fix possible assert in `~PushingAsyncPipelineExecutor` in clickhouse-local [#52862](https://github.com/ClickHouse/ClickHouse/pull/52862) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix reading of empty `Nested(Array(LowCardinality(...)))` [#52949](https://github.com/ClickHouse/ClickHouse/pull/52949) ([Anton Popov](https://github.com/CurtizJ)). +* Added new tests for session_log and fixed the inconsistency between login and logout. [#52958](https://github.com/ClickHouse/ClickHouse/pull/52958) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). +* Convert sparse column format to full in CreateSetAndFilterOnTheFlyStep [#53000](https://github.com/ClickHouse/ClickHouse/pull/53000) ([vdimir](https://github.com/vdimir)). +* Fix rare race condition with empty key prefix directory deletion in fs cache [#53055](https://github.com/ClickHouse/ClickHouse/pull/53055) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix ZstdDeflatingWriteBuffer truncating the output sometimes [#53064](https://github.com/ClickHouse/ClickHouse/pull/53064) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix query_id in part_log with async flush queries [#53103](https://github.com/ClickHouse/ClickHouse/pull/53103) ([Raúl Marín](https://github.com/Algunenano)). +* Fix possible error from cache "Read unexpected size" [#53121](https://github.com/ClickHouse/ClickHouse/pull/53121) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Disable the new parquet encoder [#53130](https://github.com/ClickHouse/ClickHouse/pull/53130) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix "Not-ready Set" exception [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix character escaping in the PostgreSQL engine [#53250](https://github.com/ClickHouse/ClickHouse/pull/53250) ([Nikolay Degterinsky](https://github.com/evillique)). +* Experimental session_log table: Added new tests for session_log and fixed the inconsistency between login and logout. [#53255](https://github.com/ClickHouse/ClickHouse/pull/53255) ([Alexey Gerasimchuck](https://github.com/Demilivor)). Fixed inconsistency between login success and logout [#53302](https://github.com/ClickHouse/ClickHouse/pull/53302) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix adding sub-second intervals to DateTime [#53309](https://github.com/ClickHouse/ClickHouse/pull/53309) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix "Context has expired" error in dictionaries [#53342](https://github.com/ClickHouse/ClickHouse/pull/53342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). +* Forbid use_structure_from_insertion_table_in_table_functions when executing scalar subqueries [#53348](https://github.com/ClickHouse/ClickHouse/pull/53348) ([flynn](https://github.com/ucasfl)). +* Fix loading of a lazy database during a `system.tables` SELECT query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fixed system.data_skipping_indices for MaterializedMySQL [#53381](https://github.com/ClickHouse/ClickHouse/pull/53381) ([Filipp Ozinov](https://github.com/bakwc)). +* Fix processing of a single carriage return in the TSV file segmentation engine [#53407](https://github.com/ClickHouse/ClickHouse/pull/53407) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix `Context has expired` error properly [#53433](https://github.com/ClickHouse/ClickHouse/pull/53433) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix `timeout_overflow_mode` when having a subquery in the RHS of IN [#53439](https://github.com/ClickHouse/ClickHouse/pull/53439) ([Duc Canh Le](https://github.com/canhld94)). +* Fix an unexpected behavior in [#53152](https://github.com/ClickHouse/ClickHouse/issues/53152) [#53440](https://github.com/ClickHouse/ClickHouse/pull/53440) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Fix JSON_QUERY function parse error when the path is all numbers [#53470](https://github.com/ClickHouse/ClickHouse/pull/53470) ([KevinyhZou](https://github.com/KevinyhZou)). +* Fix wrong column order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed SELECTing from ReplacingMergeTree with do_not_merge_across_partitions_select_final [#53511](https://github.com/ClickHouse/ClickHouse/pull/53511) ([Vasily Nemkov](https://github.com/Enmk)). +* Flush async insert queue first on shutdown [#53547](https://github.com/ClickHouse/ClickHouse/pull/53547) ([joelynch](https://github.com/joelynch)). +* Fix crash in join on sparse column [#53548](https://github.com/ClickHouse/ClickHouse/pull/53548) ([vdimir](https://github.com/vdimir)). 
+* Fix possible UB in Set skipping index for functions with incorrect args [#53559](https://github.com/ClickHouse/ClickHouse/pull/53559) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible UB in inverted indexes (experimental feature) [#53560](https://github.com/ClickHouse/ClickHouse/pull/53560) ([Azat Khuzhin](https://github.com/azat)). +* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix number of dropped granules in EXPLAIN PLAN index=1 [#53616](https://github.com/ClickHouse/ClickHouse/pull/53616) ([wangxiaobo](https://github.com/wzb5212)). +* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). +* Prepared set cache in mutation pipeline stuck [#53645](https://github.com/ClickHouse/ClickHouse/pull/53645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix bug on mutations with subcolumns of type JSON in predicates of UPDATE and DELETE queries. [#53677](https://github.com/ClickHouse/ClickHouse/pull/53677) ([VanDarkholme7](https://github.com/VanDarkholme7)). +* Fix filter pushdown for full_sorting_merge join [#53699](https://github.com/ClickHouse/ClickHouse/pull/53699) ([vdimir](https://github.com/vdimir)). +* Try to fix bug with `NULL::LowCardinality(Nullable(...)) NOT IN` [#53706](https://github.com/ClickHouse/ClickHouse/pull/53706) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). +* `transform`: correctly handle default column with multiple rows [#53742](https://github.com/ClickHouse/ClickHouse/pull/53742) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Fix fuzzer crash in parseDateTime [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)). +* MaterializedPostgreSQL: fix uncaught exception in getCreateTableQueryImpl [#53832](https://github.com/ClickHouse/ClickHouse/pull/53832) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible segfault while using PostgreSQL engine [#53847](https://github.com/ClickHouse/ClickHouse/pull/53847) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix named_collection_admin alias [#54066](https://github.com/ClickHouse/ClickHouse/pull/54066) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+ ### ClickHouse release 23.7, 2023-07-27 #### Backward Incompatible Change diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 2557ebf78ae..7b98cf43b09 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -136,9 +136,7 @@ add_contrib (aws-cmake ) add_contrib (base64-cmake base64) -if (NOT ARCH_S390X) add_contrib (simdjson-cmake simdjson) -endif() add_contrib (rapidjson-cmake rapidjson) add_contrib (fastops-cmake fastops) add_contrib (libuv-cmake libuv) diff --git a/contrib/libpqxx b/contrib/libpqxx index bdd6540fb95..791d68fd899 160000 --- a/contrib/libpqxx +++ b/contrib/libpqxx @@ -1 +1 @@ -Subproject commit bdd6540fb95ff56c813691ceb5da5a3266cf235d +Subproject commit 791d68fd89902835133c50435e380ec7a73271b7 diff --git a/contrib/openssl b/contrib/openssl index 19cc035b6c6..245cb0291e0 160000 --- a/contrib/openssl +++ b/contrib/openssl @@ -1 +1 @@ -Subproject commit 19cc035b6c6f2283573d29c7ea7f7d675cf750ce +Subproject commit 245cb0291e0db99d9ccf3692fa76f440b2b054c2 diff --git a/contrib/openssl-cmake/linux_aarch64/include/openssl/cmp.h b/contrib/openssl-cmake/linux_aarch64/include/openssl/cmp.h index 2476042c531..49825570d8c 100644 --- a/contrib/openssl-cmake/linux_aarch64/include/openssl/cmp.h +++ b/contrib/openssl-cmake/linux_aarch64/include/openssl/cmp.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/cmp.h.in + * Generated by Makefile from include/openssl/cmp.h.in * - * Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved. * Copyright Nokia 2007-2019 * Copyright Siemens AG 2015-2019 * @@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ -# define OSSL_CMP_PKISTATUS_accepted 0 -# define OSSL_CMP_PKISTATUS_grantedWithMods 1 -# define OSSL_CMP_PKISTATUS_rejection 2 -# define OSSL_CMP_PKISTATUS_waiting 3 -# define OSSL_CMP_PKISTATUS_revocationWarning 4 +# define OSSL_CMP_PKISTATUS_request -3 +# define OSSL_CMP_PKISTATUS_trans -2 +# define OSSL_CMP_PKISTATUS_unspecified -1 +# define OSSL_CMP_PKISTATUS_accepted 0 +# define OSSL_CMP_PKISTATUS_grantedWithMods 1 +# define OSSL_CMP_PKISTATUS_rejection 2 +# define OSSL_CMP_PKISTATUS_waiting 3 +# define OSSL_CMP_PKISTATUS_revocationWarning 4 # define OSSL_CMP_PKISTATUS_revocationNotification 5 -# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 +# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 typedef ASN1_INTEGER OSSL_CMP_PKISTATUS; DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS) @@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted, int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey); int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx, const unsigned char *ref, int len); -int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec, - const int len); +int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, + const unsigned char *sec, int len); /* CMP message header and extra certificates: */ int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name); int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); +int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx); int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx, STACK_OF(X509) *extraCertsOut); /* certificate template: */ @@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr); OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg); int 
OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid); OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx, const char *propq); diff --git a/contrib/openssl-cmake/linux_aarch64/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_aarch64/include/openssl/opensslv.h index 81c1b93afaa..3c221e1ac23 100644 --- a/contrib/openssl-cmake/linux_aarch64/include/openssl/opensslv.h +++ b/contrib/openssl-cmake/linux_aarch64/include/openssl/opensslv.h @@ -1,6 +1,6 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/opensslv.h.in + * Generated by Makefile from include/openssl/opensslv.h.in * * Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved. * @@ -29,7 +29,7 @@ extern "C" { */ # define OPENSSL_VERSION_MAJOR 3 # define OPENSSL_VERSION_MINOR 0 -# define OPENSSL_VERSION_PATCH 7 +# define OPENSSL_VERSION_PATCH 10 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. */ -# define OPENSSL_VERSION_STR "3.0.7" -# define OPENSSL_FULL_VERSION_STR "3.0.7" +# define OPENSSL_VERSION_STR "3.0.10" +# define OPENSSL_FULL_VERSION_STR "3.0.10" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. */ -# define OPENSSL_RELEASE_DATE "1 Nov 2022" +# define OPENSSL_RELEASE_DATE "1 Aug 2023" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/linux_aarch64/include/openssl/x509v3.h b/contrib/openssl-cmake/linux_aarch64/include/openssl/x509v3.h index fb4b49ca349..20b67455f20 100644 --- a/contrib/openssl-cmake/linux_aarch64/include/openssl/x509v3.h +++ b/contrib/openssl-cmake/linux_aarch64/include/openssl/x509v3.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/x509v3.h.in + * Generated by Makefile from include/openssl/x509v3.h.in * - * Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st { OTHERNAME *otherName; /* otherName */ ASN1_IA5STRING *rfc822Name; ASN1_IA5STRING *dNSName; - ASN1_TYPE *x400Address; + ASN1_STRING *x400Address; X509_NAME *directoryName; EDIPARTYNAME *ediPartyName; ASN1_IA5STRING *uniformResourceIdentifier; diff --git a/contrib/openssl-cmake/linux_ppc64le/include/openssl/cmp.h b/contrib/openssl-cmake/linux_ppc64le/include/openssl/cmp.h index 2476042c531..49825570d8c 100644 --- a/contrib/openssl-cmake/linux_ppc64le/include/openssl/cmp.h +++ b/contrib/openssl-cmake/linux_ppc64le/include/openssl/cmp.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/cmp.h.in + * Generated by Makefile from include/openssl/cmp.h.in * - * Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved. 
+ * Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved. * Copyright Nokia 2007-2019 * Copyright Siemens AG 2015-2019 * @@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ -# define OSSL_CMP_PKISTATUS_accepted 0 -# define OSSL_CMP_PKISTATUS_grantedWithMods 1 -# define OSSL_CMP_PKISTATUS_rejection 2 -# define OSSL_CMP_PKISTATUS_waiting 3 -# define OSSL_CMP_PKISTATUS_revocationWarning 4 +# define OSSL_CMP_PKISTATUS_request -3 +# define OSSL_CMP_PKISTATUS_trans -2 +# define OSSL_CMP_PKISTATUS_unspecified -1 +# define OSSL_CMP_PKISTATUS_accepted 0 +# define OSSL_CMP_PKISTATUS_grantedWithMods 1 +# define OSSL_CMP_PKISTATUS_rejection 2 +# define OSSL_CMP_PKISTATUS_waiting 3 +# define OSSL_CMP_PKISTATUS_revocationWarning 4 # define OSSL_CMP_PKISTATUS_revocationNotification 5 -# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 +# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 typedef ASN1_INTEGER OSSL_CMP_PKISTATUS; DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS) @@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted, int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey); int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx, const unsigned char *ref, int len); -int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec, - const int len); +int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, + const unsigned char *sec, int len); /* CMP message header and extra certificates: */ int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name); int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); +int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx); int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx, STACK_OF(X509) *extraCertsOut); /* certificate template: */ @@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr); OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid); OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx, const char *propq); diff --git a/contrib/openssl-cmake/linux_ppc64le/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_ppc64le/include/openssl/opensslv.h index 81c1b93afaa..3c221e1ac23 100644 --- a/contrib/openssl-cmake/linux_ppc64le/include/openssl/opensslv.h +++ b/contrib/openssl-cmake/linux_ppc64le/include/openssl/opensslv.h @@ -1,6 +1,6 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/opensslv.h.in + * Generated by Makefile from include/openssl/opensslv.h.in * * Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved. * @@ -29,7 +29,7 @@ extern "C" { */ # define OPENSSL_VERSION_MAJOR 3 # define OPENSSL_VERSION_MINOR 0 -# define OPENSSL_VERSION_PATCH 7 +# define OPENSSL_VERSION_PATCH 10 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. 
*/ -# define OPENSSL_VERSION_STR "3.0.7" -# define OPENSSL_FULL_VERSION_STR "3.0.7" +# define OPENSSL_VERSION_STR "3.0.10" +# define OPENSSL_FULL_VERSION_STR "3.0.10" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. */ -# define OPENSSL_RELEASE_DATE "1 Nov 2022" +# define OPENSSL_RELEASE_DATE "1 Aug 2023" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/linux_ppc64le/include/openssl/x509v3.h b/contrib/openssl-cmake/linux_ppc64le/include/openssl/x509v3.h index fb4b49ca349..20b67455f20 100644 --- a/contrib/openssl-cmake/linux_ppc64le/include/openssl/x509v3.h +++ b/contrib/openssl-cmake/linux_ppc64le/include/openssl/x509v3.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/x509v3.h.in + * Generated by Makefile from include/openssl/x509v3.h.in * - * Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st { OTHERNAME *otherName; /* otherName */ ASN1_IA5STRING *rfc822Name; ASN1_IA5STRING *dNSName; - ASN1_TYPE *x400Address; + ASN1_STRING *x400Address; X509_NAME *directoryName; EDIPARTYNAME *ediPartyName; ASN1_IA5STRING *uniformResourceIdentifier; diff --git a/contrib/openssl-cmake/linux_s390x/include/openssl/cmp.h b/contrib/openssl-cmake/linux_s390x/include/openssl/cmp.h index 2476042c531..49825570d8c 100644 --- a/contrib/openssl-cmake/linux_s390x/include/openssl/cmp.h +++ b/contrib/openssl-cmake/linux_s390x/include/openssl/cmp.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/cmp.h.in + * Generated by Makefile from include/openssl/cmp.h.in * - * Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved. 
* Copyright Nokia 2007-2019 * Copyright Siemens AG 2015-2019 * @@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ -# define OSSL_CMP_PKISTATUS_accepted 0 -# define OSSL_CMP_PKISTATUS_grantedWithMods 1 -# define OSSL_CMP_PKISTATUS_rejection 2 -# define OSSL_CMP_PKISTATUS_waiting 3 -# define OSSL_CMP_PKISTATUS_revocationWarning 4 +# define OSSL_CMP_PKISTATUS_request -3 +# define OSSL_CMP_PKISTATUS_trans -2 +# define OSSL_CMP_PKISTATUS_unspecified -1 +# define OSSL_CMP_PKISTATUS_accepted 0 +# define OSSL_CMP_PKISTATUS_grantedWithMods 1 +# define OSSL_CMP_PKISTATUS_rejection 2 +# define OSSL_CMP_PKISTATUS_waiting 3 +# define OSSL_CMP_PKISTATUS_revocationWarning 4 # define OSSL_CMP_PKISTATUS_revocationNotification 5 -# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 +# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 typedef ASN1_INTEGER OSSL_CMP_PKISTATUS; DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS) @@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted, int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey); int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx, const unsigned char *ref, int len); -int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec, - const int len); +int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, + const unsigned char *sec, int len); /* CMP message header and extra certificates: */ int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name); int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); +int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx); int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx, STACK_OF(X509) *extraCertsOut); /* certificate template: */ @@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr); OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid); OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx, const char *propq); diff --git a/contrib/openssl-cmake/linux_s390x/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_s390x/include/openssl/opensslv.h index 81c1b93afaa..3c221e1ac23 100644 --- a/contrib/openssl-cmake/linux_s390x/include/openssl/opensslv.h +++ b/contrib/openssl-cmake/linux_s390x/include/openssl/opensslv.h @@ -1,6 +1,6 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/opensslv.h.in + * Generated by Makefile from include/openssl/opensslv.h.in * * Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved. * @@ -29,7 +29,7 @@ extern "C" { */ # define OPENSSL_VERSION_MAJOR 3 # define OPENSSL_VERSION_MINOR 0 -# define OPENSSL_VERSION_PATCH 7 +# define OPENSSL_VERSION_PATCH 10 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. */ -# define OPENSSL_VERSION_STR "3.0.7" -# define OPENSSL_FULL_VERSION_STR "3.0.7" +# define OPENSSL_VERSION_STR "3.0.10" +# define OPENSSL_FULL_VERSION_STR "3.0.10" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. 
*/ -# define OPENSSL_RELEASE_DATE "1 Nov 2022" +# define OPENSSL_RELEASE_DATE "1 Aug 2023" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/linux_s390x/include/openssl/x509v3.h b/contrib/openssl-cmake/linux_s390x/include/openssl/x509v3.h index fb4b49ca349..20b67455f20 100644 --- a/contrib/openssl-cmake/linux_s390x/include/openssl/x509v3.h +++ b/contrib/openssl-cmake/linux_s390x/include/openssl/x509v3.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/x509v3.h.in + * Generated by Makefile from include/openssl/x509v3.h.in * - * Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st { OTHERNAME *otherName; /* otherName */ ASN1_IA5STRING *rfc822Name; ASN1_IA5STRING *dNSName; - ASN1_TYPE *x400Address; + ASN1_STRING *x400Address; X509_NAME *directoryName; EDIPARTYNAME *ediPartyName; ASN1_IA5STRING *uniformResourceIdentifier; diff --git a/contrib/openssl-cmake/linux_x86_64/include/openssl/cmp.h b/contrib/openssl-cmake/linux_x86_64/include/openssl/cmp.h index 2476042c531..49825570d8c 100644 --- a/contrib/openssl-cmake/linux_x86_64/include/openssl/cmp.h +++ b/contrib/openssl-cmake/linux_x86_64/include/openssl/cmp.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/cmp.h.in + * Generated by Makefile from include/openssl/cmp.h.in * - * Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved. 
* Copyright Nokia 2007-2019 * Copyright Siemens AG 2015-2019 * @@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ -# define OSSL_CMP_PKISTATUS_accepted 0 -# define OSSL_CMP_PKISTATUS_grantedWithMods 1 -# define OSSL_CMP_PKISTATUS_rejection 2 -# define OSSL_CMP_PKISTATUS_waiting 3 -# define OSSL_CMP_PKISTATUS_revocationWarning 4 +# define OSSL_CMP_PKISTATUS_request -3 +# define OSSL_CMP_PKISTATUS_trans -2 +# define OSSL_CMP_PKISTATUS_unspecified -1 +# define OSSL_CMP_PKISTATUS_accepted 0 +# define OSSL_CMP_PKISTATUS_grantedWithMods 1 +# define OSSL_CMP_PKISTATUS_rejection 2 +# define OSSL_CMP_PKISTATUS_waiting 3 +# define OSSL_CMP_PKISTATUS_revocationWarning 4 # define OSSL_CMP_PKISTATUS_revocationNotification 5 -# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 +# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 typedef ASN1_INTEGER OSSL_CMP_PKISTATUS; DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS) @@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted, int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey); int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx, const unsigned char *ref, int len); -int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec, - const int len); +int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, + const unsigned char *sec, int len); /* CMP message header and extra certificates: */ int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name); int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); +int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx); int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx, STACK_OF(X509) *extraCertsOut); /* certificate template: */ @@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr); OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid); OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx, const char *propq); diff --git a/contrib/openssl-cmake/linux_x86_64/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_x86_64/include/openssl/opensslv.h index 81c1b93afaa..3c221e1ac23 100644 --- a/contrib/openssl-cmake/linux_x86_64/include/openssl/opensslv.h +++ b/contrib/openssl-cmake/linux_x86_64/include/openssl/opensslv.h @@ -1,6 +1,6 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/opensslv.h.in + * Generated by Makefile from include/openssl/opensslv.h.in * * Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved. * @@ -29,7 +29,7 @@ extern "C" { */ # define OPENSSL_VERSION_MAJOR 3 # define OPENSSL_VERSION_MINOR 0 -# define OPENSSL_VERSION_PATCH 7 +# define OPENSSL_VERSION_PATCH 10 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. */ -# define OPENSSL_VERSION_STR "3.0.7" -# define OPENSSL_FULL_VERSION_STR "3.0.7" +# define OPENSSL_VERSION_STR "3.0.10" +# define OPENSSL_FULL_VERSION_STR "3.0.10" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. 
*/ -# define OPENSSL_RELEASE_DATE "1 Nov 2022" +# define OPENSSL_RELEASE_DATE "1 Aug 2023" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/linux_x86_64/include/openssl/x509v3.h b/contrib/openssl-cmake/linux_x86_64/include/openssl/x509v3.h index fb4b49ca349..20b67455f20 100644 --- a/contrib/openssl-cmake/linux_x86_64/include/openssl/x509v3.h +++ b/contrib/openssl-cmake/linux_x86_64/include/openssl/x509v3.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/x509v3.h.in + * Generated by Makefile from include/openssl/x509v3.h.in * - * Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st { OTHERNAME *otherName; /* otherName */ ASN1_IA5STRING *rfc822Name; ASN1_IA5STRING *dNSName; - ASN1_TYPE *x400Address; + ASN1_STRING *x400Address; X509_NAME *directoryName; EDIPARTYNAME *ediPartyName; ASN1_IA5STRING *uniformResourceIdentifier; diff --git a/contrib/usearch b/contrib/usearch index 387b78b28b1..f942b6f334b 160000 --- a/contrib/usearch +++ b/contrib/usearch @@ -1 +1 @@ -Subproject commit 387b78b28b17b8954024ffc81e97cbcfa10d1f30 +Subproject commit f942b6f334b31716f9bdb02eb6a25fa6b222f5ba diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index c7206550bd8..ab0d89c3184 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.7.4.5" +ARG VERSION="23.7.5.30" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index a3a1f912e90..ac7d6a40518 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.7.4.5" +ARG VERSION="23.7.5.30" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. 
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 4936cfeccb0..10697142a51 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.7.4.5" +ARG VERSION="23.7.5.30" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/integration/hive_server/Dockerfile b/docker/test/integration/hive_server/Dockerfile index b06a0dcc830..e37e2800557 100644 --- a/docker/test/integration/hive_server/Dockerfile +++ b/docker/test/integration/hive_server/Dockerfile @@ -6,7 +6,7 @@ RUN apt-get install -y wget openjdk-8-jre RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-3.1.0/hadoop-3.1.0.tar.gz && \ tar -xf hadoop-3.1.0.tar.gz && rm -rf hadoop-3.1.0.tar.gz -RUN wget https://dlcdn.apache.org/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz && \ +RUN wget https://apache.apache.org/dist/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz && \ tar -xf apache-hive-2.3.9-bin.tar.gz && rm -rf apache-hive-2.3.9-bin.tar.gz RUN apt install -y vim diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index f67c45f7114..df0f105ab6f 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -103,7 +103,7 @@ RUN python3 -m pip install --no-cache-dir \ urllib3 # Hudi supports only spark 3.3.*, not 3.4 -RUN curl -fsSL -O https://dlcdn.apache.org/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \ +RUN curl -fsSL -O https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \ && tar xzvf spark-3.3.2-bin-hadoop3.tgz -C / \ && rm spark-3.3.2-bin-hadoop3.tgz diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index ab670209a69..35a6e9c365b 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -73,7 +73,7 @@ RUN arch=${TARGETARCH:-amd64} \ && chmod +x ./mc ./minio -RUN wget --no-verbose 'https://dlcdn.apache.org/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \ +RUN wget --no-verbose 'https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \ && tar -xvf hadoop-3.3.1.tar.gz \ && rm -rf hadoop-3.3.1.tar.gz diff --git a/docs/changelogs/v22.8.21.38-lts.md b/docs/changelogs/v22.8.21.38-lts.md new file mode 100644 index 00000000000..fc919b25735 --- /dev/null +++ b/docs/changelogs/v22.8.21.38-lts.md @@ -0,0 +1,36 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.8.21.38-lts (70872e9859e) FIXME as compared to v22.8.20.11-lts (c9ca79e24e8) + +#### Build/Testing/Packaging Improvement +* Backported in [#53017](https://github.com/ClickHouse/ClickHouse/issues/53017): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#53459](https://github.com/ClickHouse/ClickHouse/issues/53459): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). 
[#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). +* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). +* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). +* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix broken `02862_sorted_distinct_sparse_fix` [#53738](https://github.com/ClickHouse/ClickHouse/pull/53738) ([Antonio Andelic](https://github.com/antonio2368)). +* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ diff --git a/docs/changelogs/v23.3.11.5-lts.md b/docs/changelogs/v23.3.11.5-lts.md new file mode 100644 index 00000000000..b671c7e5bb6 --- /dev/null +++ b/docs/changelogs/v23.3.11.5-lts.md @@ -0,0 +1,17 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.11.5-lts (5762a23a76d) FIXME as compared to v23.3.10.5-lts (d8737007f9e) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.5.5.92-stable.md b/docs/changelogs/v23.5.5.92-stable.md new file mode 100644 index 00000000000..ade39b7545d --- /dev/null +++ b/docs/changelogs/v23.5.5.92-stable.md @@ -0,0 +1,62 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.5.5.92-stable (557edaddace) FIXME as compared to v23.5.4.25-stable (190f962abcf) + +#### Performance Improvement +* Backported in [#52749](https://github.com/ClickHouse/ClickHouse/issues/52749): Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1` . This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823) . This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173) . [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)). + +#### Build/Testing/Packaging Improvement +* Backported in [#51886](https://github.com/ClickHouse/ClickHouse/issues/51886): Update cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#52909](https://github.com/ClickHouse/ClickHouse/issues/52909): Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#53021](https://github.com/ClickHouse/ClickHouse/issues/53021): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#53289](https://github.com/ClickHouse/ClickHouse/issues/53289): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud., the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#53463](https://github.com/ClickHouse/ClickHouse/issues/53463): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix backward compatibility for IP types hashing in aggregate functions [#50551](https://github.com/ClickHouse/ClickHouse/pull/50551) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix segfault in MathUnary [#51499](https://github.com/ClickHouse/ClickHouse/pull/51499) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)). +* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)). +* Fix async connect to hosts with multiple ips [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)). +* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). +* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). +* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). +* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). 
+* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix fuzzer crash in parseDateTime() [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Decoupled commits from [#51180](https://github.com/ClickHouse/ClickHouse/issues/51180) for backports [#51561](https://github.com/ClickHouse/ClickHouse/pull/51561) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix MergeTreeMarksLoader segfaulting if marks file is longer than expected [#51636](https://github.com/ClickHouse/ClickHouse/pull/51636) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix source image for sqllogic [#51728](https://github.com/ClickHouse/ClickHouse/pull/51728) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Pin rust nightly (to make it stable) [#51903](https://github.com/ClickHouse/ClickHouse/pull/51903) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Less replication errors [#52382](https://github.com/ClickHouse/ClickHouse/pull/52382) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Improve logging macros [#52519](https://github.com/ClickHouse/ClickHouse/pull/52519) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.6.3.87-stable.md b/docs/changelogs/v23.6.3.87-stable.md new file mode 100644 index 00000000000..8db499f308a --- /dev/null +++ b/docs/changelogs/v23.6.3.87-stable.md @@ -0,0 +1,58 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.6.3.87-stable (36911c17d0f) FIXME as compared to v23.6.2.18-stable (89f39a7ccfe) + +#### Performance Improvement +* Backported in [#52751](https://github.com/ClickHouse/ClickHouse/issues/52751): Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1` . This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823) . 
This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173) . [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)). + +#### Build/Testing/Packaging Improvement +* Backported in [#52911](https://github.com/ClickHouse/ClickHouse/issues/52911): Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#53023](https://github.com/ClickHouse/ClickHouse/issues/53023): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#53290](https://github.com/ClickHouse/ClickHouse/issues/53290): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud., the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#53465](https://github.com/ClickHouse/ClickHouse/issues/53465): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)). +* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)). +* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). +* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). +* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). +* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). +* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix fuzzer crash in parseDateTime() [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix: logical error in grace hash join [#51737](https://github.com/ClickHouse/ClickHouse/pull/51737) ([Igor Nikonov](https://github.com/devcrafter)). +* Pin rust nightly (to make it stable) [#51903](https://github.com/ClickHouse/ClickHouse/pull/51903) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Less replication errors [#52382](https://github.com/ClickHouse/ClickHouse/pull/52382) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Improve logging macros [#52519](https://github.com/ClickHouse/ClickHouse/pull/52519) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Increase min protocol version for sparse serialization [#52835](https://github.com/ClickHouse/ClickHouse/pull/52835) ([Anton Popov](https://github.com/CurtizJ)). +* Docker improvements [#52869](https://github.com/ClickHouse/ClickHouse/pull/52869) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.7.5.30-stable.md b/docs/changelogs/v23.7.5.30-stable.md new file mode 100644 index 00000000000..78bef9fb489 --- /dev/null +++ b/docs/changelogs/v23.7.5.30-stable.md @@ -0,0 +1,31 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.7.5.30-stable (e86c21fb922) FIXME as compared to v23.7.4.5-stable (bd2fcd44553) + +#### Build/Testing/Packaging Improvement +* Backported in [#53291](https://github.com/ClickHouse/ClickHouse/issues/53291): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud., the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#53467](https://github.com/ClickHouse/ClickHouse/issues/53467): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). +* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix wrong columns order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix fuzzer crash in parseDateTime() [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix broken `02862_sorted_distinct_sparse_fix` [#53738](https://github.com/ClickHouse/ClickHouse/pull/53738) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md index 68d41d4b0a7..8988d8cde62 100644 --- a/docs/en/engines/table-engines/integrations/s3queue.md +++ b/docs/en/engines/table-engines/integrations/s3queue.md @@ -39,7 +39,7 @@ CREATE TABLE s3_queue_engine_table (name String, value UInt32) CREATE TABLE s3queue_engine_table (name String, value UInt32) ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip') SETTINGS - mode = 'ordred'; + mode = 'ordered'; ``` Using named collections: @@ -60,7 +60,7 @@ Using named collections: CREATE TABLE s3queue_engine_table (name String, value UInt32) ENGINE=S3Queue(s3queue_conf, format = 'CSV', compression_method = 'gzip') SETTINGS - mode = 'ordred'; + mode = 'ordered'; ``` ## Settings {#s3queue-settings} @@ -188,7 +188,7 @@ Example: CREATE TABLE s3queue_engine_table (name String, value UInt32) ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip') SETTINGS - mode = 'unordred', + mode = 'unordered', keeper_path = '/clickhouse/s3queue/'; CREATE TABLE stats (name String, value UInt32) diff --git a/docs/en/getting-started/example-datasets/laion.md b/docs/en/getting-started/example-datasets/laion.md index 077adf016a3..aa86f10852b 100644 --- a/docs/en/getting-started/example-datasets/laion.md +++ b/docs/en/getting-started/example-datasets/laion.md @@ -1,23 +1,21 @@ # Laion-400M dataset -The dataset contains 400 million images with English text. For more information follow this [link](https://laion.ai/blog/laion-400-open-dataset/). Laion provides even larger datasets (e.g. [5 billion](https://laion.ai/blog/laion-5b/)). Working with them will be similar. +The [Laion-400M dataset](https://laion.ai/blog/laion-400-open-dataset/) contains 400 million images with English image captions. Laion nowadays provides [an even larger dataset](https://laion.ai/blog/laion-5b/) but working with it will be similar. -The dataset has prepared embeddings for texts and images. This will be used to demonstrate [Approximate nearest neighbor search indexes](../../engines/table-engines/mergetree-family/annindexes.md). +The dataset contains the image URL, embeddings for both the image and the image caption, a similarity score between the image and the image caption, as well as metadata, e.g. the image width/height, the licence and a NSFW flag. We can use the dataset to demonstrate [approximate nearest neighbor search](../../engines/table-engines/mergetree-family/annindexes.md) in ClickHouse. -## Prepare data +## Data preparation -Embeddings are stored in `.npy` files, so we have to read them with python and merge with other data. - -Download data and process it with simple `download.sh` script: +The embeddings and the metadata are stored in separate files in the raw data. A data preparation step downloads the data, merges the files, +converts them to CSV and imports them into ClickHouse. 
You can use the following `download.sh` script for that: ```bash -wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy -wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet -wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy -python3 process.py ${1} +wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy # download image embedding +wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy # download text embedding +wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet # download metadata +python3 process.py ${1} # merge files and convert to CSV ``` - -Where `process.py`: +Script `process.py` is defined as follows: ```python import pandas as pd @@ -35,11 +33,11 @@ im_emb = np.load(npy_file) text_emb = np.load(text_npy) data = pd.read_parquet(metadata_file) -# combine them +# combine files data = pd.concat([data, pd.DataFrame({"image_embedding" : [*im_emb]}), pd.DataFrame({"text_embedding" : [*text_emb]})], axis=1, copy=False) -# you can save more columns -data = data[['url', 'caption', 'similarity', "image_embedding", "text_embedding"]] +# columns to be imported into ClickHouse +data = data[['url', 'caption', 'NSFW', 'similarity', "image_embedding", "text_embedding"]] # transform np.arrays to lists data['image_embedding'] = data['image_embedding'].apply(lambda x: list(x)) @@ -48,30 +46,32 @@ data['text_embedding'] = data['text_embedding'].apply(lambda x: list(x)) # this small hack is needed becase caption sometimes contains all kind of quotes data['caption'] = data['caption'].apply(lambda x: x.replace("'", " ").replace('"', " ")) -# save data to file +# export data as CSV file data.to_csv(str_i + '.csv', header=False) -# previous files can be removed +# removed raw data files os.system(f"rm {npy_file} {metadata_file} {text_npy}") ``` -You can download data with +To start the data preparation pipeline, run: + ```bash seq 0 409 | xargs -P100 -I{} bash -c './download.sh {}' ``` -The dataset is divided into 409 files. If you want to work only with a certain part of the dataset, just change the limits. +The dataset is split into 410 files, each file contains ca. 1 million rows. If you like to work with a smaller subset of the data, simply adjust the limits, e.g. `seq 0 9 | ...`. 
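Before creating the table, the output of one shard can be sanity-checked with `clickhouse-local` (a minimal sketch, assuming the generated `0.csv` sits in the current directory; the CSV has no header, so schema inference names the columns `c1`, `c2`, ... in the order written by `process.py`):

```sql
-- Peek at the first rows of one generated file before importing everything.
-- Column order follows process.py: id, url, caption, NSFW, similarity, embeddings.
SELECT c1 AS id, c2 AS url, c3 AS caption, c4 AS NSFW, c5 AS similarity
FROM file('0.csv', 'CSV')
LIMIT 5;
```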
-## Create table for laion +## Create table -Without indexes table can be created by +To create a table without indexes, run: ```sql -CREATE TABLE laion_dataset +CREATE TABLE laion ( `id` Int64, `url` String, `caption` String, + `NSFW` String, `similarity` Float32, `image_embedding` Array(Float32), `text_embedding` Array(Float32) @@ -81,23 +81,23 @@ ORDER BY id SETTINGS index_granularity = 8192 ``` -Fill table with data: +To import the CSV files into ClickHouse: ```sql -INSERT INTO laion_dataset FROM INFILE '{path_to_csv_files}/*.csv' +INSERT INTO laion FROM INFILE '{path_to_csv_files}/*.csv' ``` -## Check data in table without indexes +## Run a brute-force ANN search (without ANN index) -Let's check the work of the following query on the part of the dataset (8 million records): +To run a brute-force approximate nearest neighbor search, run: ```sql -select url, caption from test_laion where similarity > 0.2 order by L2Distance(image_embedding, {target:Array(Float32)}) limit 30 +SELECT url, caption FROM laion WHERE similarity > 0.2 ORDER BY L2Distance(image_embedding, {target:Array(Float32)}) LIMIT 30 ``` -Since the embeddings for images and texts may not match, let's also require a certain threshold of matching accuracy to get images that are more likely to satisfy our queries. The client parameter `target`, which is an array of 512 elements. See later in this article for a convenient way of obtaining such vectors. I used a random picture of a cat from the Internet as a target vector. +The filter on `similarity` makes sure that the images correspond to the image captions in the query results. `target` is an array of 512 elements and a client parameter. A convenient way to obtain such arrays will be presented at the end of the article. For now, we can run the embedding of a random cat picture as `target`. -**The result** +**Result** ``` ┌─url───────────────────────────────────────────────────────────────────────────────────────────────────────────┬─caption────────────────────────────────────────────────────────────────┐ @@ -114,32 +114,32 @@ Since the embeddings for images and texts may not match, let's also require a ce 8 rows in set. Elapsed: 6.432 sec. Processed 19.65 million rows, 43.96 GB (3.06 million rows/s., 6.84 GB/s.) ``` -## Add indexes +## Run a ANN with an ANN index -Create a new table or follow instructions from [alter documentation](../../sql-reference/statements/alter/skipping-index.md). +Either create a new table or use [ALTER TABLE ADD INDEX](../../sql-reference/statements/alter/skipping-index.md) to add an ANN index: ```sql -CREATE TABLE laion_dataset +CREATE TABLE laion ( `id` Int64, `url` String, `caption` String, + `NSFW` String, `similarity` Float32, `image_embedding` Array(Float32), `text_embedding` Array(Float32), - INDEX annoy_image image_embedding TYPE annoy(1000) GRANULARITY 1000, - INDEX annoy_text text_embedding TYPE annoy(1000) GRANULARITY 1000 + INDEX annoy_image image_embedding TYPE annoy(1000), + INDEX annoy_text text_embedding TYPE annoy(1000) ) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192 ``` -When created, the index will be built by L2Distance. You can read more about the parameters in the [annoy documentation](../../engines/table-engines/mergetree-family/annindexes.md#annoy-annoy). It makes sense to build indexes for a large number of granules. If you need good speed, then GRANULARITY should be several times larger than the expected number of results in the search. 
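If the data has already been imported, the same indexes can also be added to the existing table instead of recreating it (a minimal sketch, assuming the `laion` table from above; the Annoy parameters mirror the `CREATE TABLE` example):

```sql
-- Add the Annoy indexes to an existing table and build them for existing parts.
-- Depending on the server version, SET allow_experimental_annoy_index = 1 may be required.
ALTER TABLE laion ADD INDEX annoy_image image_embedding TYPE annoy(1000);
ALTER TABLE laion ADD INDEX annoy_text text_embedding TYPE annoy(1000);
ALTER TABLE laion MATERIALIZE INDEX annoy_image;
ALTER TABLE laion MATERIALIZE INDEX annoy_text;
```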
-Now let's check again with the same query: +By default, Annoy indexes use the L2 distance as metric. Further tuning knobs for index creation and search are described in the Annoy index [documentation](../../engines/table-engines/mergetree-family/annindexes.md). Let's check now again with the same query: ```sql -select url, caption from test_indexes_laion where similarity > 0.2 order by L2Distance(image_embedding, {target:Array(Float32)}) limit 8 +SELECT url, caption FROM test_indexes_laion WHERE similarity > 0.2 ORDER BY l2Distance(image_embedding, {target:Array(Float32)}) LIMIT 8 ``` **Result** @@ -159,15 +159,18 @@ select url, caption from test_indexes_laion where similarity > 0.2 order by L2Di 8 rows in set. Elapsed: 0.641 sec. Processed 22.06 thousand rows, 49.36 MB (91.53 thousand rows/s., 204.81 MB/s.) ``` -The speed has increased significantly. But now, the results sometimes differ from what you are looking for. This is due to the approximation of the search and the quality of the constructed embedding. Note that the example was given for picture embeddings, but there are also text embeddings in the dataset, which can also be used for searching. +The speed increased significantly at the cost of less accurate results. This is because the ANN index only provide approximate search results. Note the example searched for similar image embeddings, yet it is also possible to search for positive image caption embeddings. -## Scripts for embeddings +## Creating embeddings with UDFs -Usually, we do not want to get embeddings from existing data, but to get them for new data and look for similar ones in old data. We can use [UDF](../../sql-reference/functions/index.md#sql-user-defined-functions) for this purpose. They will allow you to set the `target` vector without leaving the client. All of the following scripts will be written for the `ViT-B/32` model, as it was used for this dataset. You can use any model, but it is necessary to build embeddings in the dataset and for new objects using the same model. +One usually wants to create embeddings for new images or new image captions and search for similar image / image caption pairs in the data. We can use [UDF](../../sql-reference/functions/index.md#sql-user-defined-functions) to create the `target` vector without leaving the client. It is important to use the same model to create the data and new embeddings for searches. The following scripts utilize the `ViT-B/32` model which also underlies the dataset. ### Text embeddings +First, store the following Python script in the `user_scripts/` directory of your ClickHouse data path and make it executable (`chmod +x encode_text.py`). + `encode_text.py`: + ```python #!/usr/bin/python3 import clip @@ -182,10 +185,12 @@ if __name__ == '__main__': inputs = clip.tokenize(text) with torch.no_grad(): text_features = model.encode_text(inputs)[0].tolist() + print(text_features) sys.stdout.flush() ``` -`encode_text_function.xml`: +Then create `encode_text_function.xml` in a location referenced by `/path/to/*_function.xml` in your ClickHouse server configuration file. + ```xml @@ -203,19 +208,19 @@ if __name__ == '__main__': ``` -Now we can simply use: +You can now simply use: ```sql SELECT encode_text('cat'); ``` - -The first use will be slow because the model needs to be loaded. But repeated queries will be fast. Then we copy the results to ``set param_target=...`` and can easily write queries +The first run will be slow because it loads the model, but repeated runs will be fast. 
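For a quick end-to-end test, the UDF can also be called directly inside the search query (a rough sketch, assuming the `laion` table and the `encode_text` function defined above):

```sql
-- Compute the text embedding on the fly and use it as the search target.
SELECT url, caption
FROM laion
WHERE similarity > 0.2
ORDER BY L2Distance(image_embedding, encode_text('a grey cat sitting on a sofa'))
LIMIT 8;
```

Copying the resulting vector into a query parameter, as described next, avoids invoking the model again for every query.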
We can then copy the output to `SET param_target=...` and can easily write queries. ### Image embeddings -For pictures, the process is similar, but you send the path instead of the picture (if necessary, you can implement a download picture with processing, but it will take longer) +Image embeddings can be created similarly but we will provide the Python script the path to a local image instead of the image caption text. + +`encode_image.py` -`encode_picture.py` ```python #!/usr/bin/python3 import clip @@ -231,29 +236,31 @@ if __name__ == '__main__': image = preprocess(Image.open(text.strip())).unsqueeze(0).to(device) with torch.no_grad(): image_features = model.encode_image(image)[0].tolist() - print(image_features) + print(image_features) sys.stdout.flush() ``` -`encode_picture_function.xml` +`encode_image_function.xml` + ```xml executable_pool - encode_picture + encode_image Array(Float32) String path TabSeparated - encode_picture.py + encode_image.py 1000000 ``` -The query: +Then run this query: + ```sql -SELECT encode_picture('some/path/to/your/picture'); +SELECT encode_image('/path/to/your/image'); ``` diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md index ce5ab24ecb0..32c612dfa5f 100644 --- a/docs/en/interfaces/mysql.md +++ b/docs/en/interfaces/mysql.md @@ -10,6 +10,10 @@ ClickHouse supports the MySQL wire protocol. This allow tools that are MySQL-com ## Enabling the MySQL Interface On ClickHouse Cloud +:::note +The MySQL interface for ClickHouse Cloud is currently in private preview. Please contact support@clickhouse.com to enable this feature for your ClickHouse Cloud service. +::: + 1. After creating your ClickHouse Cloud Service, on the credentials screen, select the MySQL tab ![Credentials screen - Prompt](./images/mysql1.png) diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index a19c55673ed..81a35ad1ea9 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -65,7 +65,7 @@ XML substitution example: Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element. -## Encrypting Configuration {#encryption} +## Encrypting and Hiding Configuration {#encryption} You can use symmetric encryption to encrypt a configuration element, for example, a password field. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add attribute `encrypted_by` with the name of the encryption codec as value to the element to encrypt. @@ -102,6 +102,21 @@ Example: 961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 ``` +Even with applied encryption in the preprocessed file the elements are still saved in plain text. In case this is a problem, we suggest two alternatives: either set file permissions of the preprocessed file to 600 or use the `hide_in_preprocessed` attribute. + +Example: + +```xml + + + + admin + secret + + + +``` + ## User Settings {#user-settings} The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the `users_config` element. By default, it is `users.xml`. 
If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 4d4ff5fc235..66b8c923d5c 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4643,3 +4643,19 @@ SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_pars │ 1.7091 │ 15008753 │ └─────────────────────┴──────────────────────────┘ ``` + +## validate_tcp_client_information {#validate-tcp-client-information} + +Determines whether validation of client information enabled when query packet is received from a client using a TCP connection. + +If `true`, an exception will be thrown on invalid client information from the TCP client. + +If `false`, the data will not be validated. The server will work with clients of all versions. + +The default value is `false`. + +**Example** + +``` xml +true +``` diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index 0b17afb7e12..737c2b81dee 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -30,7 +30,7 @@ curl https://clickhouse.com/ | sh The binary you just downloaded can run all sorts of ClickHouse tools and utilities. If you want to run ClickHouse as a database server, check out the [Quick Start](../../quick-start.mdx). ::: -## Query data in a CSV file using SQL +## Query data in a file using SQL {#query_data_in_file} A common use of `clickhouse-local` is to run ad-hoc queries on files: where you don't have to insert the data into a table. `clickhouse-local` can stream the data from a file into a temporary table and execute your SQL. @@ -57,6 +57,19 @@ The `file` table function creates a table, and you can use `DESCRIBE` to see the ./clickhouse local -q "DESCRIBE file('reviews.tsv')" ``` +:::tip +You are allowed to use globs in file name (See [glob substitutions](/docs/en/sql-reference/table-functions/file.md/#globs-in-path)). + +Examples: + +```bash +./clickhouse local -q "SELECT * FROM 'reviews*.jsonl'" +./clickhouse local -q "SELECT * FROM 'review_?.csv'" +./clickhouse local -q "SELECT * FROM 'review_{1..3}.csv'" +``` + +::: + ```response marketplace Nullable(String) customer_id Nullable(Int64) diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index 11d4eae7bc8..6f07630298a 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -40,6 +40,32 @@ There are multiple ways of user identification: - `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'` - `IDENTIFIED BY 'qwerty'` +Password complexity requirements can be edited in [config.xml](/docs/en/operations/configuration-files). Below is an example configuration that requires passwords to be at least 12 characters long and contain 1 number. Each password complexity rule requires a regex to match against passwords and a description of the rule. + +```xml + + + + .{12} + be at least 12 characters long + + + \p{N} + contain at least 1 numeric character + + + +``` + +:::note +In ClickHouse Cloud, by default, passwords must meet the following complexity requirements: +- Be at least 12 characters long +- Contain at least 1 numeric character +- Contain at least 1 uppercase character +- Contain at least 1 lowercase character +- Contain at least 1 special character +::: + ## Examples 1. 
The following username is `name1` and does not require a password - which obviously doesn't provide much security: diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index 2863c5c0116..86a4e9639f5 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -60,9 +60,9 @@ Specifics of each optional clause are covered in separate sections, which are li If you want to include all columns in the result, use the asterisk (`*`) symbol. For example, `SELECT * FROM ...`. -### COLUMNS expression +### Dynamic column selection -To match some columns in the result with a [re2](https://en.wikipedia.org/wiki/RE2_(software)) regular expression, you can use the `COLUMNS` expression. +Dynamic column selection (also known as a COLUMNS expression) allows you to match some columns in a result with a [re2](https://en.wikipedia.org/wiki/RE2_(software)) regular expression. ``` sql COLUMNS('regexp') diff --git a/docs/ru/operations/configuration-files.md b/docs/ru/operations/configuration-files.md index 085761d80c7..3b037521692 100644 --- a/docs/ru/operations/configuration-files.md +++ b/docs/ru/operations/configuration-files.md @@ -85,7 +85,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml Сервер следит за изменениями конфигурационных файлов, а также файлов и ZooKeeper-узлов, которые были использованы при выполнении подстановок и переопределений, и перезагружает настройки пользователей и кластеров на лету. То есть, можно изменять кластера, пользователей и их настройки без перезапуска сервера. -## Шифрование {#encryption} +## Шифрование и Скрытие {#encryption} Вы можете использовать симметричное шифрование для зашифровки элемента конфигурации, например, поля password. Чтобы это сделать, сначала настройте [кодек шифрования](../sql-reference/statements/create/table.md#encryption-codecs), затем добавьте аттибут`encrypted_by` с именем кодека шифрования как значение к элементу, который надо зашифровать. @@ -122,6 +122,21 @@ $ cat /etc/clickhouse-server/users.d/alice.xml 961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 ``` +Даже с применённым шифрованием в файле предобработки элементы все равно сохраняются в незашифрованном виде. В случае если это является проблемой, мы предлагаем две альтернативы: или установить разрешения на файл предобработки 600 или использовать аттрибут `hide_in_preprocessed`. + +Пример: + +```xml + + + + admin + secret + + + +``` + ## Примеры записи конфигурации на YAML {#example} Здесь можно рассмотреть пример реальной конфигурации записанной на YAML: [config.yaml.example](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.yaml.example). diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 7b026244624..652a03a0df5 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -1923,3 +1923,19 @@ ClickHouse использует ZooKeeper для хранения метадан - Положительное целое число. Значение по умолчанию: `10000`. + +## validate_tcp_client_information {#validate-tcp-client-information} + +Включена ли валидация данных о клиенте при запросе от клиента, использующего TCP соединение. + +Если `true`, то на неверные данные от клиента будет выброшено исключение. + +Если `false`, то данные не будут валидироваться. Сервер будет работать с клиентами всех версий. 
+ +Значение по умолчанию: `false`. + +**Пример** + +``` xml +true +``` diff --git a/docs/ru/operations/utilities/clickhouse-local.md b/docs/ru/operations/utilities/clickhouse-local.md index 61fba2dd7cc..8851be326d4 100644 --- a/docs/ru/operations/utilities/clickhouse-local.md +++ b/docs/ru/operations/utilities/clickhouse-local.md @@ -110,3 +110,42 @@ Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec. ├──────────┼──────────┤ ... ``` + +## Запрос данных в файле с помощью SQL {#query_data_in_file} + +Часто `clickhouse-local` используется для выполнения специальных запросов к файлам, когда не нужно вставлять данные в таблицу. `clickhouse-local` может транслировать данные из файла во временную таблицу и выполнить ваш SQL. + +Если файл находится на той же машине, что и `clickhouse-local`, то можно просто указать файл для загрузки. Следующий файл `reviews.tsv` содержит выборку отзывов о товарах Amazon: + +```bash +./clickhouse local -q "SELECT * FROM 'reviews.tsv'" +``` + +Эта команда является сокращением команды: + +```bash +./clickhouse local -q "SELECT * FROM file('reviews.tsv')" +``` + +ClickHouse знает, что файл использует формат, разделенный табуляцией, из расширения имени файла. Если необходимо явно указать формат, просто добавьте один из [множества входных форматов ClickHouse](../../interfaces/formats.md): + +```bash +./clickhouse local -q "SELECT * FROM file('reviews.tsv', 'TabSeparated')" +``` + +Функция таблицы `file` создает таблицу, и вы можете использовать `DESCRIBE` для просмотра предполагаемой схемы: + +```bash +./clickhouse local -q "DESCRIBE file('reviews.tsv')" +``` + +:::tip +В имени файла разрешается использовать [Шаблоны поиска](/docs/ru/sql-reference/table-functions/file.md/#globs-in-path). + +Примеры: + +```bash +./clickhouse local -q "SELECT * FROM 'reviews*.jsonl'" +./clickhouse local -q "SELECT * FROM 'review_?.csv'" +./clickhouse local -q "SELECT * FROM 'review_{1..3}.csv'" +``` diff --git a/docs/zh/sql-reference/functions/arithmetic-functions.md b/docs/zh/sql-reference/functions/arithmetic-functions.md index b0a37565c16..49d800fd069 100644 --- a/docs/zh/sql-reference/functions/arithmetic-functions.md +++ b/docs/zh/sql-reference/functions/arithmetic-functions.md @@ -125,7 +125,7 @@ SELECT max2(-1, 2); **语法** ```sql -max2(value1, value2) +min2(value1, value2) ``` **参数** diff --git a/programs/keeper-client/KeeperClient.cpp b/programs/keeper-client/KeeperClient.cpp index 893be4d25d1..6e151bf5dc3 100644 --- a/programs/keeper-client/KeeperClient.cpp +++ b/programs/keeper-client/KeeperClient.cpp @@ -2,6 +2,7 @@ #include "Commands.h" #include #include +#include #include #include #include @@ -155,6 +156,11 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options) .argument("") .binding("operation-timeout")); + options.addOption( + Poco::Util::Option("config-file", "c", "if set, will try to get a connection string from clickhouse config. default `config.xml`") + .argument("") + .binding("config-file")); + options.addOption( Poco::Util::Option("history-file", "", "set path of history file. 
default `~/.keeper-client-history`") .argument("") @@ -211,7 +217,14 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */) } } - Poco::Logger::root().setLevel(config().getString("log-level", "error")); + String default_log_level; + if (config().has("query")) + /// We don't want to see any information log in query mode, unless it was set explicitly + default_log_level = "error"; + else + default_log_level = "information"; + + Poco::Logger::root().setLevel(config().getString("log-level", default_log_level)); EventNotifier::init(); } @@ -311,9 +324,39 @@ int KeeperClient::main(const std::vector & /* args */) return 0; } - auto host = config().getString("host", "localhost"); - auto port = config().getString("port", "9181"); - zk_args.hosts = {host + ":" + port}; + DB::ConfigProcessor config_processor(config().getString("config-file", "config.xml")); + + /// This will handle a situation when clickhouse is running on the embedded config, but config.d folder is also present. + config_processor.registerEmbeddedConfig("config.xml", ""); + auto clickhouse_config = config_processor.loadConfig(); + + Poco::Util::AbstractConfiguration::Keys keys; + clickhouse_config.configuration->keys("zookeeper", keys); + + if (!config().has("host") && !config().has("port") && !keys.empty()) + { + LOG_INFO(&Poco::Logger::get("KeeperClient"), "Found keeper node in the config.xml, will use it for connection"); + + for (const auto & key : keys) + { + String prefix = "zookeeper." + key; + String host = clickhouse_config.configuration->getString(prefix + ".host"); + String port = clickhouse_config.configuration->getString(prefix + ".port"); + + if (clickhouse_config.configuration->has(prefix + ".secure")) + host = "secure://" + host; + + zk_args.hosts.push_back(host + ":" + port); + } + } + else + { + String host = config().getString("host", "localhost"); + String port = config().getString("port", "9181"); + + zk_args.hosts.push_back(host + ":" + port); + } + zk_args.connection_timeout_ms = config().getInt("connection-timeout", 10) * 1000; zk_args.session_timeout_ms = config().getInt("session-timeout", 10) * 1000; zk_args.operation_timeout_ms = config().getInt("operation-timeout", 10) * 1000; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 304faa4870d..6f94e483e20 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -450,11 +450,11 @@ void checkForUsersNotInMainConfig( /// Unused in other builds #if defined(OS_LINUX) -static String readString(const String & path) +static String readLine(const String & path) { ReadBufferFromFile in(path); String contents; - readStringUntilEOF(contents, in); + readStringUntilNewlineInto(contents, in); return contents; } @@ -479,9 +479,16 @@ static void sanityChecks(Server & server) #if defined(OS_LINUX) try { + const std::unordered_set fastClockSources = { + // ARM clock + "arch_sys_counter", + // KVM guest clock + "kvm-clock", + // X86 clock + "tsc", + }; const char * filename = "/sys/devices/system/clocksource/clocksource0/current_clocksource"; - String clocksource = readString(filename); - if (clocksource.find("tsc") == std::string::npos && clocksource.find("kvm-clock") == std::string::npos) + if (!fastClockSources.contains(readLine(filename))) server.context()->addWarningMessage("Linux is not using a fast clock source. Performance can be degraded. Check " + String(filename)); } catch (...) 
@@ -501,7 +508,7 @@ static void sanityChecks(Server & server) try { const char * filename = "/sys/kernel/mm/transparent_hugepage/enabled"; - if (readString(filename).find("[always]") != std::string::npos) + if (readLine(filename).find("[always]") != std::string::npos) server.context()->addWarningMessage("Linux transparent hugepages are set to \"always\". Check " + String(filename)); } catch (...) diff --git a/programs/server/config.xml b/programs/server/config.xml index 85cdda63558..07052441a01 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -419,6 +419,8 @@ 10000 + false + /var/lib/clickhouse/ diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index c3f12caa4a4..6e3367a6d15 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -289,7 +289,7 @@ namespace } bool access_management = config.getBool(user_config + ".access_management", false); - bool named_collection_control = config.getBool(user_config + ".named_collection_control", false); + bool named_collection_control = config.getBool(user_config + ".named_collection_control", false) || config.getBool(user_config + ".named_collection_admin", false); bool show_named_collections_secrets = config.getBool(user_config + ".show_named_collections_secrets", false); if (grant_queries) diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index b460a66ea22..b89e179ee90 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -169,6 +169,10 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "merge() with thread pool parameter isn't implemented for {} ", getName()); } + /// Merges states (on which src places points to) with other states (on which dst places points to) of current aggregation function + /// then destroy states (on which src places points to). + virtual void mergeAndDestroyBatch(AggregateDataPtr * dst_places, AggregateDataPtr * src_places, size_t size, size_t offset, Arena * arena) const = 0; + /// Serializes state (to transmit it over the network, for example). 
virtual void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional version = std::nullopt) const = 0; /// NOLINT @@ -506,6 +510,15 @@ public: static_cast(this)->merge(places[i] + place_offset, rhs[i], arena); } + void mergeAndDestroyBatch(AggregateDataPtr * dst_places, AggregateDataPtr * rhs_places, size_t size, size_t offset, Arena * arena) const override + { + for (size_t i = 0; i < size; ++i) + { + static_cast(this)->merge(dst_places[i] + offset, rhs_places[i] + offset, arena); + static_cast(this)->destroy(rhs_places[i] + offset); + } + } + void addBatchSinglePlace( /// NOLINT size_t row_begin, size_t row_end, diff --git a/src/AggregateFunctions/UniqExactSet.h b/src/AggregateFunctions/UniqExactSet.h index 0d99b29686f..06157405cc5 100644 --- a/src/AggregateFunctions/UniqExactSet.h +++ b/src/AggregateFunctions/UniqExactSet.h @@ -34,6 +34,7 @@ public: static void parallelizeMergePrepare(const std::vector & data_vec, ThreadPool & thread_pool) { unsigned long single_level_set_num = 0; + unsigned long all_single_hash_size = 0; for (auto ele : data_vec) { @@ -41,7 +42,17 @@ public: single_level_set_num ++; } - if (single_level_set_num > 0 && single_level_set_num < data_vec.size()) + if (single_level_set_num == data_vec.size()) + { + for (auto ele : data_vec) + all_single_hash_size += ele->size(); + } + + /// If all the hashtables are mixed by singleLevel and twoLevel, or all singleLevel (larger than 6000 for average value), they could be converted into + /// twoLevel hashtables in parallel and then merge together. please refer to the following PR for more details. + /// https://github.com/ClickHouse/ClickHouse/pull/50748 + /// https://github.com/ClickHouse/ClickHouse/pull/52973 + if ((single_level_set_num > 0 && single_level_set_num < data_vec.size()) || ((all_single_hash_size/data_vec.size()) > 6000)) { try { diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 495e3bdfd4e..19244134617 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1471,8 +1471,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des sendDataFromPipe( std::move(pipe), parsed_query, - have_data_in_stdin - ); + have_data_in_stdin); } catch (Exception & e) { diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 859afb5ea44..d1b3388b2fb 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -886,7 +886,7 @@ void Connection::sendExternalTablesData(ExternalTablesData & data) return sink; }); executor = pipeline.execute(); - executor->execute(/*num_threads = */ 1); + executor->execute(/*num_threads = */ 1, false); auto read_rows = sink->getNumReadRows(); rows += read_rows; diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 04f55600b40..fe16313c0bf 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -254,6 +255,25 @@ void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) #endif +void ConfigProcessor::hideRecursive(Poco::XML::Node * config_root) +{ + for (Node * node = config_root->firstChild(); node;) + { + Node * next_node = node->nextSibling(); + if (node->nodeType() == Node::ELEMENT_NODE) + { + Element & element = dynamic_cast(*node); + if (element.hasAttribute("hide_in_preprocessed") && Poco::NumberParser::parseBool(element.getAttribute("hide_in_preprocessed"))) + { + config_root->removeChild(node); + } 
else + hideRecursive(node); + } + node = next_node; + } +} + + void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, const Node * with_root) { const NodeListPtr with_nodes = with_root->childNodes(); @@ -792,6 +812,24 @@ void ConfigProcessor::decryptEncryptedElements(LoadedConfig & loaded_config) #endif +XMLDocumentPtr ConfigProcessor::hideElements(XMLDocumentPtr xml_tree) +{ + /// Create a copy of XML Document because hiding elements from preprocessed_xml document + /// also influences on configuration which has a pointer to preprocessed_xml document. + + XMLDocumentPtr xml_tree_copy = new Poco::XML::Document; + + for (Node * node = xml_tree->firstChild(); node; node = node->nextSibling()) + { + NodePtr new_node = xml_tree_copy->importNode(node, true); + xml_tree_copy->appendChild(new_node); + } + Node * new_config_root = getRootNode(xml_tree_copy.get()); + hideRecursive(new_config_root); + + return xml_tree_copy; +} + void ConfigProcessor::savePreprocessedConfig(LoadedConfig & loaded_config, std::string preprocessed_dir) { try @@ -840,7 +878,8 @@ void ConfigProcessor::savePreprocessedConfig(LoadedConfig & loaded_config, std:: writer.setNewLine("\n"); writer.setIndent(" "); writer.setOptions(Poco::XML::XMLWriter::PRETTY_PRINT); - writer.writeNode(preprocessed_path, loaded_config.preprocessed_xml); + XMLDocumentPtr preprocessed_xml_without_hidden_elements = hideElements(loaded_config.preprocessed_xml); + writer.writeNode(preprocessed_path, preprocessed_xml_without_hidden_elements); LOG_DEBUG(log, "Saved preprocessed configuration to '{}'.", preprocessed_path); } catch (Poco::Exception & e) diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index b4f85b10526..98592d8846e 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -142,6 +142,9 @@ private: void decryptEncryptedElements(LoadedConfig & loaded_config); #endif + void hideRecursive(Poco::XML::Node * config_root); + XMLDocumentPtr hideElements(XMLDocumentPtr xml_tree); + void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root); /// If config root node name is not 'clickhouse' and merging config's root node names doesn't match, bypasses merging and returns false. 
diff --git a/src/Common/HashTable/Hash.h b/src/Common/HashTable/Hash.h index 49ab875297c..6bc5884e5bd 100644 --- a/src/Common/HashTable/Hash.h +++ b/src/Common/HashTable/Hash.h @@ -402,7 +402,7 @@ struct UInt128HashCRC32 : public UInt128Hash {}; struct UInt128TrivialHash { - size_t operator()(UInt128 x) const { return x.items[0]; } + size_t operator()(UInt128 x) const { return x.items[UInt128::_impl::little(0)]; } }; struct UUIDTrivialHash diff --git a/src/Common/StringUtils/StringUtils.h b/src/Common/StringUtils/StringUtils.h index 8e8df19adee..2c9331a43e2 100644 --- a/src/Common/StringUtils/StringUtils.h +++ b/src/Common/StringUtils/StringUtils.h @@ -318,3 +318,8 @@ inline void trim(std::string & str, char c = ' ') trimRight(str, c); trimLeft(str, c); } + +constexpr bool containsGlobs(const std::string & str) +{ + return str.find_first_of("*?{") != std::string::npos; +} diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index 676af280cad..58b705ca317 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -18,7 +17,6 @@ #include #include -#include #include diff --git a/src/Core/PostgreSQL/Connection.h b/src/Core/PostgreSQL/Connection.h index 96cc19babea..efc10b6ed20 100644 --- a/src/Core/PostgreSQL/Connection.h +++ b/src/Core/PostgreSQL/Connection.h @@ -47,6 +47,8 @@ public: void tryUpdateConnection(); + bool isConnected() const { return connection != nullptr && connection->is_open(); } + const ConnectionInfo & getConnectionInfo() { return connection_info; } String getInfoForLog() const { return connection_info.host_port; } diff --git a/src/Core/PostgreSQL/ConnectionHolder.h b/src/Core/PostgreSQL/ConnectionHolder.h index 16803c823ba..ad311f9cc2f 100644 --- a/src/Core/PostgreSQL/ConnectionHolder.h +++ b/src/Core/PostgreSQL/ConnectionHolder.h @@ -28,10 +28,25 @@ public: ConnectionHolder(const ConnectionHolder & other) = delete; + void setBroken() { is_broken = true; } + ~ConnectionHolder() { if (auto_close) + { connection.reset(); + } + else if (is_broken) + { + try + { + connection->getRef().reset(); + } + catch (...) + { + connection.reset(); + } + } pool->returnObject(std::move(connection)); } @@ -49,6 +64,7 @@ private: PoolPtr pool; ConnectionPtr connection; bool auto_close; + bool is_broken = false; }; using ConnectionHolderPtr = std::unique_ptr; diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 41447664329..7e346f3596c 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -96,7 +96,8 @@ namespace DB M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \ M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. 
You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ - M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) + M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ + M(Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0) DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9cc4197899f..ff90f30d2e6 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -47,6 +47,7 @@ class IColumn; M(MaxThreads, max_final_threads, 0, "The maximum number of threads to read from table with FINAL.", 0) \ M(UInt64, max_threads_for_indexes, 0, "The maximum number of threads process indices.", 0) \ M(MaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \ + M(Bool, use_concurrency_control, true, "Respect the server's concurrency control (see the `concurrent_threads_soft_limit_num` and `concurrent_threads_soft_limit_ratio_to_cores` global server settings). If disabled, it allows using a larger number of threads even if the server is overloaded (not recommended for normal usage, and needed mostly for tests).", 0) \ M(MaxThreads, max_download_threads, 4, "The maximum number of threads to download data (e.g. for URL engine).", 0) \ M(UInt64, max_download_buffer_size, 10*1024*1024, "The maximal size of buffer for parallel downloading (e.g. 
for URL engine) per each thread.", 0) \ M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \ diff --git a/src/DataTypes/Serializations/SerializationDecimalBase.cpp b/src/DataTypes/Serializations/SerializationDecimalBase.cpp index 494d0aa9168..37edfffc0d4 100644 --- a/src/DataTypes/Serializations/SerializationDecimalBase.cpp +++ b/src/DataTypes/Serializations/SerializationDecimalBase.cpp @@ -7,6 +7,7 @@ #include #include +#include namespace DB { @@ -29,21 +30,13 @@ template void SerializationDecimalBase::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const { const typename ColumnType::Container & x = typeid_cast(column).getData(); - - size_t size = x.size(); - - if (limit == 0 || offset + limit > size) + if (const size_t size = x.size(); limit == 0 || offset + limit > size) limit = size - offset; - if constexpr (std::endian::native == std::endian::big && sizeof(T) >= 2) + + if constexpr (std::endian::native == std::endian::big) { - for (size_t i = 0; i < limit; i++) - { - auto tmp(x[offset+i]); - char *start = reinterpret_cast(&tmp); - char *end = start + sizeof(FieldType); - std::reverse(start, end); - ostr.write(reinterpret_cast(&tmp), sizeof(FieldType)); - } + std::ranges::for_each( + x | std::views::drop(offset) | std::views::take(limit), [&ostr](const auto & d) { writeBinaryLittleEndian(d, ostr); }); } else ostr.write(reinterpret_cast(&x[offset]), sizeof(FieldType) * limit); @@ -69,20 +62,14 @@ template void SerializationDecimalBase::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double) const { typename ColumnType::Container & x = typeid_cast(column).getData(); - size_t initial_size = x.size(); + const size_t initial_size = x.size(); x.resize(initial_size + limit); - size_t size = istr.readBig(reinterpret_cast(&x[initial_size]), sizeof(FieldType) * limit); - if constexpr (std::endian::native == std::endian::big && sizeof(T) >= 2) - { - for (size_t i = 0; i < limit; i++) - { - char *start = reinterpret_cast(&x[initial_size + i]); - char *end = start + sizeof(FieldType); - std::reverse(start, end); - } - } - + const size_t size = istr.readBig(reinterpret_cast(&x[initial_size]), sizeof(FieldType) * limit); x.resize(initial_size + size / sizeof(FieldType)); + + if constexpr (std::endian::native == std::endian::big) + std::ranges::for_each( + x | std::views::drop(initial_size), [](auto & d) { transformEndianness(d); }); } template class SerializationDecimalBase; diff --git a/src/DataTypes/Serializations/SerializationNumber.cpp b/src/DataTypes/Serializations/SerializationNumber.cpp index df6c0848bbe..fc3a5f0db24 100644 --- a/src/DataTypes/Serializations/SerializationNumber.cpp +++ b/src/DataTypes/Serializations/SerializationNumber.cpp @@ -145,15 +145,8 @@ void SerializationNumber::serializeBinaryBulk(const IColumn & column, WriteBu if constexpr (std::endian::native == std::endian::big && sizeof(T) >= 2) { - static constexpr auto to_little_endian = [](auto i) - { - transformEndianness(i); - return i; - }; - std::ranges::for_each( - x | std::views::drop(offset) | std::views::take(limit) | std::views::transform(to_little_endian), - [&ostr](const auto & i) { ostr.write(reinterpret_cast(&i), sizeof(typename ColumnVector::ValueType)); }); + x | std::views::drop(offset) | std::views::take(limit), [&ostr](const auto & i) { writeBinaryLittleEndian(i, ostr); }); } else ostr.write(reinterpret_cast(&x[offset]), sizeof(typename ColumnVector::ValueType) * 
limit); diff --git a/src/DataTypes/Serializations/SerializationUUID.cpp b/src/DataTypes/Serializations/SerializationUUID.cpp index 93658fd05a3..13ae8fa9a78 100644 --- a/src/DataTypes/Serializations/SerializationUUID.cpp +++ b/src/DataTypes/Serializations/SerializationUUID.cpp @@ -7,6 +7,7 @@ #include #include +#include namespace DB { @@ -136,23 +137,37 @@ void SerializationUUID::deserializeBinary(IColumn & column, ReadBuffer & istr, c void SerializationUUID::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const { const typename ColumnVector::Container & x = typeid_cast &>(column).getData(); - - size_t size = x.size(); - - if (limit == 0 || offset + limit > size) + if (const size_t size = x.size(); limit == 0 || offset + limit > size) limit = size - offset; - if (limit) + if (limit == 0) + return; + + if constexpr (std::endian::native == std::endian::big) + { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunreachable-code" + std::ranges::for_each( + x | std::views::drop(offset) | std::views::take(limit), [&ostr](const auto & uuid) { writeBinaryLittleEndian(uuid, ostr); }); +#pragma clang diagnostic pop + } + else ostr.write(reinterpret_cast(&x[offset]), sizeof(UUID) * limit); } void SerializationUUID::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const { typename ColumnVector::Container & x = typeid_cast &>(column).getData(); - size_t initial_size = x.size(); + const size_t initial_size = x.size(); x.resize(initial_size + limit); - size_t size = istr.readBig(reinterpret_cast(&x[initial_size]), sizeof(UUID) * limit); + const size_t size = istr.readBig(reinterpret_cast(&x[initial_size]), sizeof(UUID) * limit); x.resize(initial_size + size / sizeof(UUID)); -} +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunreachable-code" + if constexpr (std::endian::native == std::endian::big) + std::ranges::for_each( + x | std::views::drop(initial_size), [](auto & uuid) { transformEndianness(uuid); }); +#pragma clang diagnostic pop +} } diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index 59f9ee67d7b..49f260034db 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -81,22 +81,24 @@ bool DatabaseFilesystem::checkTableFilePath(const std::string & table_path, Cont throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File is not inside {}", user_files_path); } - /// Check if the corresponding file exists. - if (!fs::exists(table_path)) + if (!containsGlobs(table_path)) { - if (throw_on_error) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File does not exist: {}", table_path); - else - return false; - } + /// Check if the corresponding file exists. 
+ if (!fs::exists(table_path)) + { + if (throw_on_error) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File does not exist: {}", table_path); + else + return false; + } - if (!fs::is_regular_file(table_path)) - { - if (throw_on_error) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, - "File is directory, but expected a file: {}", table_path); - else - return false; + if (!fs::is_regular_file(table_path)) + { + if (throw_on_error) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File is directory, but expected a file: {}", table_path); + else + return false; + } } return true; @@ -141,19 +143,18 @@ StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr cont if (!checkTableFilePath(table_path, context_, throw_on_error)) return {}; - String format = FormatFactory::instance().getFormatFromFileName(table_path, throw_on_error); + auto format = FormatFactory::instance().getFormatFromFileName(table_path, throw_on_error); if (format.empty()) return {}; - /// If the file exists, create a new table using TableFunctionFile and return it. - auto args = makeASTFunction("file", std::make_shared(table_path), std::make_shared(format)); + auto ast_function_ptr = makeASTFunction("file", std::make_shared(table_path), std::make_shared(format)); - auto table_function = TableFunctionFactory::instance().get(args, context_); + auto table_function = TableFunctionFactory::instance().get(ast_function_ptr, context_); if (!table_function) return nullptr; /// TableFunctionFile throws exceptions, if table cannot be created. - auto table_storage = table_function->execute(args, context_, name); + auto table_storage = table_function->execute(ast_function_ptr, context_, name); if (table_storage) addTable(name, table_storage); diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp index d84967fbae6..36a0642abce 100644 --- a/src/Dictionaries/DirectDictionary.cpp +++ b/src/Dictionaries/DirectDictionary.cpp @@ -288,7 +288,8 @@ public: : ISource(pipeline_.getHeader()) , pipeline(std::move(pipeline_)) , executor(pipeline) - {} + { + } std::string getName() const override { diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index ea40c49ff4b..ae2e3b40fa2 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -25,11 +25,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) { const Settings & settings = context->getSettingsRef(); @@ -50,6 +45,8 @@ std::unique_ptr getClient( { String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); S3::URI uri(endpoint); + if (!uri.key.ends_with('/')) + uri.key.push_back('/'); S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( config.getString(config_prefix + ".region", ""), @@ -61,9 +58,6 @@ std::unique_ptr getClient( settings.request_settings.put_request_throttler, uri.uri.getScheme()); - if (uri.key.back() != '/') - throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 path must ends with '/', but '{}' doesn't.", uri.key); - client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 1000); client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 3000); client_configuration.maxConnections = 
config.getUInt(config_prefix + ".max_connections", 100); diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index d1264affaea..69bdfe01a36 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -104,12 +104,8 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check) { String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); S3::URI uri(endpoint); - - if (uri.key.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No key in S3 uri: {}", uri.uri.toString()); - - if (uri.key.back() != '/') - throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 path must ends with '/', but '{}' doesn't.", uri.key); + if (!uri.key.ends_with('/')) + uri.key.push_back('/'); S3Capabilities s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); std::shared_ptr s3_storage; diff --git a/src/Formats/IndexForNativeFormat.cpp b/src/Formats/IndexForNativeFormat.cpp index 91ae1e39280..bb410125378 100644 --- a/src/Formats/IndexForNativeFormat.cpp +++ b/src/Formats/IndexForNativeFormat.cpp @@ -20,8 +20,8 @@ void IndexOfBlockForNativeFormat::read(ReadBuffer & istr) auto & column = columns.emplace_back(); readBinary(column.name, istr); readBinary(column.type, istr); - readBinary(column.location.offset_in_compressed_file, istr); - readBinary(column.location.offset_in_decompressed_block, istr); + readBinaryLittleEndian(column.location.offset_in_compressed_file, istr); + readBinaryLittleEndian(column.location.offset_in_decompressed_block, istr); } } @@ -34,8 +34,8 @@ void IndexOfBlockForNativeFormat::write(WriteBuffer & ostr) const const auto & column = columns[i]; writeBinary(column.name, ostr); writeBinary(column.type, ostr); - writeBinary(column.location.offset_in_compressed_file, ostr); - writeBinary(column.location.offset_in_decompressed_block, ostr); + writeBinaryLittleEndian(column.location.offset_in_compressed_file, ostr); + writeBinaryLittleEndian(column.location.offset_in_decompressed_block, ostr); } } diff --git a/src/Functions/URL/domain.h b/src/Functions/URL/domain.h index 91f83a457be..f38f106e9a2 100644 --- a/src/Functions/URL/domain.h +++ b/src/Functions/URL/domain.h @@ -44,6 +44,7 @@ inline std::string_view getURLHostRFC(const char * data, size_t size) case '.': case '-': case '+': + case '[': break; case ' ': /// restricted symbols case '\t': @@ -56,7 +57,6 @@ inline std::string_view getURLHostRFC(const char * data, size_t size) case '\\': case '^': case '~': - case '[': case ']': case ';': case '=': @@ -73,6 +73,13 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos pos = data; } + bool has_open_bracket = false; + bool has_end_bracket = false; + if (*pos == '[') /// IPv6 [2001:db8::1]:80 + { + has_open_bracket = true; + ++pos; + } Pos dot_pos = nullptr; Pos colon_pos = nullptr; bool has_sub_delims = false; @@ -84,10 +91,14 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos switch (*pos) { case '.': + if (has_open_bracket) + return std::string_view{}; if (has_at_symbol || colon_pos == nullptr) dot_pos = pos; break; case ':': + if (has_open_bracket) + continue; if (has_at_symbol || colon_pos) goto done; colon_pos = pos; break; @@ -116,6 +127,13 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos /// registered). 
has_sub_delims = true; continue; + case ']': + if (has_open_bracket) + { + has_end_bracket = true; + goto done; + } + [[fallthrough]]; case ' ': /// restricted symbols in whole URL case '\t': case '<': @@ -126,7 +144,6 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos case '\\': case '^': case '[': - case ']': if (colon_pos == nullptr) return std::string_view{}; else @@ -138,7 +155,11 @@ done: if (has_sub_delims) return std::string_view{}; if (!has_at_symbol) + { + if (has_open_bracket && has_end_bracket) + return std::string_view(start_of_host, pos - start_of_host); pos = colon_pos ? colon_pos : pos; + } return checkAndReturnHost(pos, dot_pos, start_of_host); } diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 62ab51abd76..b7582b37017 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -163,7 +163,16 @@ namespace ColumnPtr default_non_const; if (!cache.default_column && arguments.size() == 4) + { default_non_const = castColumn(arguments[3], result_type); + if (in->size() > default_non_const->size()) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Fourth argument of function {} must be a constant or a column at least as big as the second and third arguments", + getName()); + } + } ColumnPtr in_casted = arguments[0].column; if (arguments.size() == 3) @@ -490,7 +499,7 @@ namespace else if (cache.default_column) column_result.insertFrom(*cache.default_column, 0); else if (default_non_const) - column_result.insertFrom(*default_non_const, 0); + column_result.insertFrom(*default_non_const, i); else column_result.insertFrom(in_casted, i); } diff --git a/src/IO/Archives/LibArchiveReader.cpp b/src/IO/Archives/LibArchiveReader.cpp index 2b7a4cca5de..a411b4bb4b6 100644 --- a/src/IO/Archives/LibArchiveReader.cpp +++ b/src/IO/Archives/LibArchiveReader.cpp @@ -231,6 +231,8 @@ public: String getFileName() const override { return handle.getFileName(); } + size_t getFileSize() override { return handle.getFileInfo().uncompressed_size; } + Handle releaseHandle() && { return std::move(handle); diff --git a/src/IO/Archives/ZipArchiveReader.cpp b/src/IO/Archives/ZipArchiveReader.cpp index a19c7abf8dd..fd7a09c4f20 100644 --- a/src/IO/Archives/ZipArchiveReader.cpp +++ b/src/IO/Archives/ZipArchiveReader.cpp @@ -312,6 +312,8 @@ public: String getFileName() const override { return handle.getFileName(); } + size_t getFileSize() override { return handle.getFileInfo().uncompressed_size; } + /// Releases owned handle to pass it to an enumerator. HandleHolder releaseHandle() && { diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 232721666e7..52bb9aebae6 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -2191,8 +2191,8 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( /// Replace predicate result to constant 1. 
Node node; node.type = ActionType::COLUMN; - node.result_name = std::move(predicate->result_name); - node.result_type = std::move(predicate->result_type); + node.result_name = predicate->result_name; + node.result_type = predicate->result_type; node.column = node.result_type->createColumnConst(0, 1); if (predicate->type != ActionType::INPUT) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 68d0b577a77..9249c3ce4ce 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -2479,48 +2479,21 @@ void NO_INLINE Aggregator::mergeDataNullKey( } } - template void NO_INLINE Aggregator::mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena) const { if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization) mergeDataNullKey(table_dst, table_src, arena); + PaddedPODArray dst_places; + PaddedPODArray src_places; + auto merge = [&](AggregateDataPtr & __restrict dst, AggregateDataPtr & __restrict src, bool inserted) { if (!inserted) { -#if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) - { - const auto & compiled_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions; - compiled_functions.merge_aggregate_states_function(dst, src); - - if (compiled_aggregate_functions_holder->compiled_aggregate_functions.functions_count != params.aggregates_size) - { - for (size_t i = 0; i < params.aggregates_size; ++i) - { - if (!is_aggregate_function_compiled[i]) - aggregate_functions[i]->merge( - dst + offsets_of_aggregate_states[i], src + offsets_of_aggregate_states[i], arena); - } - - for (size_t i = 0; i < params.aggregates_size; ++i) - { - if (!is_aggregate_function_compiled[i]) - aggregate_functions[i]->destroy(src + offsets_of_aggregate_states[i]); - } - } - } - else -#endif - { - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->merge(dst + offsets_of_aggregate_states[i], src + offsets_of_aggregate_states[i], arena); - - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->destroy(src + offsets_of_aggregate_states[i]); - } + dst_places.push_back(dst); + src_places.push_back(src); } else { @@ -2531,8 +2504,30 @@ void NO_INLINE Aggregator::mergeDataImpl(Table & table_dst, Table & table_src, A }; table_src.template mergeToViaEmplace(table_dst, std::move(merge)); - table_src.clearAndShrink(); + +#if USE_EMBEDDED_COMPILER + if constexpr (use_compiled_functions) + { + const auto & compiled_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions; + compiled_functions.merge_aggregate_states_function(dst_places.data(), src_places.data(), dst_places.size()); + + for (size_t i = 0; i < params.aggregates_size; ++i) + { + if (!is_aggregate_function_compiled[i]) + aggregate_functions[i]->mergeAndDestroyBatch( + dst_places.data(), src_places.data(), dst_places.size(), offsets_of_aggregate_states[i], arena); + } + + return; + } +#endif + + for (size_t i = 0; i < params.aggregates_size; ++i) + { + aggregate_functions[i]->mergeAndDestroyBatch( + dst_places.data(), src_places.data(), dst_places.size(), offsets_of_aggregate_states[i], arena); + } } diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 1671a8207e6..7a9321e4215 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -226,6 +226,8 @@ KeyMetadataPtr CacheMetadata::getKeyMetadata( if (it == end()) { if (key_not_found_policy == KeyNotFoundPolicy::THROW) + throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}` in cache", key); + else if (key_not_found_policy == KeyNotFoundPolicy::THROW_LOGICAL) throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key); else if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL) return nullptr; diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index 85597ce881a..9ba7f0b6d1b 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -86,4 +86,10 @@ std::shared_ptr WriteBufferToFileSegment::getReadBufferImpl() return std::make_shared(file_segment->getPathInLocalCache()); } +WriteBufferToFileSegment::~WriteBufferToFileSegment() +{ + /// To be sure that file exists before destructor of segment_holder is called + WriteBufferFromFileDecorator::finalize(); +} + } diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.h b/src/Interpreters/Cache/WriteBufferToFileSegment.h index d39772873f7..21565e297c9 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.h +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.h @@ -16,6 +16,7 @@ public: explicit WriteBufferToFileSegment(FileSegmentsHolderPtr segment_holder); void nextImpl() override; + ~WriteBufferToFileSegment() override; private: diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index d007341a1ac..92ef5a0d159 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -213,6 +213,10 @@ String ClientInfo::getVersionStr() const return std::format("{}.{}.{} ({})", client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version); } +VersionNumber ClientInfo::getVersionNumber() const +{ + return VersionNumber(client_version_major, client_version_minor, client_version_patch); +} void ClientInfo::fillOSUserHostNameAndVersionInfo() { diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h index 798fc95954c..70524333047 100644 --- a/src/Interpreters/ClientInfo.h +++ b/src/Interpreters/ClientInfo.h @@ -4,6 +4,7 @@ #include #include #include +#include #include namespace DB @@ -137,6 +138,7 @@ public: bool clientVersionEquals(const ClientInfo & other, bool compare_patch) const; String getVersionStr() const; + VersionNumber getVersionNumber() const; private: void fillOSUserHostNameAndVersionInfo(); diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index ccbce0b3dd4..15b8b40c818 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -318,20 +318,11 @@ void executeQueryWithParallelReplicas( } auto coordinator = std::make_shared(all_replicas_count); - - /// This is a little bit weird, but we construct an "empty" coordinator without - /// any specified reading/coordination method (like Default, InOrder, InReverseOrder) - /// Because we will understand it later during QueryPlan optimization - /// So we place a reference to the coordinator to some common plane like QueryInfo - /// to then tell it about the reading method we chose. 
- query_info.coordinator = coordinator; - auto external_tables = new_context->getExternalTables(); - auto read_from_remote = std::make_unique( query_ast, new_cluster, - coordinator, + std::move(coordinator), stream_factory.header, stream_factory.processed_stage, main_table, diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 50a6ef437bf..da362225f97 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -44,10 +44,7 @@ public: if (database) { for (auto table_it = database->getTablesIterator(context); table_it->isValid(); table_it->next()) - { - const auto & storage_id = table_it->table()->getStorageID(); - result.emplace_back(storage_id.getTableName()); - } + result.emplace_back(table_it->name()); } return result; } diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 5651bdb8a9b..2d1f3ba708a 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -184,7 +184,9 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr PullingAsyncPipelineExecutor executor(io.pipeline); io.pipeline.setProgressCallback(data.getContext()->getProgressCallback()); - while (block.rows() == 0 && executor.pull(block)); + while (block.rows() == 0 && executor.pull(block)) + { + } if (block.rows() == 0) { @@ -216,7 +218,8 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr Block tmp_block; while (tmp_block.rows() == 0 && executor.pull(tmp_block)) - ; + { + } if (tmp_block.rows() != 0) throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index f746732ca9d..24cd3a10579 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -208,7 +208,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue if (table->isStaticStorage()) throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only"); - table->checkTableCanBeDropped(); + table->checkTableCanBeDropped(context_); TableExclusiveLockHolder table_excl_lock; /// We don't need any lock for ReplicatedMergeTree and for simple MergeTree @@ -228,10 +228,10 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue { /// If DROP DICTIONARY query is not used, check if Dictionary can be dropped with DROP TABLE query if (!query.is_dictionary) - table->checkTableCanBeDropped(); + table->checkTableCanBeDropped(context_); } else - table->checkTableCanBeDropped(); + table->checkTableCanBeDropped(context_); /// Check dependencies before shutting table down bool check_ref_deps = getContext()->getSettingsRef().check_referential_table_dependencies; diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 078499fb013..1f6f2336ab8 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -616,6 +616,7 @@ BlockIO InterpreterInsertQuery::execute() presink_chains.at(0).appendChain(std::move(sink_chains.at(0))); res.pipeline = QueryPipeline(std::move(presink_chains[0])); res.pipeline.setNumThreads(std::min(res.pipeline.getNumThreads(), settings.max_threads)); + res.pipeline.setConcurrencyControl(settings.use_concurrency_control); if (query.hasInlinedData() && 
!async_insert) { diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 505562d9dfb..5dd6af3f6f7 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -68,7 +68,6 @@ #include #include #include -#include #include #include @@ -84,12 +83,9 @@ #include #include #include -#include #include #include #include -#include -#include #include #include #include @@ -97,7 +93,6 @@ #include #include -#include "config_version.h" namespace ProfileEvents { @@ -2527,6 +2522,8 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc if (!query_plan.getMaxThreads() || is_remote) query_plan.setMaxThreads(max_threads_execute_query); + query_plan.setConcurrencyControl(settings.use_concurrency_control); + /// Aliases in table declaration. if (processing_stage == QueryProcessingStage::FetchColumns && alias_actions) { diff --git a/src/Interpreters/JIT/compileFunction.cpp b/src/Interpreters/JIT/compileFunction.cpp index fb8dec665b4..f50a122f9a2 100644 --- a/src/Interpreters/JIT/compileFunction.cpp +++ b/src/Interpreters/JIT/compileFunction.cpp @@ -357,27 +357,60 @@ static void compileMergeAggregatesStates(llvm::Module & module, const std::vecto llvm::IRBuilder<> b(module.getContext()); auto * aggregate_data_place_type = b.getInt8Ty()->getPointerTo(); - auto * merge_aggregates_states_func_declaration = llvm::FunctionType::get(b.getVoidTy(), { aggregate_data_place_type, aggregate_data_place_type }, false); - auto * merge_aggregates_states_func = llvm::Function::Create(merge_aggregates_states_func_declaration, llvm::Function::ExternalLinkage, name, module); + auto * aggregate_data_places_type = aggregate_data_place_type->getPointerTo(); + auto * size_type = b.getInt64Ty(); + + auto * merge_aggregates_states_func_declaration + = llvm::FunctionType::get(b.getVoidTy(), {aggregate_data_places_type, aggregate_data_places_type, size_type}, false); + auto * merge_aggregates_states_func + = llvm::Function::Create(merge_aggregates_states_func_declaration, llvm::Function::ExternalLinkage, name, module); auto * arguments = merge_aggregates_states_func->args().begin(); - llvm::Value * aggregate_data_place_dst_arg = arguments++; - llvm::Value * aggregate_data_place_src_arg = arguments++; + llvm::Value * aggregate_data_places_dst_arg = arguments++; + llvm::Value * aggregate_data_places_src_arg = arguments++; + llvm::Value * aggregate_places_size_arg = arguments++; auto * entry = llvm::BasicBlock::Create(b.getContext(), "entry", merge_aggregates_states_func); b.SetInsertPoint(entry); + /// Initialize loop + + auto * end = llvm::BasicBlock::Create(b.getContext(), "end", merge_aggregates_states_func); + auto * loop = llvm::BasicBlock::Create(b.getContext(), "loop", merge_aggregates_states_func); + b.CreateCondBr(b.CreateICmpEQ(aggregate_places_size_arg, llvm::ConstantInt::get(size_type, 0)), end, loop); + + b.SetInsertPoint(loop); + + /// Loop + + auto * counter_phi = b.CreatePHI(size_type, 2); + counter_phi->addIncoming(llvm::ConstantInt::get(size_type, 0), entry); + for (const auto & function_to_compile : functions) { + auto * aggregate_data_place_dst = b.CreateLoad(aggregate_data_place_type, + b.CreateInBoundsGEP(aggregate_data_place_type->getPointerTo(), aggregate_data_places_dst_arg, counter_phi)); + auto * aggregate_data_place_src = b.CreateLoad(aggregate_data_place_type, + b.CreateInBoundsGEP(aggregate_data_place_type->getPointerTo(), aggregate_data_places_src_arg, counter_phi)); + size_t 
aggregate_function_offset = function_to_compile.aggregate_data_offset; - auto * aggregate_data_place_merge_dst_with_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_place_dst_arg, aggregate_function_offset); - auto * aggregate_data_place_merge_src_with_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_place_src_arg, aggregate_function_offset); + auto * aggregate_data_place_merge_dst_with_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_place_dst, aggregate_function_offset); + auto * aggregate_data_place_merge_src_with_offset = b.CreateConstInBoundsGEP1_64(b.getInt8Ty(), aggregate_data_place_src, aggregate_function_offset); const auto * aggregate_function_ptr = function_to_compile.function; aggregate_function_ptr->compileMerge(b, aggregate_data_place_merge_dst_with_offset, aggregate_data_place_merge_src_with_offset); } + /// End of loop + + auto * current_block = b.GetInsertBlock(); + auto * incremeted_counter = b.CreateAdd(counter_phi, llvm::ConstantInt::get(size_type, 1)); + counter_phi->addIncoming(incremeted_counter, current_block); + + b.CreateCondBr(b.CreateICmpEQ(incremeted_counter, aggregate_places_size_arg), end, loop); + + b.SetInsertPoint(end); b.CreateRetVoid(); } diff --git a/src/Interpreters/JIT/compileFunction.h b/src/Interpreters/JIT/compileFunction.h index fe5abe1988c..84abfa0925a 100644 --- a/src/Interpreters/JIT/compileFunction.h +++ b/src/Interpreters/JIT/compileFunction.h @@ -56,7 +56,7 @@ struct AggregateFunctionWithOffset using JITCreateAggregateStatesFunction = void (*)(AggregateDataPtr); using JITAddIntoAggregateStatesFunction = void (*)(ColumnDataRowsOffset, ColumnDataRowsOffset, ColumnData *, AggregateDataPtr *); using JITAddIntoAggregateStatesFunctionSinglePlace = void (*)(ColumnDataRowsOffset, ColumnDataRowsOffset, ColumnData *, AggregateDataPtr); -using JITMergeAggregateStatesFunction = void (*)(AggregateDataPtr, AggregateDataPtr); +using JITMergeAggregateStatesFunction = void (*)(AggregateDataPtr *, AggregateDataPtr *, size_t); using JITInsertAggregateStatesIntoColumnsFunction = void (*)(ColumnDataRowsOffset, ColumnDataRowsOffset, ColumnData *, AggregateDataPtr *); struct CompiledAggregateFunctions diff --git a/src/Parsers/ASTForeignKeyDeclaration.h b/src/Parsers/ASTForeignKeyDeclaration.h new file mode 100644 index 00000000000..43c5995055d --- /dev/null +++ b/src/Parsers/ASTForeignKeyDeclaration.h @@ -0,0 +1,26 @@ +#pragma once + +#include + +namespace DB +{ + +/* + * Currently ignore the foreign key node, flesh it out when needed + */ +class ASTForeignKeyDeclaration : public IAST +{ +public: + String name; + + String getID(char) const override { return "Foreign Key"; } + + ASTPtr clone() const override + { + auto res = std::make_shared(); + res->name = name; + return res; + } +}; + +} diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index cd399531064..61cac0480a9 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -865,6 +865,10 @@ public: if (!ParserKeyword("FROM").ignore(test_pos, test_expected)) return true; + // If there is a comma after 'from' then the first one was a name of a column + if (test_pos->type == TokenType::Comma) + return true; + /// If we parse a second FROM then the first one was a name of a column if (ParserKeyword("FROM").ignore(test_pos, test_expected)) return true; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 9e40e031c51..bfae57cab1d 100644 --- 
a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -224,17 +225,69 @@ bool ParserProjectionDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & return true; } +bool ParserForeignKeyDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_references("REFERENCES"); + ParserCompoundIdentifier table_name_p(true, true); + ParserExpression expression_p; + + ASTPtr name; + ASTPtr expr; + + if (!expression_p.parse(pos, expr, expected)) + return false; + + if (!s_references.ignore(pos, expected)) + return false; + + if (!table_name_p.parse(pos, name, expected)) + return false; + + if (!expression_p.parse(pos, expr, expected)) + return false; + + ParserKeyword s_on("ON"); + while (s_on.ignore(pos, expected)) + { + ParserKeyword s_delete("DELETE"); + ParserKeyword s_update("UPDATE"); + + if (!s_delete.ignore(pos, expected) && !s_update.ignore(pos, expected)) + return false; + + ParserKeyword s_restrict("RESTRICT"); + ParserKeyword s_cascade("CASCADE"); + ParserKeyword s_set_null("SET NULL"); + ParserKeyword s_no_action("NO ACTION"); + ParserKeyword s_set_default("SET DEFAULT"); + + if (!s_restrict.ignore(pos, expected) && !s_cascade.ignore(pos, expected) && + !s_set_null.ignore(pos, expected) && !s_no_action.ignore(pos, expected) && + !s_set_default.ignore(pos, expected)) + { + return false; + } + } + + auto foreign_key = std::make_shared(); + foreign_key->name = "Foreign Key"; + node = foreign_key; + + return true; +} bool ParserTablePropertyDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_index("INDEX"); ParserKeyword s_constraint("CONSTRAINT"); ParserKeyword s_projection("PROJECTION"); + ParserKeyword s_foreign_key("FOREIGN KEY"); ParserKeyword s_primary_key("PRIMARY KEY"); ParserIndexDeclaration index_p; ParserConstraintDeclaration constraint_p; ParserProjectionDeclaration projection_p; + ParserForeignKeyDeclaration foreign_key_p; ParserColumnDeclaration column_p{true, true}; ParserExpression primary_key_p; @@ -260,6 +313,11 @@ bool ParserTablePropertyDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expecte if (!primary_key_p.parse(pos, new_node, expected)) return false; } + else if (s_foreign_key.ignore(pos, expected)) + { + if (!foreign_key_p.parse(pos, new_node, expected)) + return false; + } else { if (!column_p.parse(pos, new_node, expected)) @@ -323,6 +381,11 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E constraints->children.push_back(elem); else if (elem->as()) projections->children.push_back(elem); + else if (elem->as()) + { + /// Ignore the foreign key node + continue; + } else if (elem->as() || elem->as()) { if (primary_key) diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 0a98923436c..4062ed25c6b 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -403,6 +403,13 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserForeignKeyDeclaration : public IParserBase +{ +protected: + const char * getName() const override { return "foreign key declaration"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + class ParserTablePropertyDeclaration : public IParserBase { protected: diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index be26b77157d..34b0eb8a389 100644 --- 
a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -790,6 +790,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres */ if (!query_plan.getMaxThreads() || is_remote) query_plan.setMaxThreads(max_threads_execute_query); + + query_plan.setConcurrencyControl(settings.use_concurrency_control); } else { diff --git a/src/Processors/Executors/CompletedPipelineExecutor.cpp b/src/Processors/Executors/CompletedPipelineExecutor.cpp index b0f842dec1b..598a51bf0c7 100644 --- a/src/Processors/Executors/CompletedPipelineExecutor.cpp +++ b/src/Processors/Executors/CompletedPipelineExecutor.cpp @@ -32,7 +32,8 @@ struct CompletedPipelineExecutor::Data } }; -static void threadFunction(CompletedPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads) +static void threadFunction( + CompletedPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads, bool concurrency_control) { SCOPE_EXIT_SAFE( if (thread_group) @@ -45,7 +46,7 @@ static void threadFunction(CompletedPipelineExecutor::Data & data, ThreadGroupPt if (thread_group) CurrentThread::attachToGroup(thread_group); - data.executor->execute(num_threads); + data.executor->execute(num_threads, concurrency_control); } catch (...) { @@ -79,9 +80,13 @@ void CompletedPipelineExecutor::execute() /// Avoid passing this to lambda, copy ptr to data instead. /// Destructor of unique_ptr copy raw ptr into local variable first, only then calls object destructor. - auto func = [data_ptr = data.get(), num_threads = pipeline.getNumThreads(), thread_group = CurrentThread::getGroup()] + auto func = [ + data_ptr = data.get(), + num_threads = pipeline.getNumThreads(), + thread_group = CurrentThread::getGroup(), + concurrency_control = pipeline.getConcurrencyControl()] { - threadFunction(*data_ptr, thread_group, num_threads); + threadFunction(*data_ptr, thread_group, num_threads, concurrency_control); }; data->thread = ThreadFromGlobalPool(std::move(func)); @@ -102,7 +107,7 @@ void CompletedPipelineExecutor::execute() { PipelineExecutor executor(pipeline.processors, pipeline.process_list_element); executor.setReadProgressCallback(pipeline.getReadProgressCallback()); - executor.execute(pipeline.getNumThreads()); + executor.execute(pipeline.getNumThreads(), pipeline.getConcurrencyControl()); } } diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 1508d834592..4dd65def123 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -3,16 +3,13 @@ #include #include #include -#include #include #include #include #include #include #include -#include #include -#include #include #include @@ -99,7 +96,7 @@ void PipelineExecutor::finish() tasks.finish(); } -void PipelineExecutor::execute(size_t num_threads) +void PipelineExecutor::execute(size_t num_threads, bool concurrency_control) { checkTimeLimit(); if (num_threads < 1) @@ -110,7 +107,7 @@ void PipelineExecutor::execute(size_t num_threads) try { - executeImpl(num_threads); + executeImpl(num_threads, concurrency_control); /// Execution can be stopped because of exception. Check and rethrow if any. 
for (auto & node : graph->nodes) @@ -137,12 +134,11 @@ bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) { if (!is_execution_initialized) { - initializeExecution(1); + initializeExecution(1, true); // Acquire slot until we are done single_thread_slot = slots->tryAcquire(); - if (!single_thread_slot) - abort(); // Unable to allocate slot for the first thread, but we just allocated at least one slot + chassert(single_thread_slot && "Unable to allocate slot for the first thread, but we just allocated at least one slot"); if (yield_flag && *yield_flag) return true; @@ -297,14 +293,16 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie #endif } -void PipelineExecutor::initializeExecution(size_t num_threads) +void PipelineExecutor::initializeExecution(size_t num_threads, bool concurrency_control) { is_execution_initialized = true; + size_t use_threads = num_threads; + /// Allocate CPU slots from concurrency control - constexpr size_t min_threads = 1; + size_t min_threads = concurrency_control ? 1uz : num_threads; slots = ConcurrencyControl::instance().allocate(min_threads, num_threads); - size_t use_threads = slots->grantedCount(); + use_threads = slots->grantedCount(); Queue queue; graph->initializeExecution(queue); @@ -320,7 +318,7 @@ void PipelineExecutor::spawnThreads() { while (auto slot = slots->tryAcquire()) { - size_t thread_num = threads++; + size_t thread_num = threads.fetch_add(1); /// Count of threads in use should be updated for proper finish() condition. /// NOTE: this will not decrease `use_threads` below initially granted count @@ -352,9 +350,9 @@ void PipelineExecutor::spawnThreads() } } -void PipelineExecutor::executeImpl(size_t num_threads) +void PipelineExecutor::executeImpl(size_t num_threads, bool concurrency_control) { - initializeExecution(num_threads); + initializeExecution(num_threads, concurrency_control); bool finished_flag = false; diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index 1e7d52d8290..dee12dad282 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -38,7 +38,7 @@ public: /// Execute pipeline in multiple threads. Must be called once. /// In case of exception during execution throws any occurred. - void execute(size_t num_threads); + void execute(size_t num_threads, bool concurrency_control); /// Execute single step. Step will be stopped when yield_flag is true. /// Execution is happened in a single thread. @@ -67,7 +67,7 @@ private: ExecutorTasks tasks; - // Concurrency control related + /// Concurrency control related ConcurrencyControl::AllocationPtr slots; ConcurrencyControl::SlotPtr single_thread_slot; // slot for single-thread mode to work using executeStep() std::unique_ptr pool; @@ -92,12 +92,12 @@ private: using Queue = std::queue; - void initializeExecution(size_t num_threads); /// Initialize executor contexts and task_queue. + void initializeExecution(size_t num_threads, bool concurrency_control); /// Initialize executor contexts and task_queue. void finalizeExecution(); /// Check all processors are finished. void spawnThreads(); /// Methods connected to execution. 
- void executeImpl(size_t num_threads); + void executeImpl(size_t num_threads, bool concurrency_control); void executeStepImpl(size_t thread_num, std::atomic_bool * yield_flag = nullptr); void executeSingleThread(size_t thread_num); void finish(); diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index b2608f665b7..345bec395b2 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -67,7 +67,8 @@ const Block & PullingAsyncPipelineExecutor::getHeader() const return lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader(); } -static void threadFunction(PullingAsyncPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads) +static void threadFunction( + PullingAsyncPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads, bool concurrency_control) { SCOPE_EXIT_SAFE( if (thread_group) @@ -80,7 +81,7 @@ static void threadFunction(PullingAsyncPipelineExecutor::Data & data, ThreadGrou if (thread_group) CurrentThread::attachToGroup(thread_group); - data.executor->execute(num_threads); + data.executor->execute(num_threads, concurrency_control); } catch (...) { @@ -108,7 +109,7 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) auto func = [&, thread_group = CurrentThread::getGroup()]() { - threadFunction(*data, thread_group, pipeline.getNumThreads()); + threadFunction(*data, thread_group, pipeline.getNumThreads(), pipeline.getConcurrencyControl()); }; data->thread = ThreadFromGlobalPool(std::move(func)); diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp index 59d33cbffed..a816ab9ca7f 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -98,7 +98,8 @@ struct PushingAsyncPipelineExecutor::Data } }; -static void threadFunction(PushingAsyncPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads) +static void threadFunction( + PushingAsyncPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads, bool concurrency_control) { SCOPE_EXIT_SAFE( if (thread_group) @@ -111,7 +112,7 @@ static void threadFunction(PushingAsyncPipelineExecutor::Data & data, ThreadGrou if (thread_group) CurrentThread::attachToGroup(thread_group); - data.executor->execute(num_threads); + data.executor->execute(num_threads, concurrency_control); } catch (...) 
{ @@ -172,7 +173,7 @@ void PushingAsyncPipelineExecutor::start() auto func = [&, thread_group = CurrentThread::getGroup()]() { - threadFunction(*data, thread_group, pipeline.getNumThreads()); + threadFunction(*data, thread_group, pipeline.getNumThreads(), pipeline.getConcurrencyControl()); }; data->thread = ThreadFromGlobalPool(std::move(func)); diff --git a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp index c5b5dad5aa5..ca46f92eeb4 100644 --- a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp +++ b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp @@ -201,5 +201,9 @@ void CreateSetAndFilterOnTheFlyStep::updateOutputStream() output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits()); } +bool CreateSetAndFilterOnTheFlyStep::isColumnPartOfSetKey(const String & column_name) const +{ + return std::find(column_names.begin(), column_names.end(), column_name) != column_names.end(); +} } diff --git a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h index b363991c2f6..023901dba02 100644 --- a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h +++ b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h @@ -35,6 +35,8 @@ public: SetWithStatePtr getSet() const { return own_set; } + bool isColumnPartOfSetKey(const String & column_name) const; + /// Set for another stream. void setFiltering(SetWithStatePtr filtering_set_) { filtering_set = filtering_set_; } diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index af47b6ff4cd..3b31a809f9d 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -428,8 +428,15 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes return updated_steps; } - if (auto updated_steps = simplePushDownOverStep(parent_node, nodes, child)) - return updated_steps; + if (const auto * join_filter_set_step = typeid_cast(child.get())) + { + const auto & filter_column_name = assert_cast(parent_node->step.get())->getFilterColumnName(); + bool can_remove_filter = !join_filter_set_step->isColumnPartOfSetKey(filter_column_name); + + Names allowed_inputs = child->getOutputStream().header.getNames(); + if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs, can_remove_filter)) + return updated_steps; + } if (auto * union_step = typeid_cast(child.get())) { diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index ceda9f97bab..8054209c1c3 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -168,7 +168,6 @@ QueryPipelineBuilderPtr QueryPlan::buildQueryPipeline( QueryPipelineBuilderPtr last_pipeline; - std::stack stack; stack.push(Frame{.node = root}); diff --git a/src/Processors/QueryPlan/QueryPlan.h b/src/Processors/QueryPlan/QueryPlan.h index f4a6c9097f2..62d658ddccd 100644 --- a/src/Processors/QueryPlan/QueryPlan.h +++ b/src/Processors/QueryPlan/QueryPlan.h @@ -98,6 +98,9 @@ public: void setMaxThreads(size_t max_threads_) { max_threads = max_threads_; } size_t getMaxThreads() const { return max_threads; } + void setConcurrencyControl(bool concurrency_control_) { concurrency_control = concurrency_control_; } + bool getConcurrencyControl() const { return 
concurrency_control; } + /// Tree node. Step and it's children. struct Node { @@ -120,6 +123,7 @@ private: /// Those fields are passed to QueryPipeline. size_t max_threads = 0; + bool concurrency_control = false; }; std::string debugExplainStep(const IQueryPlanStep & step); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 7cf38d40503..0b5eb94dbac 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1038,7 +1038,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( /// MergeTreeReadPool and MergeTreeThreadSelectProcessor for parallel select. if (num_streams > 1 && settings.do_not_merge_across_partitions_select_final && std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 && - parts_to_merge_ranges[range_index]->data_part->info.level > 0) + parts_to_merge_ranges[range_index]->data_part->info.level > 0 + && data.merging_params.is_deleted_column.empty()) { sum_marks_in_lonely_parts += parts_to_merge_ranges[range_index]->getMarksCount(); lonely_parts.push_back(std::move(*parts_to_merge_ranges[range_index])); @@ -1094,7 +1095,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( /// with level > 0 then we won't postprocess this part if (settings.do_not_merge_across_partitions_select_final && std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 && - parts_to_merge_ranges[range_index]->data_part->info.level > 0) + parts_to_merge_ranges[range_index]->data_part->info.level > 0 && + data.merging_params.is_deleted_column.empty()) { partition_pipes.emplace_back(Pipe::unitePipes(std::move(pipes))); continue; diff --git a/src/Processors/Sources/PostgreSQLSource.cpp b/src/Processors/Sources/PostgreSQLSource.cpp index 115e24d5740..a4e81a081a4 100644 --- a/src/Processors/Sources/PostgreSQLSource.cpp +++ b/src/Processors/Sources/PostgreSQLSource.cpp @@ -59,7 +59,6 @@ PostgreSQLSource::PostgreSQLSource( init(sample_block); } - template void PostgreSQLSource::init(const Block & sample_block) { @@ -82,7 +81,8 @@ void PostgreSQLSource::onStart() { try { - tx = std::make_shared(connection_holder->get()); + auto & conn = connection_holder->get(); + tx = std::make_shared(conn); } catch (const pqxx::broken_connection &) { @@ -180,6 +180,27 @@ void PostgreSQLSource::onFinish() if (tx && auto_commit) tx->commit(); + + is_completed = true; +} + +template +PostgreSQLSource::~PostgreSQLSource() +{ + if (!is_completed) + { + try + { + stream.reset(); + tx.reset(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + connection_holder->setBroken(); + } } template diff --git a/src/Processors/Sources/PostgreSQLSource.h b/src/Processors/Sources/PostgreSQLSource.h index 312e9f5fb18..8a648ae8bb5 100644 --- a/src/Processors/Sources/PostgreSQLSource.h +++ b/src/Processors/Sources/PostgreSQLSource.h @@ -28,6 +28,8 @@ public: String getName() const override { return "PostgreSQL"; } + ~PostgreSQLSource() override; + protected: PostgreSQLSource( std::shared_ptr tx_, @@ -54,6 +56,7 @@ private: ExternalResultDescription description; bool started = false; + bool is_completed = false; postgres::ConnectionHolderPtr connection_holder; diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index f70ebcf27b8..34f02ba4ead 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -432,6 +432,7 @@ Chain buildPushingToViewsChain( processors.emplace_back(std::move(finalizing_views)); result_chain = Chain(std::move(processors)); result_chain.setNumThreads(std::min(views_data->max_threads, max_parallel_streams)); + result_chain.setConcurrencyControl(settings.use_concurrency_control); } if (auto * live_view = dynamic_cast(storage.get())) diff --git a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp index 40718bd968a..ce5992c2548 100644 --- a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp +++ b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp @@ -29,7 +29,7 @@ TEST(Processors, PortsConnected) QueryStatusPtr element; PipelineExecutor executor(processors, element); - executor.execute(1); + executor.execute(1, false); } TEST(Processors, PortsNotConnected) @@ -55,7 +55,7 @@ TEST(Processors, PortsNotConnected) { QueryStatusPtr element; PipelineExecutor executor(processors, element); - executor.execute(1); + executor.execute(1, false); ASSERT_TRUE(false) << "Should have thrown."; } catch (DB::Exception & e) diff --git a/src/QueryPipeline/Chain.h b/src/QueryPipeline/Chain.h index 322e49d0d49..c093fc57ad3 100644 --- a/src/QueryPipeline/Chain.h +++ b/src/QueryPipeline/Chain.h @@ -29,6 +29,9 @@ public: size_t getNumThreads() const { return num_threads; } void setNumThreads(size_t num_threads_) { num_threads = num_threads_; } + bool getConcurrencyControl() const { return concurrency_control; } + void setConcurrencyControl(bool concurrency_control_) { concurrency_control = concurrency_control_; } + void addSource(ProcessorPtr processor); void addSink(ProcessorPtr processor); void appendChain(Chain chain); @@ -66,6 +69,7 @@ private: /// input port output port std::list processors; size_t num_threads = 0; + bool concurrency_control = false; }; } diff --git a/src/QueryPipeline/QueryPipeline.h b/src/QueryPipeline/QueryPipeline.h index 0476b8e4bbf..f14cf61aac2 100644 --- a/src/QueryPipeline/QueryPipeline.h +++ b/src/QueryPipeline/QueryPipeline.h @@ -100,6 +100,9 @@ public: size_t getNumThreads() const { return num_threads; } void setNumThreads(size_t num_threads_) { num_threads = num_threads_; } + bool getConcurrencyControl() const { return concurrency_control; } + void setConcurrencyControl(bool concurrency_control_) { concurrency_control = concurrency_control_; } + void setProcessListElement(QueryStatusPtr elem); void setProgressCallback(const ProgressCallback & callback); void setLimitsAndQuota(const StreamLocalLimits & limits, 
std::shared_ptr quota_); @@ -157,6 +160,7 @@ private: IOutputFormat * output_format = nullptr; size_t num_threads = 0; + bool concurrency_control = false; friend class PushingPipelineExecutor; friend class PullingPipelineExecutor; diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 39d51beaa9d..f9726339872 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -278,6 +278,7 @@ QueryPipelineBuilder QueryPipelineBuilder::unitePipelines( /// Note: it may be > than settings.max_threads, so we should apply this limit again. bool will_limit_max_threads = true; size_t max_threads = 0; + bool concurrency_control = false; Pipes pipes; QueryPlanResourceHolder resources; @@ -297,6 +298,8 @@ QueryPipelineBuilder QueryPipelineBuilder::unitePipelines( /// It may happen if max_distributed_connections > max_threads if (pipeline.max_threads > max_threads_limit) max_threads_limit = pipeline.max_threads; + + concurrency_control = pipeline.getConcurrencyControl(); } QueryPipelineBuilder pipeline; @@ -307,6 +310,7 @@ QueryPipelineBuilder QueryPipelineBuilder::unitePipelines( { pipeline.setMaxThreads(max_threads); pipeline.limitMaxThreads(max_threads_limit); + pipeline.setConcurrencyControl(concurrency_control); } pipeline.setCollectedProcessors(nullptr); @@ -644,6 +648,7 @@ QueryPipeline QueryPipelineBuilder::getPipeline(QueryPipelineBuilder builder) QueryPipeline res(std::move(builder.pipe)); res.addResources(std::move(builder.resources)); res.setNumThreads(builder.getNumThreads()); + res.setConcurrencyControl(builder.getConcurrencyControl()); res.setProcessListElement(builder.process_list_element); res.setProgressCallback(builder.progress_callback); return res; diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index e744e3612ce..5d273df7068 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -183,6 +183,16 @@ public: max_threads = max_threads_; } + void setConcurrencyControl(bool concurrency_control_) + { + concurrency_control = concurrency_control_; + } + + bool getConcurrencyControl() + { + return concurrency_control; + } + void addResources(QueryPlanResourceHolder resources_) { resources = std::move(resources_); } void setQueryIdHolder(std::shared_ptr query_id_holder) { resources.query_id_holders.emplace_back(std::move(query_id_holder)); } void addContext(ContextPtr context) { resources.interpreter_context.emplace_back(std::move(context)); } @@ -201,6 +211,8 @@ private: /// Sometimes, more streams are created then the number of threads for more optimal execution. 
size_t max_threads = 0; + bool concurrency_control = false; + QueryStatusPtr process_list_element; ProgressCallback progress_callback = nullptr; diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 3370a8c009b..77a5369252e 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -1134,7 +1134,7 @@ namespace }); auto executor = cur_pipeline.execute(); - executor->execute(1); + executor->execute(1, false); } } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 983d88b13fc..136f2dd9537 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -114,6 +115,20 @@ NameToNameMap convertToQueryParameters(const Settings & passed_params) return query_parameters; } +// This function corrects the wrong client_name from the old client. +// Old clients (23.8.1 and earlier, including some intermediate 23.8 builds) were sending a different ClientInfo.client_name +// "ClickHouse client" was sent with the hello message. +// "ClickHouse" or "ClickHouse " was sent with the query message. +void correctQueryClientInfo(const ClientInfo & session_client_info, ClientInfo & client_info) +{ + if (client_info.getVersionNumber() <= VersionNumber(23, 8, 1) && + session_client_info.client_name == "ClickHouse client" && + (client_info.client_name == "ClickHouse" || client_info.client_name == "ClickHouse ")) + { + client_info.client_name = "ClickHouse client"; + } +} + void validateClientInfo(const ClientInfo & session_client_info, const ClientInfo & client_info) { // Secondary query may contain different client_info. @@ -1532,7 +1547,11 @@ void TCPHandler::receiveQuery() if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_CLIENT_INFO) { client_info.read(*in, client_tcp_protocol_version); - validateClientInfo(session->getClientInfo(), client_info); + + correctQueryClientInfo(session->getClientInfo(), client_info); + const auto & config_ref = Context::getGlobalContextInstance()->getServerSettings(); + if (config_ref.validate_tcp_client_information) + validateClientInfo(session->getClientInfo(), client_info); } /// Per query settings are also passed via TCP.
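To make the client-name correction in the TCPHandler hunk above easier to follow in isolation, here is a minimal standalone sketch of the same rule. It uses hypothetical, simplified `Version` and `Client` structs rather than the real `VersionNumber`/`ClientInfo` classes, and `normalizeClientName` is an invented name used only for illustration.
```
#include <iostream>
#include <string>
#include <tuple>

struct Version { unsigned major_num = 0, minor_num = 0, patch_num = 0; };

bool operator<=(const Version & a, const Version & b)
{
    return std::tie(a.major_num, a.minor_num, a.patch_num) <= std::tie(b.major_num, b.minor_num, b.patch_num);
}

struct Client { std::string name; Version version; };

/// Mirrors the rule above: clients at or below 23.8.1 that said "ClickHouse client"
/// in the hello message but "ClickHouse"/"ClickHouse " in the query message get
/// their query-message name normalized back to "ClickHouse client".
void normalizeClientName(const Client & session_client, Client & query_client)
{
    const Version cutoff{23, 8, 1};
    if (query_client.version <= cutoff
        && session_client.name == "ClickHouse client"
        && (query_client.name == "ClickHouse" || query_client.name == "ClickHouse "))
        query_client.name = "ClickHouse client";
}

int main()
{
    Client session{"ClickHouse client", {23, 7, 1}};
    Client query{"ClickHouse ", {23, 7, 1}};
    normalizeClientName(session, query);
    std::cout << query.name << '\n'; /// prints "ClickHouse client"
}
```
The key property is that only the query-message name is rewritten, and only when the hello message already identified the client as `ClickHouse client`; validation of the client info then runs only if the server setting enables it.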
diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index 51839ad973a..35199ec1f84 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -186,6 +186,15 @@ void DistributedAsyncInsertDirectoryQueue::shutdownAndDropAllData() fs::remove_all(path); } +void DistributedAsyncInsertDirectoryQueue::shutdownWithoutFlush() +{ + /// It's incompatible with should_batch_inserts + /// because processFilesWithBatching may push to the queue after shutdown + chassert(!should_batch_inserts); + pending_files.finish(); + task_handle->deactivate(); +} + void DistributedAsyncInsertDirectoryQueue::run() { @@ -401,7 +410,7 @@ try if (!current_file.empty()) processFile(current_file); - while (pending_files.tryPop(current_file)) + while (!pending_files.isFinished() && pending_files.tryPop(current_file)) processFile(current_file); } diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h index 45c355bb64e..6378479761d 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h @@ -66,6 +66,8 @@ public: void shutdownAndDropAllData(); + void shutdownWithoutFlush(); + static std::shared_ptr createSourceFromFile(const String & file_name); /// For scheduling via DistributedSink. diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index dae6f6a7ca9..5faccefd836 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -719,6 +719,7 @@ bool StorageFileLog::streamToViews() { block_io.pipeline.complete(std::move(input)); block_io.pipeline.setNumThreads(max_streams_number); + block_io.pipeline.setConcurrencyControl(new_context->getSettingsRef().use_concurrency_control); block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); CompletedPipelineExecutor executor(block_io.pipeline); executor.execute(); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 97fec2444af..c2fb82aceee 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -602,7 +602,7 @@ public: /// Checks that table could be dropped right now /// Otherwise - throws an exception with detailed information. /// We do not use mutex because it is not very important that the size could change during the operation. - virtual void checkTableCanBeDropped() const {} + virtual void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const {} /// Similar to above but checks for DETACH. It's only used for DICTIONARIES. 
virtual void checkTableCanBeDetached() const {} diff --git a/src/Storages/Kafka/KafkaConsumer.cpp b/src/Storages/Kafka/KafkaConsumer.cpp index 9e558940012..31d431e27fe 100644 --- a/src/Storages/Kafka/KafkaConsumer.cpp +++ b/src/Storages/Kafka/KafkaConsumer.cpp @@ -542,15 +542,22 @@ void KafkaConsumer::storeLastReadMessageOffset() } } -void KafkaConsumer::setExceptionInfo(const cppkafka::Error & err) +void KafkaConsumer::setExceptionInfo(const cppkafka::Error & err, bool with_stacktrace) { - setExceptionInfo(err.to_string()); + setExceptionInfo(err.to_string(), with_stacktrace); } -void KafkaConsumer::setExceptionInfo(const String & text) +void KafkaConsumer::setExceptionInfo(const std::string & text, bool with_stacktrace) { + std::string enriched_text = text; + + if (with_stacktrace) + { + enriched_text.append(StackTrace().toString()); + } + std::lock_guard lock(exception_mutex); - exceptions_buffer.push_back({text, static_cast(Poco::Timestamp().epochTime())}); + exceptions_buffer.push_back({enriched_text, static_cast(Poco::Timestamp().epochTime())}); } /* diff --git a/src/Storages/Kafka/KafkaConsumer.h b/src/Storages/Kafka/KafkaConsumer.h index 91bb2ae8d77..1c3ddd85873 100644 --- a/src/Storages/Kafka/KafkaConsumer.h +++ b/src/Storages/Kafka/KafkaConsumer.h @@ -105,8 +105,8 @@ public: auto currentTimestamp() const { return current[-1].get_timestamp(); } const auto & currentHeaderList() const { return current[-1].get_header_list(); } String currentPayload() const { return current[-1].get_payload(); } - void setExceptionInfo(const cppkafka::Error & err); - void setExceptionInfo(const String & text); + void setExceptionInfo(const cppkafka::Error & err, bool with_stacktrace = true); + void setExceptionInfo(const std::string & text, bool with_stacktrace = true); void setRDKafkaStat(const std::string & stat_json_string) { std::lock_guard lock(rdkafka_stat_mutex); diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 54db0f29cb8..43a3bedfb74 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -732,6 +732,8 @@ void StorageKafka::threadFunc(size_t idx) { assert(idx < tasks.size()); auto task = tasks[idx]; + std::string exception_str; + try { auto table_id = getStorageID(); @@ -771,7 +773,24 @@ void StorageKafka::threadFunc(size_t idx) } catch (...) 
{ - tryLogCurrentException(__PRETTY_FUNCTION__); + /// do bare minimum in catch block + LockMemoryExceptionInThread lock_memory_tracker(VariableContext::Global); + exception_str = getCurrentExceptionMessage(true /* with_stacktrace */); + } + + if (!exception_str.empty()) + { + LOG_ERROR(log, "{} {}", __PRETTY_FUNCTION__, exception_str); + + auto safe_consumers = getSafeConsumers(); + for (auto const & consumer_ptr_weak : safe_consumers.consumers) + { + /// propagate materialized view exception to all consumers + if (auto consumer_ptr = consumer_ptr_weak.lock()) + { + consumer_ptr->setExceptionInfo(exception_str, false /* no stacktrace, reuse passed one */); + } + } } mv_attached.store(false); @@ -846,6 +865,7 @@ bool StorageKafka::streamToViews() // we need to read all consumers in parallel (sequential read may lead to situation // when some of consumers are not used, and will break some Kafka consumer invariants) block_io.pipeline.setNumThreads(stream_count); + block_io.pipeline.setConcurrencyControl(kafka_context->getSettingsRef().use_concurrency_control); block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); CompletedPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 5719529533e..aec2405b973 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -263,7 +263,7 @@ NamesAndTypesList StorageLiveView::getVirtuals() const }; } -void StorageLiveView::checkTableCanBeDropped() const +void StorageLiveView::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const { auto table_id = getStorageID(); auto view_ids = DatabaseCatalog::instance().getDependentViews(table_id); @@ -478,7 +478,7 @@ void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) }); auto executor = pipeline.execute(); - executor->execute(pipeline.getNumThreads()); + executor->execute(pipeline.getNumThreads(), local_context->getSettingsRef().use_concurrency_control); } void StorageLiveView::refresh() diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 002cbf96ebe..92ffd4dc642 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -75,7 +75,7 @@ public: NamesAndTypesList getVirtuals() const override; - void checkTableCanBeDropped() const override; + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override; void drop() override; diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp index 23b6668c8d8..a9cdd09e061 100644 --- a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp +++ b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp @@ -18,7 +18,9 @@ namespace CurrentMetrics namespace DB { -struct AsyncBlockIDsCache::Cache : public std::unordered_set + +template +struct AsyncBlockIDsCache::Cache : public std::unordered_set { CurrentMetrics::Increment cache_size_increment; explicit Cache(std::unordered_set && set_) @@ -27,7 +29,8 @@ struct AsyncBlockIDsCache::Cache : public std::unordered_set {} }; -std::vector AsyncBlockIDsCache::getChildren() +template +std::vector AsyncBlockIDsCache::getChildren() { auto zookeeper = storage.getZooKeeper(); @@ -50,7 +53,8 @@ std::vector AsyncBlockIDsCache::getChildren() return children; } -void AsyncBlockIDsCache::update() +template +void AsyncBlockIDsCache::update() try { std::vector paths = 
getChildren(); @@ -73,24 +77,27 @@ catch (...) task->scheduleAfter(update_min_interval.count()); } -AsyncBlockIDsCache::AsyncBlockIDsCache(StorageReplicatedMergeTree & storage_) +template +AsyncBlockIDsCache::AsyncBlockIDsCache(TStorage & storage_) : storage(storage_), update_min_interval(storage.getSettings()->async_block_ids_cache_min_update_interval_ms), - path(storage.zookeeper_path + "/async_blocks"), + path(storage.getZooKeeperPath() + "/async_blocks"), log_name(storage.getStorageID().getFullTableName() + " (AsyncBlockIDsCache)"), log(&Poco::Logger::get(log_name)) { task = storage.getContext()->getSchedulePool().createTask(log_name, [this]{ update(); }); } -void AsyncBlockIDsCache::start() +template +void AsyncBlockIDsCache::start() { if (storage.getSettings()->use_async_block_ids_cache) task->activateAndSchedule(); } /// Caller will keep the version of last call. When the caller calls again, it will wait util gets a newer version. -Strings AsyncBlockIDsCache::detectConflicts(const Strings & paths, UInt64 & last_version) +template +Strings AsyncBlockIDsCache::detectConflicts(const Strings & paths, UInt64 & last_version) { if (!storage.getSettings()->use_async_block_ids_cache) return {}; @@ -128,4 +135,6 @@ Strings AsyncBlockIDsCache::detectConflicts(const Strings & paths, UInt64 & last return conflicts; } +template class AsyncBlockIDsCache; + } diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.h b/src/Storages/MergeTree/AsyncBlockIDsCache.h index 91d549a0501..fbd97fd00ff 100644 --- a/src/Storages/MergeTree/AsyncBlockIDsCache.h +++ b/src/Storages/MergeTree/AsyncBlockIDsCache.h @@ -8,8 +8,7 @@ namespace DB { -class StorageReplicatedMergeTree; - +template class AsyncBlockIDsCache { struct Cache; @@ -20,7 +19,7 @@ class AsyncBlockIDsCache void update(); public: - explicit AsyncBlockIDsCache(StorageReplicatedMergeTree & storage_); + explicit AsyncBlockIDsCache(TStorage & storage_); void start(); @@ -30,7 +29,7 @@ public: private: - StorageReplicatedMergeTree & storage; + TStorage & storage; std::atomic last_updatetime; const std::chrono::milliseconds update_min_interval; @@ -48,6 +47,4 @@ private: Poco::Logger * log; }; -using AsyncBlockIDsCachePtr = std::shared_ptr; - } diff --git a/src/Storages/MergeTree/InsertBlockInfo.cpp b/src/Storages/MergeTree/InsertBlockInfo.cpp new file mode 100644 index 00000000000..ac900f8cf09 --- /dev/null +++ b/src/Storages/MergeTree/InsertBlockInfo.cpp @@ -0,0 +1,150 @@ +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +AsyncInsertBlockInfo::AsyncInsertBlockInfo( + Poco::Logger * log_, + std::vector && block_id_, + BlockWithPartition && block_, + std::optional && unmerged_block_with_partition_) + : log(log_) + , block_id(std::move(block_id_)) + , block_with_partition(std::move(block_)) + , unmerged_block_with_partition(std::move(unmerged_block_with_partition_)) +{ + initBlockIDMap(); +} + +void AsyncInsertBlockInfo::initBlockIDMap() +{ + block_id_to_offset_idx.clear(); + for (size_t i = 0; i < block_id.size(); ++i) + { + block_id_to_offset_idx[block_id[i]].push_back(i); + } +} + +/// this function check if the block contains duplicate inserts. +/// if so, we keep only one insert for every duplicate ones. +bool AsyncInsertBlockInfo::filterSelfDuplicate() +{ + std::vector dup_block_ids; + for (const auto & [hash_id, offset_indexes] : block_id_to_offset_idx) + { + /// It means more than one inserts have the same hash id, in this case, we should keep only one of them. 
+ if (offset_indexes.size() > 1) + dup_block_ids.push_back(hash_id); + } + if (dup_block_ids.empty()) + return false; + + filterBlockDuplicate(dup_block_ids, true); + return true; +} + +/// remove the conflict parts of block for rewriting again. +void AsyncInsertBlockInfo::filterBlockDuplicate(const std::vector & block_paths, bool self_dedup) +{ + auto * current_block_with_partition = unmerged_block_with_partition.has_value() ? &unmerged_block_with_partition.value() : &block_with_partition; + std::vector offset_idx; + for (const auto & raw_path : block_paths) + { + std::filesystem::path p(raw_path); + String conflict_block_id = p.filename(); + auto it = block_id_to_offset_idx.find(conflict_block_id); + if (it == block_id_to_offset_idx.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown conflict path {}", conflict_block_id); + /// if this filter is for self_dedup, that means the block paths is selected by `filterSelfDuplicate`, which is a self purge. + /// in this case, we don't know if zk has this insert, then we should keep one insert, to avoid missing this insert. + offset_idx.insert(std::end(offset_idx), std::begin(it->second) + self_dedup, std::end(it->second)); + } + std::sort(offset_idx.begin(), offset_idx.end()); + + auto & offsets = current_block_with_partition->offsets; + size_t idx = 0, remove_count = 0; + auto it = offset_idx.begin(); + std::vector new_offsets; + std::vector new_block_ids; + + /// construct filter + size_t rows = current_block_with_partition->block.rows(); + auto filter_col = ColumnUInt8::create(rows, 1u); + ColumnUInt8::Container & vec = filter_col->getData(); + UInt8 * pos = vec.data(); + for (auto & offset : offsets) + { + if (it != offset_idx.end() && *it == idx) + { + size_t start_pos = idx > 0 ? offsets[idx - 1] : 0; + size_t end_pos = offset; + remove_count += end_pos - start_pos; + while (start_pos < end_pos) + { + *(pos + start_pos) = 0; + start_pos++; + } + it++; + } + else + { + new_offsets.push_back(offset - remove_count); + new_block_ids.push_back(block_id[idx]); + } + idx++; + } + + LOG_TRACE(log, "New block IDs: {}, new offsets: {}, size: {}", toString(new_block_ids), toString(new_offsets), new_offsets.size()); + + current_block_with_partition->offsets = std::move(new_offsets); + block_id = std::move(new_block_ids); + auto cols = current_block_with_partition->block.getColumns(); + for (auto & col : cols) + { + col = col->filter(vec, rows - remove_count); + } + current_block_with_partition->block.setColumns(cols); + + LOG_TRACE(log, "New block rows {}", current_block_with_partition->block.rows()); + + initBlockIDMap(); + + if (unmerged_block_with_partition.has_value()) + block_with_partition.block = unmerged_block_with_partition->block; +} + +std::vector AsyncInsertBlockInfo::getHashesForBlocks(BlockWithPartition & block, String partition_id) +{ + size_t start = 0; + auto cols = block.block.getColumns(); + std::vector block_id_vec; + for (size_t i = 0; i < block.offsets.size(); ++i) + { + size_t offset = block.offsets[i]; + std::string_view token = block.tokens[i]; + if (token.empty()) + { + SipHash hash; + for (size_t j = start; j < offset; ++j) + { + for (const auto & col : cols) + col->updateHashWithValue(j, hash); + } + + const auto hash_value = hash.get128(); + block_id_vec.push_back(partition_id + "_" + DB::toString(hash_value.items[0]) + "_" + DB::toString(hash_value.items[1])); + } + else + block_id_vec.push_back(partition_id + "_" + std::string(token)); + + start = offset; + } + return block_id_vec; +} + +} diff --git 
a/src/Storages/MergeTree/InsertBlockInfo.h b/src/Storages/MergeTree/InsertBlockInfo.h new file mode 100644 index 00000000000..3882373c0fa --- /dev/null +++ b/src/Storages/MergeTree/InsertBlockInfo.h @@ -0,0 +1,55 @@ +#pragma once + +#include + +namespace DB +{ + +struct SyncInsertBlockInfo +{ + SyncInsertBlockInfo( + Poco::Logger * /*log_*/, + std::string && block_id_, + BlockWithPartition && /*block_*/, + std::optional && /*unmerged_block_with_partition_*/) + : block_id(std::move(block_id_)) + { + } + + explicit SyncInsertBlockInfo(std::string block_id_) + : block_id(std::move(block_id_)) + {} + + std::string block_id; +}; + +struct AsyncInsertBlockInfo +{ + Poco::Logger * log; + std::vector block_id; + BlockWithPartition block_with_partition; + /// Some merging algorithms can modify the block which loses the information about the async insert offsets + /// when preprocessing or filtering data for async inserts deduplication we want to use the initial, unmerged block + std::optional unmerged_block_with_partition; + std::unordered_map> block_id_to_offset_idx; + + AsyncInsertBlockInfo( + Poco::Logger * log_, + std::vector && block_id_, + BlockWithPartition && block_, + std::optional && unmerged_block_with_partition_); + + void initBlockIDMap(); + + /// this function checks if the block contains duplicate inserts. + /// if so, we keep only one insert for every duplicate ones. + bool filterSelfDuplicate(); + + /// remove the conflict parts of block for rewriting again. + void filterBlockDuplicate(const std::vector & block_paths, bool self_dedup); + /// Convert block id vector to string. Output at most 50 ids. + + static std::vector getHashesForBlocks(BlockWithPartition & block, String partition_id); +}; + +} diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h index cfc3b7519b8..d2765f10a22 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h @@ -25,6 +25,7 @@ public: template using AnnoyIndexWithSerializationPtr = std::shared_ptr>; + template struct MergeTreeIndexGranuleAnnoy final : public IMergeTreeIndexGranule { @@ -43,6 +44,7 @@ struct MergeTreeIndexGranuleAnnoy final : public IMergeTreeIndexGranule AnnoyIndexWithSerializationPtr index; }; + template struct MergeTreeIndexAggregatorAnnoy final : IMergeTreeIndexAggregator { @@ -104,7 +106,6 @@ private: const String distance_function; }; - } #endif diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp index 731aa2309e3..70e2b8f76df 100644 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp @@ -35,7 +35,7 @@ USearchIndexWithSerialization::USearchIndexWithSerialization(size_t dime } template -void USearchIndexWithSerialization::serialize([[maybe_unused]] WriteBuffer & ostr) const +void USearchIndexWithSerialization::serialize(WriteBuffer & ostr) const { auto callback = [&ostr](void * from, size_t n) { @@ -43,21 +43,19 @@ void USearchIndexWithSerialization::serialize([[maybe_unused]] WriteBuff return true; }; - Base::stream(callback); + Base::save_to_stream(callback); } template -void USearchIndexWithSerialization::deserialize([[maybe_unused]] ReadBuffer & istr) +void USearchIndexWithSerialization::deserialize(ReadBuffer & istr) { - BufferBase::Position & pos = istr.position(); - unum::usearch::memory_mapped_file_t memory_map(pos, istr.buffer().size() - istr.count()); - Base::view(std::move(memory_map)); - pos +=
Base::stream_length(); + auto callback = [&istr](void * from, size_t n) + { + istr.readStrict(reinterpret_cast(from), n); + return true; + }; - auto copy = Base::copy(); - if (!copy) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not copy usearch index"); - Base::swap(copy.index); + Base::load_from_stream(callback); } template @@ -246,18 +244,17 @@ std::vector MergeTreeIndexConditionUSearch::getUsefulRangesImpl(MergeTre throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to optimize query with where without distance"); const std::vector reference_vector = ann_condition.getReferenceVector(); + const auto granule = std::dynamic_pointer_cast>(idx_granule); if (granule == nullptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "Granule has the wrong type"); const USearchIndexWithSerializationPtr index = granule->index; + if (ann_condition.getDimensions() != index->dimensions()) - throw Exception( - ErrorCodes::INCORRECT_QUERY, - "The dimension of the space in the request ({}) " + throw Exception(ErrorCodes::INCORRECT_QUERY, "The dimension of the space in the request ({}) " "does not match the dimension in the index ({})", - ann_condition.getDimensions(), - index->dimensions()); + ann_condition.getDimensions(), index->dimensions()); auto result = index->search(reference_vector.data(), limit); std::vector neighbors(result.size()); /// indexes of dots which were closest to the reference vector diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.h b/src/Storages/MergeTree/MergeTreeIndexUSearch.h index f1fde934fd5..98fb05b6f1a 100644 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.h +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.h @@ -27,6 +27,7 @@ public: template using USearchIndexWithSerializationPtr = std::shared_ptr>; + template struct MergeTreeIndexGranuleUSearch final : public IMergeTreeIndexGranule { @@ -45,6 +46,7 @@ struct MergeTreeIndexGranuleUSearch final : public IMergeTreeIndexGranule USearchIndexWithSerializationPtr index; }; + template struct MergeTreeIndexAggregatorUSearch final : IMergeTreeIndexAggregator { @@ -64,7 +66,11 @@ struct MergeTreeIndexAggregatorUSearch final : IMergeTreeIndexAggregator class MergeTreeIndexConditionUSearch final : public IMergeTreeIndexConditionApproximateNearestNeighbor { public: - MergeTreeIndexConditionUSearch(const IndexDescription & index_description, const SelectQueryInfo & query, const String & distance_function, ContextPtr context); + MergeTreeIndexConditionUSearch( + const IndexDescription & index_description, + const SelectQueryInfo & query, + const String & distance_function, + ContextPtr context); ~MergeTreeIndexConditionUSearch() override = default; @@ -98,7 +104,6 @@ private: const String distance_function; }; - } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 72c495191c2..8adcdee7e23 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -49,17 +50,11 @@ namespace ErrorCodes template struct ReplicatedMergeTreeSinkImpl::DelayedChunk { - struct Partition + using BlockInfo = std::conditional_t; + struct Partition : public BlockInfo { - Poco::Logger * log; MergeTreeDataWriter::TemporaryPart temp_part; UInt64 elapsed_ns; - BlockIDsType block_id; - BlockWithPartition block_with_partition; - /// Some merging algorithms can mofidy the block which loses the information about the async insert offsets - /// when 
preprocessing or filtering data for asnyc inserts deduplication we want to use the initial, unmerged block - std::optional unmerged_block_with_partition; - std::unordered_map> block_id_to_offset_idx; ProfileEvents::Counters part_counters; Partition() = default; @@ -70,127 +65,11 @@ struct ReplicatedMergeTreeSinkImpl::DelayedChunk BlockWithPartition && block_, std::optional && unmerged_block_with_partition_, ProfileEvents::Counters && part_counters_) - : log(log_), + : BlockInfo(log_, std::move(block_id_), std::move(block_), std::move(unmerged_block_with_partition_)), temp_part(std::move(temp_part_)), elapsed_ns(elapsed_ns_), - block_id(std::move(block_id_)), - block_with_partition(std::move(block_)), - unmerged_block_with_partition(std::move(unmerged_block_with_partition_)), part_counters(std::move(part_counters_)) - { - initBlockIDMap(); - } - - void initBlockIDMap() - { - if constexpr (async_insert) - { - block_id_to_offset_idx.clear(); - for (size_t i = 0; i < block_id.size(); ++i) - { - block_id_to_offset_idx[block_id[i]].push_back(i); - } - } - } - - /// this function check if the block contains duplicate inserts. - /// if so, we keep only one insert for every duplicate ones. - bool filterSelfDuplicate() - { - if constexpr (async_insert) - { - std::vector dup_block_ids; - for (const auto & [hash_id, offset_indexes] : block_id_to_offset_idx) - { - /// It means more than one inserts have the same hash id, in this case, we should keep only one of them. - if (offset_indexes.size() > 1) - dup_block_ids.push_back(hash_id); - } - if (dup_block_ids.empty()) - return false; - - filterBlockDuplicate(dup_block_ids, true); - return true; - } - return false; - } - - /// remove the conflict parts of block for rewriting again. - void filterBlockDuplicate(const std::vector & block_paths, bool self_dedup) - { - if constexpr (async_insert) - { - auto * current_block_with_partition = unmerged_block_with_partition.has_value() ? &unmerged_block_with_partition.value() : &block_with_partition; - std::vector offset_idx; - for (const auto & raw_path : block_paths) - { - std::filesystem::path p(raw_path); - String conflict_block_id = p.filename(); - auto it = block_id_to_offset_idx.find(conflict_block_id); - if (it == block_id_to_offset_idx.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown conflict path {}", conflict_block_id); - /// if this filter is for self_dedup, that means the block paths is selected by `filterSelfDuplicate`, which is a self purge. - /// in this case, we don't know if zk has this insert, then we should keep one insert, to avoid missing this insert. - offset_idx.insert(std::end(offset_idx), std::begin(it->second) + self_dedup, std::end(it->second)); - } - std::sort(offset_idx.begin(), offset_idx.end()); - - auto & offsets = current_block_with_partition->offsets; - size_t idx = 0, remove_count = 0; - auto it = offset_idx.begin(); - std::vector new_offsets; - std::vector new_block_ids; - - /// construct filter - size_t rows = current_block_with_partition->block.rows(); - auto filter_col = ColumnUInt8::create(rows, 1u); - ColumnUInt8::Container & vec = filter_col->getData(); - UInt8 * pos = vec.data(); - for (auto & offset : offsets) - { - if (it != offset_idx.end() && *it == idx) - { - size_t start_pos = idx > 0 ? 
offsets[idx - 1] : 0; - size_t end_pos = offset; - remove_count += end_pos - start_pos; - while (start_pos < end_pos) - { - *(pos + start_pos) = 0; - start_pos++; - } - it++; - } - else - { - new_offsets.push_back(offset - remove_count); - new_block_ids.push_back(block_id[idx]); - } - idx++; - } - - LOG_TRACE(log, "New block IDs: {}, new offsets: {}, size: {}", toString(new_block_ids), toString(new_offsets), new_offsets.size()); - - current_block_with_partition->offsets = std::move(new_offsets); - block_id = std::move(new_block_ids); - auto cols = current_block_with_partition->block.getColumns(); - for (auto & col : cols) - { - col = col->filter(vec, rows - remove_count); - } - current_block_with_partition->block.setColumns(cols); - - LOG_TRACE(log, "New block rows {}", current_block_with_partition->block.rows()); - - initBlockIDMap(); - - if (unmerged_block_with_partition.has_value()) - block_with_partition.block = unmerged_block_with_partition->block; - } - else - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "sync insert should not call rewriteBlock"); - } - } + {} }; DelayedChunk() = default; @@ -236,35 +115,6 @@ namespace if (size > 50) size = 50; return fmt::format("({})", fmt::join(vec.begin(), vec.begin() + size, ",")); } - - std::vector getHashesForBlocks(BlockWithPartition & block, String partition_id) - { - size_t start = 0; - auto cols = block.block.getColumns(); - std::vector block_id_vec; - for (size_t i = 0; i < block.offsets.size(); ++i) - { - size_t offset = block.offsets[i]; - std::string_view token = block.tokens[i]; - if (token.empty()) - { - SipHash hash; - for (size_t j = start; j < offset; ++j) - { - for (const auto & col : cols) - col->updateHashWithValue(j, hash); - } - - const auto hash_value = hash.get128(); - block_id_vec.push_back(partition_id + "_" + DB::toString(hash_value.items[0]) + "_" + DB::toString(hash_value.items[1])); - } - else - block_id_vec.push_back(partition_id + "_" + std::string(token)); - - start = offset; - } - return block_id_vec; - } } template @@ -470,7 +320,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) if constexpr (async_insert) { - block_id = getHashesForBlocks(unmerged_block.has_value() ? *unmerged_block : current_block, temp_part.part->info.partition_id); + block_id = AsyncInsertBlockInfo::getHashesForBlocks(unmerged_block.has_value() ? *unmerged_block : current_block, temp_part.part->info.partition_id); LOG_TRACE(log, "async insert part, part id {}, block id {}, offsets {}, size {}", temp_part.part->info.partition_id, toString(block_id), toString(current_block.offsets), current_block.offsets.size()); } else diff --git a/src/Storages/NATS/StorageNATS.h b/src/Storages/NATS/StorageNATS.h index efe54243ee9..cc7b0d88be5 100644 --- a/src/Storages/NATS/StorageNATS.h +++ b/src/Storages/NATS/StorageNATS.h @@ -38,7 +38,7 @@ public: /// actions require an open connection. Therefore there needs to be a way inside shutdown() method to know whether it is called /// because of drop query. And drop() method is not suitable at all, because it will not only require to reopen connection, but also /// it can be called considerable time after table is dropped (for example, in case of Atomic database), which is not appropriate for the case. 
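The comment above spells out the pattern these queue-backed storages rely on: `checkTableCanBeDropped` doubles as a signal so that `shutdown()` knows it is running because of a DROP query. A minimal sketch of that pattern with the new `ContextPtr` parameter introduced here; the class name, the alias, and the shutdown logic are illustrative stand-ins, not ClickHouse source:

```
// Illustrative sketch only: the drop_table flag pattern described above,
// using the new checkTableCanBeDropped(ContextPtr) signature.
#include <memory>

struct Context;                                    // stand-in for DB::Context
using ContextPtr = std::shared_ptr<const Context>; // assumed alias for illustration

class StorageQueueExample
{
public:
    /// DROP TABLE calls this before shutdown(), so the flag tells shutdown()
    /// that it runs because of a drop and may talk to the external broker.
    void checkTableCanBeDropped(ContextPtr /*query_context*/) const { drop_table = true; }

    void shutdown()
    {
        if (drop_table)
        {
            /// Safe to reopen the connection and remove broker-side state
            /// (exchanges, queues, consumers) owned by this table.
        }
        /// Otherwise release only local resources and keep broker state intact.
    }

private:
    mutable bool drop_table = false;
};
```

The hunks below only change the signature of the override; the flag-setting body stays the same.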
- void checkTableCanBeDropped() const override { drop_table = true; } + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override { drop_table = true; } /// Always return virtual columns in addition to required columns void read( diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index dc410c4f298..2b40c88ba6e 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -41,7 +41,7 @@ public: /// actions require an open connection. Therefore there needs to be a way inside shutdown() method to know whether it is called /// because of drop query. And drop() method is not suitable at all, because it will not only require to reopen connection, but also /// it can be called considerable time after table is dropped (for example, in case of Atomic database), which is not appropriate for the case. - void checkTableCanBeDropped() const override { drop_table = true; } + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override { drop_table = true; } /// Always return virtual columns in addition to required columns void read( diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 95ddfb0f605..6d52d45c6a9 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -10,7 +10,6 @@ #include #include #include -#include #include @@ -211,8 +210,6 @@ struct SelectQueryInfo /// should we use custom key with the cluster bool use_custom_key = false; - mutable ParallelReplicasReadingCoordinatorPtr coordinator; - TreeRewriterResultPtr syntax_analyzer_result; /// This is an additional filer applied to current table. diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index b0b8aba38c7..09f972e4098 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -145,7 +145,7 @@ StorageDictionary::~StorageDictionary() removeDictionaryConfigurationFromRepository(); } -void StorageDictionary::checkTableCanBeDropped() const +void StorageDictionary::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const { if (location == Location::SameDatabaseAndNameAsDictionary) throw Exception(ErrorCodes::CANNOT_DETACH_DICTIONARY_AS_TABLE, @@ -159,7 +159,9 @@ void StorageDictionary::checkTableCanBeDropped() const void StorageDictionary::checkTableCanBeDetached() const { - checkTableCanBeDropped(); + /// Actually query context (from DETACH query) should be passed here. 
+ /// But we don't use it for this type of storage + checkTableCanBeDropped(getContext()); } Pipe StorageDictionary::read( diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index 48230dcfa9f..7d3ed01d185 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -62,7 +62,7 @@ public: ~StorageDictionary() override; - void checkTableCanBeDropped() const override; + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override; void checkTableCanBeDetached() const override; Pipe read( diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 6f0072c4560..a4d3c566967 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -331,6 +331,9 @@ StorageDistributed::StorageDistributed( , distributed_settings(distributed_settings_) , rng(randomSeed()) { + if (!distributed_settings.flush_on_detach && distributed_settings.monitor_batch_inserts) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Settings flush_on_detach=0 and monitor_batch_inserts=1 are incompatible"); + StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { @@ -1438,12 +1441,6 @@ ActionLock StorageDistributed::getActionLock(StorageActionBlockType type) void StorageDistributed::flushAndPrepareForShutdown() { - if (!getDistributedSettingsRef().flush_on_detach) - { - LOG_INFO(log, "Skip flushing data (due to flush_on_detach=0)"); - return; - } - try { flushClusterNodesAllData(getContext()); @@ -1469,9 +1466,18 @@ void StorageDistributed::flushClusterNodesAllData(ContextPtr local_context) directory_monitors.push_back(node.second.directory_monitor); } + bool need_flush = getDistributedSettingsRef().flush_on_detach; + if (!need_flush) + LOG_INFO(log, "Skip flushing data (due to flush_on_detach=0)"); + /// TODO: Maybe it should be executed in parallel for (auto & node : directory_monitors) - node->flushAllData(); + { + if (need_flush) + node->flushAllData(); + else + node->shutdownWithoutFlush(); + } } void StorageDistributed::rename(const String & new_path_to_table_data, const StorageID & new_table_id) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index facf41ff296..5f54066374f 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -384,33 +384,9 @@ std::unique_ptr createReadBuffer( bool use_table_fd, int table_fd, const String & compression_method, - ContextPtr context, - const String & path_to_archive = "") + ContextPtr context) { CompressionMethod method; - - if (!path_to_archive.empty()) - { - auto reader = createArchiveReader(path_to_archive); - - if (current_path.find_first_of("*?{") != std::string::npos) - { - auto matcher = std::make_shared(makeRegexpPatternFromGlobs(current_path)); - if (!matcher->ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", current_path, matcher->error()); - - return reader->readFile([my_matcher = std::move(matcher)](const std::string & path) - { - return re2::RE2::FullMatch(path, *my_matcher); - }, /*throw_on_not_found=*/true); - } - else - { - return reader->readFile(current_path, /*throw_on_not_found=*/true); - } - } - if (use_table_fd) method = chooseCompressionMethod("", compression_method); else @@ -471,14 +447,12 @@ namespace public: ReadBufferFromFileIterator( const std::vector & paths_, - const std::vector & paths_to_archive_, const String & format_, const String & compression_method_, const std::optional & format_settings_, ContextPtr 
context_) : WithContext(context_) , paths(paths_) - , paths_to_archive(paths_to_archive_) , format(format_) , compression_method(compression_method_) , format_settings(format_settings_) @@ -487,15 +461,13 @@ namespace std::unique_ptr next() override { - String path; struct stat file_stat; bool is_first = current_index == 0; - const auto & paths_ref = paths_to_archive.empty() ? paths : paths_to_archive; do { - if (current_index == paths_ref.size()) + if (current_index == paths.size()) { if (is_first) throw Exception( @@ -505,19 +477,16 @@ namespace return nullptr; } - path = paths_ref[current_index++]; + path = paths[current_index++]; file_stat = getFileStat(path, false, -1, "File"); } while (getContext()->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0); - if (paths_to_archive.empty()) - return createReadBuffer(path, file_stat, false, -1, compression_method, getContext()); - - return createReadBuffer(paths[0], file_stat, false, -1, compression_method, getContext(), path); + return createReadBuffer(path, file_stat, false, -1, compression_method, getContext()); } void setNumRowsToLastFile(size_t num_rows) override { - if (!getContext()->getSettingsRef().use_cache_for_count_from_files || !paths_to_archive.empty()) + if (!getContext()->getSettingsRef().use_cache_for_count_from_files) return; auto key = getKeyForSchemaCache(paths[current_index - 1], format, format_settings, getContext()); @@ -526,12 +495,182 @@ namespace private: const std::vector & paths; - const std::vector & paths_to_archive; + size_t current_index = 0; String format; String compression_method; const std::optional & format_settings; }; + + struct ReadBufferFromArchiveIterator : public IReadBufferIterator, WithContext + { + public: + ReadBufferFromArchiveIterator( + const StorageFile::ArchiveInfo & archive_info_, + const String & format_, + const std::optional & format_settings_, + ContextPtr context_) + : WithContext(context_) + , archive_info(archive_info_) + , format(format_) + , format_settings(format_settings_) + { + } + + std::unique_ptr next() override + { + std::unique_ptr read_buf; + struct stat file_stat; + while (true) + { + if (current_archive_index == archive_info.paths_to_archives.size()) + { + if (is_first) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually", + format); + + return nullptr; + } + + const auto & archive = archive_info.paths_to_archives[current_archive_index]; + file_stat = getFileStat(archive, false, -1, "File"); + if (file_stat.st_size == 0) + { + if (getContext()->getSettingsRef().engine_file_skip_empty_files) + { + ++current_archive_index; + continue; + } + + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, because the archive {} is empty. 
" + "You must specify table structure manually", + format, + archive); + } + + auto archive_reader = createArchiveReader(archive); + + auto try_get_columns_from_schema_cache = [&](const std::string & full_path) -> std::optional + { + auto context = getContext(); + if (!getContext()->getSettingsRef().schema_inference_use_cache_for_file) + return std::nullopt; + + auto & schema_cache = StorageFile::getSchemaCache(context); + auto get_last_mod_time = [&]() -> std::optional + { + if (0 != stat(archive_reader->getPath().c_str(), &file_stat)) + return std::nullopt; + + return file_stat.st_mtime; + }; + + auto cache_key = getKeyForSchemaCache(full_path, format, format_settings, context); + auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); + + if (columns) + return columns; + + return std::nullopt; + }; + + if (archive_info.isSingleFileRead()) + { + read_buf = archive_reader->readFile(archive_info.path_in_archive, false); + ++current_archive_index; + if (!read_buf) + continue; + + last_read_file_path = processed_files.emplace_back(fmt::format("{}::{}", archive_reader->getPath(), archive_info.path_in_archive)); + columns_from_cache = try_get_columns_from_schema_cache(last_read_file_path); + + if (columns_from_cache) + return nullptr; + } + else + { + auto file_enumerator = archive_reader->firstFile(); + if (!file_enumerator) + { + if (getContext()->getSettingsRef().engine_file_skip_empty_files) + { + read_files_from_archive.clear(); + ++current_archive_index; + continue; + } + + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, because the archive {} has no files. " + "You must specify table structure manually", + format, + archive); + } + + const auto * filename = &file_enumerator->getFileName(); + while (read_files_from_archive.contains(*filename) || !archive_info.filter(*filename)) + { + if (!file_enumerator->nextFile()) + { + archive_reader = nullptr; + break; + } + + filename = &file_enumerator->getFileName(); + } + + if (!archive_reader) + { + read_files_from_archive.clear(); + ++current_archive_index; + continue; + } + + last_read_file_path = processed_files.emplace_back(fmt::format("{}::{}", archive_reader->getPath(), *filename)); + columns_from_cache = try_get_columns_from_schema_cache(last_read_file_path); + + if (columns_from_cache) + return nullptr; + + read_files_from_archive.insert(*filename); + read_buf = archive_reader->readFile(std::move(file_enumerator)); + } + + break; + } + + is_first = false; + return read_buf; + } + + void setNumRowsToLastFile(size_t num_rows) override + { + if (!getContext()->getSettingsRef().use_cache_for_count_from_files) + return; + + auto key = getKeyForSchemaCache(last_read_file_path, format, format_settings, getContext()); + StorageFile::getSchemaCache(getContext()).addNumRows(key, num_rows); + } + + std::vector processed_files; + std::optional columns_from_cache; + private: + const StorageFile::ArchiveInfo & archive_info; + + size_t current_archive_index = 0; + std::unordered_set read_files_from_archive; + + bool is_first = true; + + std::string last_read_file_path; + + String format; + const std::optional & format_settings; + }; } ColumnsDescription StorageFile::getTableStructureFromFileDescriptor(ContextPtr context) @@ -567,7 +706,7 @@ ColumnsDescription StorageFile::getTableStructureFromFile( const String & compression_method, const std::optional & format_settings, ContextPtr context, - const std::vector & paths_to_archive) + const std::optional & 
archive_info) { if (format == "Distributed") { @@ -577,29 +716,102 @@ ColumnsDescription StorageFile::getTableStructureFromFile( return ColumnsDescription(DistributedAsyncInsertSource(paths[0]).getOutputs().front().getHeader().getNamesAndTypesList()); } - if (paths.empty() && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format)) + if (((archive_info && archive_info->paths_to_archives.empty()) || (!archive_info && paths.empty())) + && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format)) throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file, because there are no files with provided path. " "You must specify table structure manually", format); - std::optional columns_from_cache; - if (context->getSettingsRef().schema_inference_use_cache_for_file) - columns_from_cache = tryGetColumnsFromCache(paths, format, format_settings, context); - ColumnsDescription columns; - if (columns_from_cache) + if (archive_info) { - columns = *columns_from_cache; + std::vector paths_for_schema_cache; + std::optional columns_from_cache; + + if (context->getSettingsRef().schema_inference_use_cache_for_file) + { + paths_for_schema_cache.reserve(archive_info->paths_to_archives.size()); + struct stat file_stat{}; + for (const auto & archive : archive_info->paths_to_archives) + { + const auto & full_path = paths_for_schema_cache.emplace_back(fmt::format("{}::{}", archive, archive_info->path_in_archive)); + + if (!columns_from_cache) + { + auto & schema_cache = getSchemaCache(context); + auto get_last_mod_time = [&]() -> std::optional + { + if (0 != stat(archive.c_str(), &file_stat)) + return std::nullopt; + + return file_stat.st_mtime; + }; + + auto cache_key = getKeyForSchemaCache(full_path, format, format_settings, context); + columns_from_cache = schema_cache.tryGetColumns(cache_key, get_last_mod_time); + } + } + } + + if (columns_from_cache) + { + columns = std::move(*columns_from_cache); + } + else + { + ReadBufferFromArchiveIterator read_buffer_iterator(*archive_info, format, format_settings, context); + try + { + columns = readSchemaFromFormat( + format, + format_settings, + read_buffer_iterator, + /*retry=*/archive_info->paths_to_archives.size() > 1 || !archive_info->isSingleFileRead(), + context); + } + catch (const DB::Exception & e) + { + /// maybe we found something in cache while iterating files + if (e.code() == ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE) + { + if (read_buffer_iterator.columns_from_cache) + columns = std::move(*read_buffer_iterator.columns_from_cache); + else + throw; + } + else + { + throw; + } + } + + for (auto & file : read_buffer_iterator.processed_files) + paths_for_schema_cache.push_back(std::move(file)); + } + + if (context->getSettingsRef().schema_inference_use_cache_for_file) + addColumnsToCache(paths_for_schema_cache, columns, format, format_settings, context); } else { - ReadBufferFromFileIterator read_buffer_iterator(paths, paths_to_archive, format, compression_method, format_settings, context); - columns = readSchemaFromFormat(format, format_settings, read_buffer_iterator, paths.size() > 1, context); - } + std::optional columns_from_cache; + if (context->getSettingsRef().schema_inference_use_cache_for_file) + columns_from_cache = tryGetColumnsFromCache(paths, format, format_settings, context); - if (context->getSettingsRef().schema_inference_use_cache_for_file) - addColumnsToCache(paths, columns, format, format_settings, context); + if (columns_from_cache) + { + columns = 
*columns_from_cache; + } + else + { + ReadBufferFromFileIterator read_buffer_iterator(paths, format, compression_method, format_settings, context); + columns = readSchemaFromFormat(format, format_settings, read_buffer_iterator, paths.size() > 1, context); + } + + if (context->getSettingsRef().schema_inference_use_cache_for_file) + addColumnsToCache(archive_info ? archive_info->paths_to_archives : paths, columns, format, format_settings, context); + } return columns; } @@ -643,14 +855,9 @@ StorageFile::StorageFile(const std::string & table_path_, const std::string & us : StorageFile(args) { if (!args.path_to_archive.empty()) - { - paths_to_archive = getPathsList(args.path_to_archive, user_files_path, args.getContext(), total_bytes_to_read); - paths = {table_path_}; - } + archive_info = getArchiveInfo(args.path_to_archive, table_path_, user_files_path, args.getContext(), total_bytes_to_read); else - { paths = getPathsList(table_path_, user_files_path, args.getContext(), total_bytes_to_read); - } is_db_table = false; is_path_with_globs = paths.size() > 1; @@ -706,7 +913,13 @@ void StorageFile::setStorageMetadata(CommonArguments args) columns = getTableStructureFromFileDescriptor(args.getContext()); else { - columns = getTableStructureFromFile(format_name, paths, compression_method, format_settings, args.getContext(), paths_to_archive); + columns = getTableStructureFromFile( + format_name, + paths, + compression_method, + format_settings, + args.getContext(), + archive_info); if (!args.columns.empty() && args.columns != columns) throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Table structure and file structure are different"); } @@ -743,15 +956,14 @@ public: public: explicit FilesIterator( const Strings & files_, - std::vector archives_, - const IArchiveReader::NameFilter & name_filter_, + std::optional archive_info_, ASTPtr query, const NamesAndTypesList & virtual_columns, ContextPtr context_) - : files(files_), archives(std::move(archives_)), name_filter(name_filter_) + : files(files_), archive_info(std::move(archive_info_)) { ASTPtr filter_ast; - if (archives.empty() && !files.empty() && !files[0].empty()) + if (!archive_info && !files.empty() && !files[0].empty()) filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, files[0], context_); if (filter_ast) @@ -760,7 +972,7 @@ public: String next() { - const auto & fs = fromArchive() ? archives : files; + const auto & fs = isReadFromArchive() ? 
archive_info->paths_to_archives : files; auto current_index = index.fetch_add(1, std::memory_order_relaxed); if (current_index >= fs.size()) @@ -769,35 +981,32 @@ public: return fs[current_index]; } - bool fromArchive() const + bool isReadFromArchive() const { - return !archives.empty(); + return archive_info.has_value(); } - bool readSingleFileFromArchive() const + bool validFileInArchive(const std::string & path) const { - return !name_filter; + return archive_info->filter(path); } - bool passesFilter(const std::string & name) const + bool isSingleFileReadFromArchive() const { - std::lock_guard lock(filter_mutex); - return name_filter(name); + return archive_info->isSingleFileRead(); } - const String & getFileName() + const String & getFileNameInArchive() { - if (files.size() != 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected only 1 filename but got {}", files.size()); + if (archive_info->path_in_archive.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected only 1 filename but it's empty"); - return files[0]; + return archive_info->path_in_archive; } private: std::vector files; - std::vector archives; - mutable std::mutex filter_mutex; - IArchiveReader::NameFilter name_filter; + std::optional archive_info; std::atomic index = 0; }; @@ -901,6 +1110,32 @@ public: return storage->getName(); } + bool tryGetCountFromCache(const struct stat & file_stat) + { + if (!context->getSettingsRef().use_cache_for_count_from_files) + return false; + + auto num_rows_from_cache = tryGetNumRowsFromCache(current_path, file_stat.st_mtime); + if (!num_rows_from_cache) + return false; + + /// We should not return single chunk with all number of rows, + /// because there is a chance that this chunk will be materialized later + /// (it can cause memory problems even with default values in columns or when virtual columns are requested). + /// Instead, we use special ConstChunkGenerator that will generate chunks + /// with max_block_size rows until total number of rows is reached. 
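Because the template arguments were stripped from the diff text, here is a plain sketch of the behaviour the comment above describes: the cached row count is emitted in pieces of at most `max_block_size` rows rather than as one huge chunk. This is generic C++ for illustration, not the actual ConstChunkGenerator:

```
// Generic illustration: split `total_rows` into chunk sizes of at most
// `max_block_size`, which is what the ConstChunkGenerator-based pipeline
// below achieves for the cached count. Not ClickHouse source code.
#include <algorithm>
#include <cstddef>
#include <vector>

std::vector<size_t> splitIntoChunkSizes(size_t total_rows, size_t max_block_size)
{
    std::vector<size_t> chunk_sizes;
    for (size_t emitted = 0; emitted < total_rows;)
    {
        size_t rows = std::min(max_block_size, total_rows - emitted);
        chunk_sizes.push_back(rows); // one entry per generated chunk
        emitted += rows;
    }
    return chunk_sizes;
}

// Example: splitIntoChunkSizes(25000, 8192) yields {8192, 8192, 8192, 424}.
```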
+ auto const_chunk_generator = std::make_shared(block_for_format, *num_rows_from_cache, max_block_size); + QueryPipelineBuilder builder; + builder.init(Pipe(const_chunk_generator)); + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); + reader = std::make_unique(*pipeline); + return true; + } + Chunk generate() override { while (!finished_generate) @@ -910,21 +1145,27 @@ public: { if (!storage->use_table_fd) { - if (files_iterator->fromArchive()) + if (files_iterator->isReadFromArchive()) { - if (files_iterator->readSingleFileFromArchive()) + struct stat file_stat; + if (files_iterator->isSingleFileReadFromArchive()) { auto archive = files_iterator->next(); if (archive.empty()) return {}; - struct stat file_stat = getFileStat(archive, storage->use_table_fd, storage->table_fd, storage->getName()); + file_stat = getFileStat(archive, storage->use_table_fd, storage->table_fd, storage->getName()); if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) continue; archive_reader = createArchiveReader(archive); - current_path = files_iterator->getFileName(); - read_buf = archive_reader->readFile(current_path, /*throw_on_not_found=*/false); + filename_override = files_iterator->getFileNameInArchive(); + + current_path = fmt::format("{}::{}", archive_reader->getPath(), *filename_override); + if (need_only_count && tryGetCountFromCache(file_stat)) + continue; + + read_buf = archive_reader->readFile(*filename_override, /*throw_on_not_found=*/false); if (!read_buf) continue; } @@ -938,7 +1179,7 @@ public: if (archive.empty()) return {}; - struct stat file_stat = getFileStat(archive, storage->use_table_fd, storage->table_fd, storage->getName()); + file_stat = getFileStat(archive, storage->use_table_fd, storage->table_fd, storage->getName()); if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) continue; @@ -948,7 +1189,7 @@ public: } bool file_found = true; - while (!files_iterator->passesFilter(file_enumerator->getFileName())) + while (!files_iterator->validFileInArchive(file_enumerator->getFileName())) { if (!file_enumerator->nextFile()) { @@ -959,7 +1200,7 @@ public: if (file_found) { - current_path = file_enumerator->getFileName(); + filename_override = file_enumerator->getFileName(); break; } @@ -967,6 +1208,10 @@ public: } chassert(file_enumerator); + current_path = fmt::format("{}::{}", archive_reader->getPath(), *filename_override); + if (need_only_count && tryGetCountFromCache(file_stat)) + continue; + read_buf = archive_reader->readFile(std::move(file_enumerator)); } } @@ -994,35 +1239,23 @@ public: if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) continue; - if (need_only_count && context->getSettingsRef().use_cache_for_count_from_files) - { - auto num_rows_from_cache = tryGetNumRowsFromCache(current_path, file_stat.st_mtime); - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. 
- auto const_chunk_generator = std::make_shared(block_for_format, *num_rows_from_cache, max_block_size); - QueryPipelineBuilder builder; - builder.init(Pipe(const_chunk_generator)); - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - reader = std::make_unique(*pipeline); - continue; - } - } + if (need_only_count && tryGetCountFromCache(file_stat)) + continue; read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, context); } const Settings & settings = context->getSettingsRef(); - chassert(!storage->paths.empty()); - const auto max_parsing_threads = std::max(settings.max_threads/ storage->paths.size(), 1UL); + + size_t file_num = 0; + if (storage->archive_info) + file_num = storage->archive_info->paths_to_archives.size(); + else + file_num = storage->paths.size(); + + chassert(file_num > 0); + + const auto max_parsing_threads = std::max(settings.max_threads / file_num, 1UL); input_format = context->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, storage->format_settings, need_only_count ? 1 : max_parsing_threads); input_format->setQueryInfo(query_info, context); if (need_only_count) @@ -1063,7 +1296,8 @@ public: progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); /// Enrich with virtual columns. - VirtualColumnUtils::addRequestedPathAndFileVirtualsToChunk(chunk, requested_virtual_columns, current_path); + VirtualColumnUtils::addRequestedPathAndFileVirtualsToChunk( + chunk, requested_virtual_columns, current_path, filename_override.has_value() ? &filename_override.value() : nullptr); return chunk; } @@ -1081,8 +1315,18 @@ public: pipeline.reset(); input_format.reset(); - if (files_iterator->fromArchive() && !files_iterator->readSingleFileFromArchive()) - file_enumerator = archive_reader->nextFile(std::move(read_buf)); + if (files_iterator->isReadFromArchive() && !files_iterator->isSingleFileReadFromArchive()) + { + if (file_enumerator) + { + if (!file_enumerator->nextFile()) + file_enumerator = nullptr; + } + else + { + file_enumerator = archive_reader->nextFile(std::move(read_buf)); + } + } read_buf.reset(); } @@ -1114,6 +1358,7 @@ private: StorageSnapshotPtr storage_snapshot; FilesIteratorPtr files_iterator; String current_path; + std::optional filename_override; Block sample_block; std::unique_ptr read_buf; InputFormatPtr input_format; @@ -1155,44 +1400,35 @@ Pipe StorageFile::read( } else { - const auto & p = paths_to_archive.empty() ? 
paths : paths_to_archive; - if (p.size() == 1 && !fs::exists(p[0])) + const std::vector * p; + + if (archive_info.has_value()) + p = &archive_info->paths_to_archives; + else + p = &paths; + + if (p->size() == 1 && !fs::exists(p->at(0))) { if (context->getSettingsRef().engine_file_empty_if_not_exists) return Pipe(std::make_shared(storage_snapshot->getSampleBlockForColumns(column_names))); else - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", p[0]); + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", p->at(0)); } } - IArchiveReader::NameFilter filter; - if (!paths_to_archive.empty()) - { - if (paths.size() != 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Multiple paths defined for reading from archive"); + auto files_iterator + = std::make_shared(paths, archive_info, query_info.query, virtual_columns, context); - const auto & path = paths[0]; - - if (path.find_first_of("*?{") != std::string::npos) - { - auto matcher = std::make_shared(makeRegexpPatternFromGlobs(path)); - if (!matcher->ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", path, matcher->error()); - - filter = [matcher](const std::string & p) - { - return re2::RE2::FullMatch(p, *matcher); - }; - } - } - - auto files_iterator = std::make_shared(paths, paths_to_archive, std::move(filter), query_info.query, virtual_columns, context); auto this_ptr = std::static_pointer_cast(shared_from_this()); size_t num_streams = max_num_streams; - auto files_to_read = std::max(paths_to_archive.size(), paths.size()); + size_t files_to_read = 0; + if (archive_info) + files_to_read = archive_info->paths_to_archives.size(); + else + files_to_read = paths.size(); + if (max_num_streams > files_to_read) num_streams = files_to_read; @@ -1478,7 +1714,7 @@ SinkToStoragePtr StorageFile::write( ContextPtr context, bool /*async_insert*/) { - if (!use_table_fd && !paths_to_archive.empty()) + if (!use_table_fd && archive_info.has_value()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Writing to archives is not supported"); if (format_name == "Distributed") @@ -1817,4 +2053,34 @@ void StorageFile::parseFileSource(String source, String & filename, String & pat filename = filename_view; } +StorageFile::ArchiveInfo StorageFile::getArchiveInfo( + const std::string & path_to_archive, + const std::string & file_in_archive, + const std::string & user_files_path, + ContextPtr context, + size_t & total_bytes_to_read +) +{ + ArchiveInfo archive_info; + archive_info.path_in_archive = file_in_archive; + + if (file_in_archive.find_first_of("*?{") != std::string::npos) + { + auto matcher = std::make_shared(makeRegexpPatternFromGlobs(file_in_archive)); + if (!matcher->ok()) + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, + "Cannot compile regex from glob ({}): {}", file_in_archive, matcher->error()); + + archive_info.filter = [matcher, matcher_mutex = std::make_shared()](const std::string & p) mutable + { + std::lock_guard lock(*matcher_mutex); + return re2::RE2::FullMatch(p, *matcher); + }; + } + + archive_info.paths_to_archives = getPathsList(path_to_archive, user_files_path, context, total_bytes_to_read); + + return archive_info; +} + } diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 5deaebb1600..97c3cebabbb 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -83,6 +84,18 @@ public: bool supportsPartitionBy() const override { return true; } + 
struct ArchiveInfo + { + std::vector paths_to_archives; + std::string path_in_archive; // used when reading a single file from archive + IArchiveReader::NameFilter filter = {}; // used when files inside archive are defined with a glob + + bool isSingleFileRead() const + { + return !filter; + } + }; + ColumnsDescription getTableStructureFromFileDescriptor(ContextPtr context); static ColumnsDescription getTableStructureFromFile( @@ -91,12 +104,19 @@ public: const String & compression_method, const std::optional & format_settings, ContextPtr context, - const std::vector & paths_to_archive = {"auto"}); + const std::optional & archive_info = std::nullopt); static SchemaCache & getSchemaCache(const ContextPtr & context); static void parseFileSource(String source, String & filename, String & path_to_archive); + static ArchiveInfo getArchiveInfo( + const std::string & path_to_archive, + const std::string & file_in_archive, + const std::string & user_files_path, + ContextPtr context, + size_t & total_bytes_to_read); + bool supportsTrivialCountOptimization() const override { return true; } protected: @@ -128,7 +148,8 @@ private: std::string base_path; std::vector paths; - std::vector paths_to_archive; + + std::optional archive_info; bool is_db_table = true; /// Table is stored in real database, not user's file bool use_table_fd = false; /// Use table_fd instead of path diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 4e43c5b818b..850f469b03b 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -288,7 +288,7 @@ StorageMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & met *this, metadata_snapshot, settings.max_partitions_per_insert_block, local_context); } -void StorageMergeTree::checkTableCanBeDropped() const +void StorageMergeTree::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const { if (!supportsReplication() && isStaticStorage()) return; diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index c77e5140d75..89da9ab839e 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -102,7 +102,7 @@ public: void alter(const AlterCommands & commands, ContextPtr context, AlterLockHolder & table_lock_holder) override; - void checkTableCanBeDropped() const override; + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override; ActionLock getActionLock(StorageActionBlockType action_type) override; diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 21ed4b91c62..a4304faeaec 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -150,7 +150,7 @@ public: } CheckResults checkData(const ASTPtr & query, ContextPtr context) override { return getNested()->checkData(query, context); } - void checkTableCanBeDropped() const override { getNested()->checkTableCanBeDropped(); } + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override { getNested()->checkTableCanBeDropped(query_context); } bool storesDataOnDisk() const override { return getNested()->storesDataOnDisk(); } Strings getDataPaths() const override { return getNested()->getDataPaths(); } StoragePolicyPtr getStoragePolicy() const override { return getNested()->getStoragePolicy(); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 65550464495..cabdf67a315 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ 
b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6164,7 +6164,7 @@ PartitionCommandsResultInfo StorageReplicatedMergeTree::attachPartition( } -void StorageReplicatedMergeTree::checkTableCanBeDropped() const +void StorageReplicatedMergeTree::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const { auto table_id = getStorageID(); getContext()->checkTableCanBeDropped(table_id.database_name, table_id.table_name, getTotalActiveSizeInBytes()); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 8e9eed678c8..1f37416f881 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -201,7 +201,7 @@ public: bool supportsIndexForIn() const override { return true; } - void checkTableCanBeDropped() const override; + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override; ActionLock getActionLock(StorageActionBlockType action_type) override; @@ -385,7 +385,7 @@ private: friend class ReplicatedMergeTreeSinkImpl; friend class ReplicatedMergeTreePartCheckThread; friend class ReplicatedMergeTreeCleanupThread; - friend class AsyncBlockIDsCache; + friend class AsyncBlockIDsCache; friend class ReplicatedMergeTreeAlterThread; friend class ReplicatedMergeTreeRestartingThread; friend class ReplicatedMergeTreeAttachThread; @@ -512,7 +512,7 @@ private: /// A thread that removes old parts, log entries, and blocks. ReplicatedMergeTreeCleanupThread cleanup_thread; - AsyncBlockIDsCache async_block_ids_cache; + AsyncBlockIDsCache async_block_ids_cache; /// A thread that checks the data of the parts, as well as the queue of the parts to be checked. ReplicatedMergeTreePartCheckThread part_check_thread; diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index c20898dd31e..8f96cb46910 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -153,7 +153,7 @@ public: } bool isView() const override { return false; } - void checkTableCanBeDropped() const override {} + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override {} private: mutable std::recursive_mutex nested_mutex; diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 0f32195fd53..665f136e5cc 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -340,7 +340,8 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const return block.getByName("_idx").column; } -void addRequestedPathAndFileVirtualsToChunk(Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, const String & path) +void addRequestedPathAndFileVirtualsToChunk( + Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, const String & path, const String * filename) { for (const auto & virtual_column : requested_virtual_columns) { @@ -350,9 +351,16 @@ void addRequestedPathAndFileVirtualsToChunk(Chunk & chunk, const NamesAndTypesLi } else if (virtual_column.name == "_file") { - size_t last_slash_pos = path.find_last_of('/'); - auto file_name = path.substr(last_slash_pos + 1); - chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), file_name)); + if (filename) + { + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), *filename)); + } + else + { + size_t last_slash_pos = path.find_last_of('/'); + auto filename_from_path = path.substr(last_slash_pos + 1); + 
chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), filename_from_path)); + } } } } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index d8d76e05038..70659958cdf 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -67,7 +67,8 @@ void filterByPathOrFile(std::vector & sources, const std::vector & pa sources = std::move(filtered_sources); } -void addRequestedPathAndFileVirtualsToChunk(Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, const String & path); +void addRequestedPathAndFileVirtualsToChunk( + Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, const String & path, const String * filename = nullptr); } } diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 0f506040cd9..e3fcd6249d1 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1571,7 +1571,7 @@ void StorageWindowView::writeIntoWindowView( }); auto executor = builder.execute(); - executor->execute(builder.getNumThreads()); + executor->execute(builder.getNumThreads(), local_context->getSettingsRef().use_concurrency_control); } void StorageWindowView::startup() @@ -1599,7 +1599,7 @@ void StorageWindowView::shutdown() DatabaseCatalog::instance().removeViewDependency(select_table_id, table_id); } -void StorageWindowView::checkTableCanBeDropped() const +void StorageWindowView::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const { auto table_id = getStorageID(); auto view_ids = DatabaseCatalog::instance().getDependentViews(table_id); diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 847a4945d0e..231616ff820 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -119,7 +119,7 @@ public: bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } - void checkTableCanBeDropped() const override; + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override; void dropInnerTableIfAny(bool sync, ContextPtr context) override; diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 56a6839ddbb..f75c56e6523 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -97,13 +97,14 @@ ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context size_t total_bytes_to_read = 0; Strings paths; - Strings paths_to_archives; + std::optional archive_info; if (path_to_archive.empty()) paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read); else - paths_to_archives = StorageFile::getPathsList(path_to_archive, context->getUserFilesPath(), context, total_bytes_to_read); + archive_info + = StorageFile::getArchiveInfo(path_to_archive, filename, context->getUserFilesPath(), context, total_bytes_to_read); - return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context, paths_to_archives); + return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context, archive_info); } diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 8a7ca59011d..eb3ead503e2 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -618,6 +618,9 @@ class SettingsRandomizer: 
"America/Mazatlan", "America/Hermosillo", "Mexico/BajaSur", + # These timezones had DST transitions on some unusual dates (e.g. 2000-01-15 12:00:00). + "Africa/Khartoum", + "Africa/Juba", # server default that is randomized across all timezones # NOTE: due to lots of trickery we cannot use empty timezone here, but this should be the same. get_localzone(), diff --git a/tests/config/config.d/validate_tcp_client_information.xml b/tests/config/config.d/validate_tcp_client_information.xml new file mode 100644 index 00000000000..db7b644719a --- /dev/null +++ b/tests/config/config.d/validate_tcp_client_information.xml @@ -0,0 +1,3 @@ + + true + diff --git a/tests/config/install.sh b/tests/config/install.sh index 95ffbe2a3f9..1b167fec92e 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -61,6 +61,7 @@ ln -sf $SRC_PATH/config.d/disable_s3_env_credentials.xml $DEST_SERVER_PATH/confi ln -sf $SRC_PATH/config.d/enable_wait_for_shutdown_replicated_tables.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/backups.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/filesystem_caches_path.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/validate_tcp_client_information.xml $DEST_SERVER_PATH/config.d/ # Not supported with fasttest. if [ "${DEST_SERVER_PATH}" = "/etc/clickhouse-server" ] diff --git a/tests/integration/test_composable_protocols/configs/config.xml b/tests/integration/test_composable_protocols/configs/config.xml index f42bff335ef..09a512eb5a4 100644 --- a/tests/integration/test_composable_protocols/configs/config.xml +++ b/tests/integration/test_composable_protocols/configs/config.xml @@ -61,4 +61,6 @@ + + true diff --git a/tests/integration/test_composable_protocols/test.py b/tests/integration/test_composable_protocols/test.py index df74cfffa54..aa5a1e766e6 100644 --- a/tests/integration/test_composable_protocols/test.py +++ b/tests/integration/test_composable_protocols/test.py @@ -84,7 +84,7 @@ def test_connections(): assert execute_query_https(server.ip_address, 8444, "SELECT 1") == "1\n" - data = "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007default\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\vClickHouse \024\r\253\251\003\0\001\0\0\0\002\001\025SELECT 'Hello, world'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" + data = "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007default\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\21ClickHouse client\024\r\253\251\003\0\001\0\0\0\002\001\025SELECT 'Hello, world'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" assert ( netcat(server.ip_address, 9100, bytearray(data, "latin-1")).find( bytearray("Hello, world", "latin-1") @@ -92,7 +92,7 @@ def test_connections(): >= 0 ) - data_user_allowed = "PROXY TCP4 123.123.123.123 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007user123\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\vClickHouse \024\r\253\251\003\0\001\0\0\0\002\001\025SELECT 'Hello, world'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" + data_user_allowed = "PROXY TCP4 123.123.123.123 255.255.255.255 65535 65535\r\n\0\021ClickHouse 
client\024\r\253\251\003\0\007user123\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\21ClickHouse client\024\r\253\251\003\0\001\0\0\0\002\001\025SELECT 'Hello, world'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" assert ( netcat(server.ip_address, 9100, bytearray(data_user_allowed, "latin-1")).find( bytearray("Hello, world", "latin-1") @@ -100,7 +100,7 @@ def test_connections(): >= 0 ) - data_user_restricted = "PROXY TCP4 127.0.0.1 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007user123\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\vClickHouse \024\r\253\251\003\0\001\0\0\0\002\001\025SELECT 'Hello, world'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" + data_user_restricted = "PROXY TCP4 127.0.0.1 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007user123\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\21ClickHouse client\024\r\253\251\003\0\001\0\0\0\002\001\025SELECT 'Hello, world'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" assert ( netcat( server.ip_address, 9100, bytearray(data_user_restricted, "latin-1") diff --git a/tests/integration/test_config_hide_in_preprocessed/__init__.py b/tests/integration/test_config_hide_in_preprocessed/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_config_hide_in_preprocessed/configs/config.xml b/tests/integration/test_config_hide_in_preprocessed/configs/config.xml new file mode 100644 index 00000000000..aac5f572964 --- /dev/null +++ b/tests/integration/test_config_hide_in_preprocessed/configs/config.xml @@ -0,0 +1,12 @@ + + 2000 + 60000000000 + 40000000000 + + + value + value_2 + https://connection.url/ + + + diff --git a/tests/integration/test_config_hide_in_preprocessed/configs/users.xml b/tests/integration/test_config_hide_in_preprocessed/configs/users.xml new file mode 100644 index 00000000000..7f196179f80 --- /dev/null +++ b/tests/integration/test_config_hide_in_preprocessed/configs/users.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_config_hide_in_preprocessed/test.py b/tests/integration/test_config_hide_in_preprocessed/test.py new file mode 100644 index 00000000000..fd237063b18 --- /dev/null +++ b/tests/integration/test_config_hide_in_preprocessed/test.py @@ -0,0 +1,57 @@ +import pytest +import os +from helpers.cluster import ClickHouseCluster + + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", main_configs=["configs/config.xml"], user_configs=["configs/users.xml"] +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_hide_in_preprocessed(started_cluster): + assert ( + node.query( + "select value from system.server_settings where name ='max_thread_pool_free_size'" + ) + == "2000\n" + ) + assert ( + node.query( + "select value from system.server_settings where name ='max_table_size_to_drop'" + ) + == "60000000000\n" + ) + assert ( + node.query( + "select value from system.server_settings where name ='max_partition_size_to_drop'" + ) + == "40000000000\n" + ) + assert "key_1" in node.query("select collection from system.named_collections") + out = node.exec_in_container( + ["cat", 
"/var/lib/clickhouse/preprocessed_configs/config.xml"] + ) + assert ( + '2000' + not in out + ) + assert ( + '60000000000' + not in out + ) + assert ( + '40000000000' + in out + ) + assert '' not in out diff --git a/tests/integration/test_kafka_bad_messages/test.py b/tests/integration/test_kafka_bad_messages/test.py index a634ce36631..1633f230f83 100644 --- a/tests/integration/test_kafka_bad_messages/test.py +++ b/tests/integration/test_kafka_bad_messages/test.py @@ -320,23 +320,19 @@ def test_bad_messages_parsing_exception(kafka_cluster, max_retries=20): ) expected_result = """avro::Exception: Invalid data file. Magic does not match: : while parsing Kafka message (topic: Avro_err, partition: 0, offset: 0)\\'|1|1|1|default|kafka_Avro -Cannot parse input: expected \\'{\\' before: \\'qwertyuiop\\': while parsing Kafka message (topic: JSONEachRow_err, partition: 0, offset: 0)\\'|1|1|1|default|kafka_JSONEachRow +Cannot parse input: expected \\'{\\' before: \\'qwertyuiop\\': while parsing Kafka message (topic: JSONEachRow_err, partition: 0, offset: 0|1|1|1|default|kafka_JSONEachRow """ - retries = 0 - result_system_kafka_consumers = "" - while True: - result_system_kafka_consumers = instance.query( - """ - SELECT exceptions.text[1], length(exceptions.text) > 1 AND length(exceptions.text) < 15, length(exceptions.time) > 1 AND length(exceptions.time) < 15, abs(dateDiff('second', exceptions.time[1], now())) < 40, database, table FROM system.kafka_consumers ORDER BY table, assignments.partition_id[1] - """ - ) - result_system_kafka_consumers = result_system_kafka_consumers.replace("\t", "|") - if result_system_kafka_consumers == expected_result or retries > max_retries: - break - retries += 1 - time.sleep(1) + # filter out stacktrace in exceptions.text[1] because it is hardly stable enough + result_system_kafka_consumers = instance.query_with_retry( + """ + SELECT substr(exceptions.text[1], 1, 131), length(exceptions.text) > 1 AND length(exceptions.text) < 15, length(exceptions.time) > 1 AND length(exceptions.time) < 15, abs(dateDiff('second', exceptions.time[1], now())) < 40, database, table FROM system.kafka_consumers WHERE table in('kafka_Avro', 'kafka_JSONEachRow') ORDER BY table, assignments.partition_id[1] + """, + retry_count=max_retries, + sleep_time=1, + check_callback=lambda res: res.replace("\t", "|") == expected_result, + ) - assert result_system_kafka_consumers == expected_result + assert result_system_kafka_consumers.replace("\t", "|") == expected_result for format_name in [ "Avro", @@ -345,6 +341,54 @@ Cannot parse input: expected \\'{\\' before: \\'qwertyuiop\\': while parsing Kaf kafka_delete_topic(admin_client, f"{format_name}_err") +def test_bad_messages_to_mv(kafka_cluster, max_retries=20): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + kafka_create_topic(admin_client, "tomv") + + instance.query( + f""" + DROP TABLE IF EXISTS kafka_materialized; + DROP TABLE IF EXISTS kafka_consumer; + DROP TABLE IF EXISTS kafka1; + + CREATE TABLE kafka1 (key UInt64, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'tomv', + kafka_group_name = 'tomv', + kafka_format = 'JSONEachRow', + kafka_num_consumers = 1; + + CREATE TABLE kafka_materialized(`key` UInt64, `value` UInt64) ENGINE = Log; + + CREATE MATERIALIZED VIEW kafka_consumer TO kafka_materialized + (`key` UInt64, `value` UInt64) AS + SELECT key, CAST(value, 'UInt64') AS value + FROM kafka1; + """ + ) + + 
kafka_produce(kafka_cluster, "tomv", ['{"key":10, "value":"aaa"}']) + + expected_result = """Code: 6. DB::Exception: Cannot parse string \\'aaa\\' as UInt64: syntax error at begin of string. Note: there are toUInt64OrZero and to|1|1|1|default|kafka1 +""" + result_system_kafka_consumers = instance.query_with_retry( + """ + SELECT substr(exceptions.text[1], 1, 131), length(exceptions.text) > 1 AND length(exceptions.text) < 15, length(exceptions.time) > 1 AND length(exceptions.time) < 15, abs(dateDiff('second', exceptions.time[1], now())) < 40, database, table FROM system.kafka_consumers WHERE table='kafka1' ORDER BY table, assignments.partition_id[1] + """, + retry_count=max_retries, + sleep_time=1, + check_callback=lambda res: res.replace("\t", "|") == expected_result, + ) + + assert result_system_kafka_consumers.replace("\t", "|") == expected_result + + kafka_delete_topic(admin_client, "tomv") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_profile_max_sessions_for_user/test.py b/tests/integration/test_profile_max_sessions_for_user/test.py index 38f77b9d375..28c2597e52d 100755 --- a/tests/integration/test_profile_max_sessions_for_user/test.py +++ b/tests/integration/test_profile_max_sessions_for_user/test.py @@ -96,7 +96,10 @@ def threaded_run_test(sessions): thread.start() if len(sessions) > MAX_SESSIONS_FOR_USER: - assert_logs_contain_with_retry(instance, "overflown session count") + # High retry amount to avoid flakiness in ASAN (+Analyzer) tests + assert_logs_contain_with_retry( + instance, "overflown session count", retry_count=60 + ) instance.query(f"KILL QUERY WHERE user='{TEST_USER}' SYNC") diff --git a/tests/integration/test_storage_s3/configs/users.xml b/tests/integration/test_storage_s3/configs/users.xml index 4b6ba057ecb..dcc4c4b3092 100644 --- a/tests/integration/test_storage_s3/configs/users.xml +++ b/tests/integration/test_storage_s3/configs/users.xml @@ -3,7 +3,7 @@ default - 1 + 1 diff --git a/tests/performance/prepare_hash_before_merge.xml b/tests/performance/prepare_hash_before_merge.xml index e99f762927f..a96d5d9f95c 100644 --- a/tests/performance/prepare_hash_before_merge.xml +++ b/tests/performance/prepare_hash_before_merge.xml @@ -1,4 +1,6 @@ SELECT COUNT(DISTINCT Title) FROM test.hits SETTINGS max_threads = 24 + SELECT COUNT(DISTINCT Title) FROM test.hits SETTINGS max_threads = 56 + SELECT COUNT(DISTINCT Title) FROM test.hits SETTINGS max_threads = 64 SELECT COUNT(DISTINCT Referer) FROM test.hits SETTINGS max_threads = 22 diff --git a/tests/queries/0_stateless/00514_interval_operators.sql b/tests/queries/0_stateless/00514_interval_operators.sql index e98e3211aaf..f9f3abbdb54 100644 --- a/tests/queries/0_stateless/00514_interval_operators.sql +++ b/tests/queries/0_stateless/00514_interval_operators.sql @@ -1,3 +1,5 @@ +SET session_timezone = 'Etc/UTC'; + SELECT toDateTime('2017-10-30 08:18:19') + INTERVAL 1 DAY + INTERVAL 1 MONTH - INTERVAL 1 YEAR; SELECT toDateTime('2017-10-30 08:18:19') + INTERVAL 1 HOUR + INTERVAL 1000 MINUTE + INTERVAL 10 SECOND; SELECT toDateTime('2017-10-30 08:18:19') + INTERVAL 1 DAY + INTERVAL number MONTH FROM system.numbers LIMIT 20; diff --git a/tests/queries/0_stateless/01601_proxy_protocol.reference b/tests/queries/0_stateless/01601_proxy_protocol.reference index a5c19667710..2201a1c6c08 100644 --- a/tests/queries/0_stateless/01601_proxy_protocol.reference +++ b/tests/queries/0_stateless/01601_proxy_protocol.reference @@ -1 +1,3 @@ Hello, world 
+Hello, world +Hello, world diff --git a/tests/queries/0_stateless/01601_proxy_protocol.sh b/tests/queries/0_stateless/01601_proxy_protocol.sh index c8ee3ad1f7b..5c51a40a2c7 100755 --- a/tests/queries/0_stateless/01601_proxy_protocol.sh +++ b/tests/queries/0_stateless/01601_proxy_protocol.sh @@ -6,4 +6,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# Old clickhouse-client (with version 23.8-) sends "ClickHouse client" and then "ClickHouse" or "ClickHouse ". +# For backward compatibility purposes, the server accepts both variants. +printf "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\21ClickHouse client\24\r\253\251\3\0\7default\0\4\1\0\1\0\0\t0.0.0.0:0\1\tmilovidov\21milovidov-desktop\nClickHouse\24\r\253\251\3\0\1\0\0\0\2\1\25SELECT 'Hello, world'\2\0\247\203\254l\325\\z|\265\254F\275\333\206\342\24\202\24\0\0\0\n\0\0\0\240\1\0\2\377\377\377\377\0\0\0" | nc "${CLICKHOUSE_HOST}" "${CLICKHOUSE_PORT_TCP_WITH_PROXY}" | head -c150 | grep --text -o -F 'Hello, world' +printf "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\21ClickHouse client\24\r\253\251\3\0\7default\0\4\1\0\1\0\0\t0.0.0.0:0\1\tmilovidov\21milovidov-desktop\vClickHouse \24\r\253\251\3\0\1\0\0\0\2\1\25SELECT 'Hello, world'\2\0\247\203\254l\325\\z|\265\254F\275\333\206\342\24\202\24\0\0\0\n\0\0\0\240\1\0\2\377\377\377\377\0\0\0" | nc "${CLICKHOUSE_HOST}" "${CLICKHOUSE_PORT_TCP_WITH_PROXY}" | head -c150 | grep --text -o -F 'Hello, world' printf "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\21ClickHouse client\24\r\253\251\3\0\7default\0\4\1\0\1\0\0\t0.0.0.0:0\1\tmilovidov\21milovidov-desktop\21ClickHouse client\24\r\253\251\3\0\1\0\0\0\2\1\25SELECT 'Hello, world'\2\0\247\203\254l\325\\z|\265\254F\275\333\206\342\24\202\24\0\0\0\n\0\0\0\240\1\0\2\377\377\377\377\0\0\0" | nc "${CLICKHOUSE_HOST}" "${CLICKHOUSE_PORT_TCP_WITH_PROXY}" | head -c150 | grep --text -o -F 'Hello, world' diff --git a/tests/queries/0_stateless/02354_usearch_index.reference b/tests/queries/0_stateless/02354_usearch_index.reference index a93209f6ba8..f966b5ce33f 100644 --- a/tests/queries/0_stateless/02354_usearch_index.reference +++ b/tests/queries/0_stateless/02354_usearch_index.reference @@ -141,3 +141,8 @@ Expression (Projection) Description: usearch GRANULARITY 4 Parts: 1/1 Granules: 1/4 +--- Test correctness of Usearch index with > 1 mark +1 [1,0,0,0] +9000 [9000,0,0,0] +1 (1,0,0,0) +9000 (9000,0,0,0) diff --git a/tests/queries/0_stateless/02354_usearch_index.sql b/tests/queries/0_stateless/02354_usearch_index.sql index f21767ea6de..f771e2835fa 100644 --- a/tests/queries/0_stateless/02354_usearch_index.sql +++ b/tests/queries/0_stateless/02354_usearch_index.sql @@ -228,3 +228,35 @@ ORDER BY L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) LIMIT 3; DROP TABLE tab; + +SELECT '--- Test correctness of Usearch index with > 1 mark'; + +CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; -- disable adaptive granularity due to bug +INSERT INTO tab SELECT number, [toFloat32(number), 0., 0., 0.] 
from numbers(10000); + +SELECT * +FROM tab +ORDER BY L2Distance(vector, [1.0, 0.0, 0.0, 0.0]) +LIMIT 1; + +SELECT * +FROM tab +ORDER BY L2Distance(vector, [9000.0, 0.0, 0.0, 0.0]) +LIMIT 1; + +DROP TABLE tab; + +CREATE TABLE tab(id Int32, vector Tuple(Float32, Float32, Float32, Float32), INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; -- disable adaptive granularity due to bug +INSERT INTO tab SELECT number, (toFloat32(number), 0., 0., 0.) from numbers(10000); + +SELECT * +FROM tab +ORDER BY L2Distance(vector, (1.0, 0.0, 0.0, 0.0)) +LIMIT 1; + +SELECT * +FROM tab +ORDER BY L2Distance(vector, (9000.0, 0.0, 0.0, 0.0)) +LIMIT 1; + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02481_async_insert_dedup.python b/tests/queries/0_stateless/02481_async_insert_dedup.python index ca83253eaf8..0e80a21bf46 100644 --- a/tests/queries/0_stateless/02481_async_insert_dedup.python +++ b/tests/queries/0_stateless/02481_async_insert_dedup.python @@ -12,18 +12,21 @@ sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient +table_engine = sys.argv[1] + client = ClickHouseClient() # test table without partition client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part SYNC") -client.query( - """ +create_query = f""" CREATE TABLE t_async_insert_dedup_no_part ( KeyID UInt32 -) Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/t_async_insert_dedup', '{replica}') +) Engine = {table_engine}('/clickhouse/tables/{{shard}}/{{database}}/t_async_insert_dedup', '{{replica}}') ORDER BY (KeyID) """ -) + +client.query(create_query) + client.query( "insert into t_async_insert_dedup_no_part values (1), (2), (3), (4), (5)", @@ -101,22 +104,22 @@ def fetch_and_insert_data(q, client): # main process client.query("DROP TABLE IF EXISTS t_async_insert_dedup SYNC") -client.query( - """ +create_query = f""" CREATE TABLE t_async_insert_dedup ( EventDate DateTime, KeyID UInt32 -) Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/t_async_insert_dedup', '{replica}') +) Engine = {table_engine}('/clickhouse/tables/{{shard}}/{{database}}/t_async_insert_dedup', '{{replica}}') PARTITION BY toYYYYMM(EventDate) ORDER BY (KeyID, EventDate) SETTINGS use_async_block_ids_cache = 1 """ -) + +client.query(create_query) q = queue.Queue(100) total_number = 10000 use_token = False -if sys.argv[-1] == "token": +if len(sys.argv) > 3 and sys.argv[2] == "token": use_token = True gen = Thread(target=generate_data, args=[q, total_number, use_token]) @@ -158,13 +161,14 @@ while True: break result = client.query( - "SELECT value FROM system.metrics where metric = 'AsyncInsertCacheSize'" + "SELECT value FROM system.metrics where metric = 'AsyncInsertCacheSize'" ) result = int(result.split()[0]) if result <= 0: raise Exception(f"AsyncInsertCacheSize should > 0, but got {result}") + result = client.query( - "SELECT value FROM system.events where event = 'AsyncInsertCacheHits'" + "SELECT value FROM system.events where event = 'AsyncInsertCacheHits'" ) result = int(result.split()[0]) if result <= 0: diff --git a/tests/queries/0_stateless/02481_async_insert_dedup.sh b/tests/queries/0_stateless/02481_async_insert_dedup.sh index e7cb5c33bf5..0fe06e6ab58 100755 --- a/tests/queries/0_stateless/02481_async_insert_dedup.sh +++ b/tests/queries/0_stateless/02481_async_insert_dedup.sh @@ -6,4 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh # We should have correct env vars from shell_config.sh to run this test -python3 "$CURDIR"/02481_async_insert_dedup.python +python3 "$CURDIR"/02481_async_insert_dedup.python ReplicatedMergeTree diff --git a/tests/queries/0_stateless/02481_async_insert_dedup_token.sh b/tests/queries/0_stateless/02481_async_insert_dedup_token.sh index 8ef6eecda24..bb2d07066a5 100755 --- a/tests/queries/0_stateless/02481_async_insert_dedup_token.sh +++ b/tests/queries/0_stateless/02481_async_insert_dedup_token.sh @@ -6,4 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh # We should have correct env vars from shell_config.sh to run this test -python3 "$CURDIR"/02481_async_insert_dedup.python token +python3 "$CURDIR"/02481_async_insert_dedup.python ReplicatedMergeTree token diff --git a/tests/queries/0_stateless/02542_transform_new.reference b/tests/queries/0_stateless/02542_transform_new.reference index b6eaa692c41..faec7b5c777 100644 --- a/tests/queries/0_stateless/02542_transform_new.reference +++ b/tests/queries/0_stateless/02542_transform_new.reference @@ -30,3 +30,9 @@ sep4 sep5 8000 sep6 +issue #53187 +0 1 1 +1 0 0 +- +0 0 0 +1 1 1 diff --git a/tests/queries/0_stateless/02542_transform_new.sql b/tests/queries/0_stateless/02542_transform_new.sql index 43da0a50731..f3475d6157f 100644 --- a/tests/queries/0_stateless/02542_transform_new.sql +++ b/tests/queries/0_stateless/02542_transform_new.sql @@ -33,3 +33,16 @@ select 'sep6'; SELECT transform(-9223372036854775807, [-1], [toDecimal32(1024, 3)]) FROM system.numbers LIMIT 7; -- { serverError BAD_ARGUMENTS } SELECT [NULL, NULL, NULL, NULL], transform(number, [2147483648], [toDecimal32(1, 2)]) AS x FROM numbers(257) WHERE materialize(10); -- { serverError BAD_ARGUMENTS } SELECT transform(-2147483649, [1], [toDecimal32(1, 2)]) GROUP BY [1] WITH TOTALS; -- { serverError BAD_ARGUMENTS } + +SELECT 'issue #53187'; +SELECT + CAST(number, 'String') AS v2, + caseWithExpression('x', 'y', 0, cond2) AS cond1, + toNullable('0' = v2) AS cond2 +FROM numbers(2); +SELECT '-'; +SELECT + CAST(number, 'String') AS v2, + caseWithExpression('x', 'y', 0, cond2) AS cond1, + toNullable('1' = v2) AS cond2 +FROM numbers(2); diff --git a/tests/queries/0_stateless/02661_read_from_archive.lib b/tests/queries/0_stateless/02661_read_from_archive.lib index 88b2c82f704..f678259d02d 100644 --- a/tests/queries/0_stateless/02661_read_from_archive.lib +++ b/tests/queries/0_stateless/02661_read_from_archive.lib @@ -8,6 +8,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function read_archive_file() { $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/$1') ORDER BY 1, 2" $CLICKHOUSE_CLIENT --query "SELECT * FROM file('${user_files_path}/$1') ORDER BY 1, 2" + $CLICKHOUSE_CLIENT --query "DESC file('${user_files_path}/$1')" $CLICKHOUSE_CLIENT --query "CREATE TABLE 02661_archive_table Engine=File('CSV', '${user_files_path}/$1')" $CLICKHOUSE_CLIENT --query "SELECT * FROM 02661_archive_table ORDER BY 1, 2" $CLICKHOUSE_CLIENT --query "DROP TABLE 02661_archive_table" @@ -16,15 +17,17 @@ function read_archive_file() { function run_archive_test() { $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS 02661_archive_table" - FILE_PREFIX="${CLICKHOUSE_TEST_UNIQUE_NAME}_$1_" + extension_without_dot=$(echo $1 | sed -e 's/\.//g') + FILE_PREFIX="02661_read_from_archive_${CLICKHOUSE_DATABASE}_$extension_without_dot" - user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 
char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + user_files_path=$(clickhouse-client --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -o "/[^[:space:]]*nonexist.txt" | awk '{gsub("/nonexist.txt","",$1); print $1}') + touch ${FILE_PREFIX}_data0.csv echo -e "1,2\n3,4" > ${FILE_PREFIX}_data1.csv echo -e "5,6\n7,8" > ${FILE_PREFIX}_data2.csv echo -e "9,10\n11,12" > ${FILE_PREFIX}_data3.csv - eval "$2 ${user_files_path}/${FILE_PREFIX}_archive1.$1 ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data2.csv > /dev/null" + eval "$2 ${user_files_path}/${FILE_PREFIX}_archive1.$1 ${FILE_PREFIX}_data0.csv ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data2.csv > /dev/null" eval "$2 ${user_files_path}/${FILE_PREFIX}_archive2.$1 ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data3.csv > /dev/null" eval "$2 ${user_files_path}/${FILE_PREFIX}_archive3.$1 ${FILE_PREFIX}_data2.csv ${FILE_PREFIX}_data3.csv > /dev/null" @@ -41,10 +44,10 @@ function run_archive_test() { echo "archive* {2..3}.csv" read_archive_file "${FILE_PREFIX}_archive*.$1 :: ${FILE_PREFIX}_data{2..3}.csv" - $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${FILE_PREFIX}_archive1.$1::nonexistent.csv')" 2>&1 | grep -q "CANNOT_UNPACK_ARCHIVE" && echo "OK" || echo "FAIL" - $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${FILE_PREFIX}_archive3.$1::{2..3}.csv')" 2>&1 | grep -q "CANNOT_UNPACK_ARCHIVE" && echo "OK" || echo "FAIL" + $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${FILE_PREFIX}_archive1.$1::nonexistent.csv')" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL" + $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${FILE_PREFIX}_archive3.$1::{2..3}.csv')" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL" rm ${user_files_path}/${FILE_PREFIX}_archive{1..3}.$1 - rm ${FILE_PREFIX}_data{1..3}.csv + rm ${FILE_PREFIX}_data{0..3}.csv } \ No newline at end of file diff --git a/tests/queries/0_stateless/02661_read_from_archive_7z.reference b/tests/queries/0_stateless/02661_read_from_archive_7z.reference index 27edb5536ad..7d406d45716 100644 --- a/tests/queries/0_stateless/02661_read_from_archive_7z.reference +++ b/tests/queries/0_stateless/02661_read_from_archive_7z.reference @@ -3,6 +3,8 @@ archive1 data1.csv 3 4 1 2 3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 3 4 archive{1..2} data1.csv @@ -14,6 +16,8 @@ archive{1..2} data1.csv 1 2 3 4 3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -31,6 +35,8 @@ archive{1,2} data{1,3}.csv 3 4 9 10 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -46,6 +52,8 @@ archive3 data*.csv 7 8 9 10 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 5 6 7 8 9 10 @@ -75,6 +83,8 @@ archive* *.csv 9 10 11 12 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -104,6 +114,8 @@ archive* {2..3}.csv 9 10 11 12 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 5 6 5 6 7 8 diff --git a/tests/queries/0_stateless/02661_read_from_archive_tar.reference b/tests/queries/0_stateless/02661_read_from_archive_tar.reference index 27edb5536ad..7d406d45716 100644 --- a/tests/queries/0_stateless/02661_read_from_archive_tar.reference +++ b/tests/queries/0_stateless/02661_read_from_archive_tar.reference @@ -3,6 +3,8 @@ archive1 data1.csv 3 4 1 2 3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 3 4 archive{1..2} data1.csv @@ -14,6 +16,8 @@ archive{1..2} data1.csv 1 2 3 4 3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -31,6 +35,8 @@ archive{1,2} 
data{1,3}.csv 3 4 9 10 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -46,6 +52,8 @@ archive3 data*.csv 7 8 9 10 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 5 6 7 8 9 10 @@ -75,6 +83,8 @@ archive* *.csv 9 10 11 12 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -104,6 +114,8 @@ archive* {2..3}.csv 9 10 11 12 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 5 6 5 6 7 8 diff --git a/tests/queries/0_stateless/02661_read_from_archive_tarbzip2.reference b/tests/queries/0_stateless/02661_read_from_archive_tarbzip2.reference index 27edb5536ad..7d406d45716 100644 --- a/tests/queries/0_stateless/02661_read_from_archive_tarbzip2.reference +++ b/tests/queries/0_stateless/02661_read_from_archive_tarbzip2.reference @@ -3,6 +3,8 @@ archive1 data1.csv 3 4 1 2 3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 3 4 archive{1..2} data1.csv @@ -14,6 +16,8 @@ archive{1..2} data1.csv 1 2 3 4 3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -31,6 +35,8 @@ archive{1,2} data{1,3}.csv 3 4 9 10 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -46,6 +52,8 @@ archive3 data*.csv 7 8 9 10 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 5 6 7 8 9 10 @@ -75,6 +83,8 @@ archive* *.csv 9 10 11 12 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -104,6 +114,8 @@ archive* {2..3}.csv 9 10 11 12 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 5 6 5 6 7 8 diff --git a/tests/queries/0_stateless/02661_read_from_archive_targz.reference b/tests/queries/0_stateless/02661_read_from_archive_targz.reference index 27edb5536ad..7d406d45716 100644 --- a/tests/queries/0_stateless/02661_read_from_archive_targz.reference +++ b/tests/queries/0_stateless/02661_read_from_archive_targz.reference @@ -3,6 +3,8 @@ archive1 data1.csv 3 4 1 2 3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 3 4 archive{1..2} data1.csv @@ -14,6 +16,8 @@ archive{1..2} data1.csv 1 2 3 4 3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -31,6 +35,8 @@ archive{1,2} data{1,3}.csv 3 4 9 10 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -46,6 +52,8 @@ archive3 data*.csv 7 8 9 10 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 5 6 7 8 9 10 @@ -75,6 +83,8 @@ archive* *.csv 9 10 11 12 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -104,6 +114,8 @@ archive* {2..3}.csv 9 10 11 12 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 5 6 5 6 7 8 diff --git a/tests/queries/0_stateless/02661_read_from_archive_tarxz.reference b/tests/queries/0_stateless/02661_read_from_archive_tarxz.reference index 27edb5536ad..7d406d45716 100644 --- a/tests/queries/0_stateless/02661_read_from_archive_tarxz.reference +++ b/tests/queries/0_stateless/02661_read_from_archive_tarxz.reference @@ -3,6 +3,8 @@ archive1 data1.csv 3 4 1 2 3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 3 4 archive{1..2} data1.csv @@ -14,6 +16,8 @@ archive{1..2} data1.csv 1 2 3 4 3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -31,6 +35,8 @@ archive{1,2} data{1,3}.csv 3 4 9 10 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -46,6 +52,8 @@ archive3 data*.csv 7 8 9 10 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 5 6 7 8 9 10 @@ -75,6 +83,8 @@ archive* *.csv 9 10 11 12 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -104,6 +114,8 @@ archive* {2..3}.csv 9 10 11 12 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 5 6 5 6 7 8 diff --git a/tests/queries/0_stateless/02661_read_from_archive_tzst.reference b/tests/queries/0_stateless/02661_read_from_archive_tzst.reference index 27edb5536ad..7d406d45716 100644 --- 
a/tests/queries/0_stateless/02661_read_from_archive_tzst.reference +++ b/tests/queries/0_stateless/02661_read_from_archive_tzst.reference @@ -3,6 +3,8 @@ archive1 data1.csv 3 4 1 2 3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 3 4 archive{1..2} data1.csv @@ -14,6 +16,8 @@ archive{1..2} data1.csv 1 2 3 4 3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -31,6 +35,8 @@ archive{1,2} data{1,3}.csv 3 4 9 10 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -46,6 +52,8 @@ archive3 data*.csv 7 8 9 10 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 5 6 7 8 9 10 @@ -75,6 +83,8 @@ archive* *.csv 9 10 11 12 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -104,6 +114,8 @@ archive* {2..3}.csv 9 10 11 12 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 5 6 5 6 7 8 diff --git a/tests/queries/0_stateless/02661_read_from_archive_zip.reference b/tests/queries/0_stateless/02661_read_from_archive_zip.reference index 27edb5536ad..7d406d45716 100644 --- a/tests/queries/0_stateless/02661_read_from_archive_zip.reference +++ b/tests/queries/0_stateless/02661_read_from_archive_zip.reference @@ -3,6 +3,8 @@ archive1 data1.csv 3 4 1 2 3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 3 4 archive{1..2} data1.csv @@ -14,6 +16,8 @@ archive{1..2} data1.csv 1 2 3 4 3 4 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -31,6 +35,8 @@ archive{1,2} data{1,3}.csv 3 4 9 10 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -46,6 +52,8 @@ archive3 data*.csv 7 8 9 10 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 5 6 7 8 9 10 @@ -75,6 +83,8 @@ archive* *.csv 9 10 11 12 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 1 2 1 2 3 4 @@ -104,6 +114,8 @@ archive* {2..3}.csv 9 10 11 12 11 12 +c1 Nullable(Int64) +c2 Nullable(Int64) 5 6 5 6 7 8 diff --git a/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.reference b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.reference index ccc02ad4f34..60464757bdd 100644 --- a/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.reference +++ b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.reference @@ -5,5 +5,11 @@ implicit: 4 Test 2: check Filesystem database 4 +30 +10 +4 +3 +2 +1 Test 3: check show database with Filesystem test02707 diff --git a/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh index 7c9095b3d8b..dabea8a373a 100755 --- a/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh +++ b/tests/queries/0_stateless/02707_clickhouse_local_implicit_file_table_function.sh @@ -15,6 +15,23 @@ echo '2,"def",456,"bacabaa"' >> $dir/tmp.csv echo '3,"story",78912,"acabaab"' >> $dir/tmp.csv echo '4,"history",21321321,"cabaaba"' >> $dir/tmp.csv +$CLICKHOUSE_LOCAL -q "insert into function file('$dir/tmp_numbers_1.csv') select * from numbers(1, 10)" +$CLICKHOUSE_LOCAL -q "insert into function file('$dir/tmp_numbers_2.csv') select * from numbers(11, 10)" +$CLICKHOUSE_LOCAL -q "insert into function file('$dir/tmp_numbers_30.csv') select * from numbers(21, 10)" + +readonly nested_dir=$dir/nested +[[ -d $nested_dir ]] && rm -rd $nested_dir +mkdir $nested_dir +mkdir $nested_dir/subnested + +cp ${dir}/tmp_numbers_1.csv ${nested_dir}/nested_tmp_numbers_1.csv +cp ${dir}/tmp_numbers_1.csv ${nested_dir}/subnested/subnested_tmp_numbers_1.csv + +readonly other_nested_dir=$dir/other_nested +[[ -d $other_nested_dir ]] && rm 
-rd $other_nested_dir +mkdir $other_nested_dir +cp ${dir}/tmp_numbers_1.csv ${other_nested_dir}/tmp_numbers_1.csv + ################# echo "Test 1: check explicit and implicit call of the file table function" @@ -29,6 +46,12 @@ $CLICKHOUSE_LOCAL --multiline --multiquery -q """ DROP DATABASE IF EXISTS test; CREATE DATABASE test ENGINE = Filesystem('${dir}'); SELECT COUNT(*) FROM test.\`tmp.csv\`; +SELECT COUNT(*) FROM test.\`tmp_numbers_*.csv\`; +SELECT COUNT(*) FROM test.\`nested/nested_tmp_numbers_1*.csv\`; +SELECT count(DISTINCT _path) FROM test.\`*.csv\`; +SELECT count(DISTINCT _path) FROM test.\`**/*.csv\`; +SELECT count(DISTINCT _path) FROM test.\`**/*.csv\` WHERE position(_path, '${nested_dir}') > 0; +SELECT count(DISTINCT _path) FROM test.\`**/*.csv\` WHERE position(_path, '${nested_dir}') = 0; DROP DATABASE test; """ diff --git a/tests/queries/0_stateless/02722_database_filesystem.reference b/tests/queries/0_stateless/02722_database_filesystem.reference index c65dda7933a..17f84cfc49c 100644 --- a/tests/queries/0_stateless/02722_database_filesystem.reference +++ b/tests/queries/0_stateless/02722_database_filesystem.reference @@ -3,6 +3,14 @@ Test 1: create filesystem database and check implicit calls test1 4 4 +30 +10 +10 +4 +0 +2 +0 +OK 4 Test 2: check DatabaseFilesystem access rights and errors handling on server OK @@ -13,3 +21,6 @@ OK OK OK OK +OK +OK +OK diff --git a/tests/queries/0_stateless/02722_database_filesystem.sh b/tests/queries/0_stateless/02722_database_filesystem.sh index 99f2191c991..c21b1af2ff4 100755 --- a/tests/queries/0_stateless/02722_database_filesystem.sh +++ b/tests/queries/0_stateless/02722_database_filesystem.sh @@ -19,11 +19,17 @@ echo '3,"story",78912,"acabaab"' >> ${user_files_tmp_dir}/tmp.csv echo '4,"history",21321321,"cabaaba"' >> ${user_files_tmp_dir}/tmp.csv tmp_dir=${CLICKHOUSE_TEST_UNIQUE_NAME} + +$CLICKHOUSE_LOCAL -q "insert into function file('$user_files_tmp_dir/tmp_numbers_1.csv') select * from numbers(1, 10)" +$CLICKHOUSE_LOCAL -q "insert into function file('$user_files_tmp_dir/tmp_numbers_2.csv') select * from numbers(11, 10)" +$CLICKHOUSE_LOCAL -q "insert into function file('$user_files_tmp_dir/tmp_numbers_30.csv') select * from numbers(21, 10)" + [[ -d $tmp_dir ]] && rm -rd $tmp_dir mkdir $tmp_dir cp ${user_files_tmp_dir}/tmp.csv ${tmp_dir}/tmp.csv cp ${user_files_tmp_dir}/tmp.csv ${user_files_tmp_dir}/tmp/tmp.csv cp ${user_files_tmp_dir}/tmp.csv ${user_files_tmp_dir}/tmp.myext +cp ${user_files_tmp_dir}/tmp_numbers_1.csv ${user_files_tmp_dir}/tmp/tmp_numbers_1.csv ################# echo "Test 1: create filesystem database and check implicit calls" @@ -35,6 +41,15 @@ echo $? 
${CLICKHOUSE_CLIENT} --query "SHOW DATABASES" | grep "test1" ${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp.csv\`;" ${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp/tmp.csv\`;" +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp_numbers_*.csv\`;" +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/tmp/*tmp_numbers_*.csv\`;" +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`${unique_name}/*/*tmp_numbers_*.csv\`;" +${CLICKHOUSE_CLIENT} --query "SELECT count(DISTINCT _path) FROM test1.\`${unique_name}/*.csv\` WHERE startsWith(_path, '${user_files_tmp_dir}')"; +${CLICKHOUSE_CLIENT} --query "SELECT count(DISTINCT _path) FROM test1.\`${unique_name}/*.csv\` WHERE not startsWith(_path, '${user_files_tmp_dir}')"; +# **/* does not search in the current directory but searches recursively in nested directories. +${CLICKHOUSE_CLIENT} --query "SELECT count(DISTINCT _path) FROM test1.\`${unique_name}/**/*.csv\` WHERE startsWith(_path, '${user_files_tmp_dir}')"; +${CLICKHOUSE_CLIENT} --query "SELECT count(DISTINCT _path) FROM test1.\`${unique_name}/**/*.csv\` WHERE not startsWith(_path, '${user_files_tmp_dir}')"; +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`tmp_numbers_*.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_LOCAL} -q "SELECT COUNT(*) FROM \"${tmp_dir}/tmp.csv\"" ################# @@ -42,6 +57,9 @@ echo "Test 2: check DatabaseFilesystem access rights and errors handling on serv # DATABASE_ACCESS_DENIED: Allows list files only inside user_files ${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../tmp.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`/tmp/tmp.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../*/tmp_numbers_*.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../tmp_numbers_*.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT COUNT(*) FROM test1.\`../*.csv\`;" 2>&1 | tr '\n' ' ' | grep -oF "PATH_ACCESS_DENIED" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --multiline --multiquery --query """ USE test1; SELECT COUNT(*) FROM \"../${tmp_dir}/tmp.csv\"; diff --git a/tests/queries/0_stateless/02787_transform_null.reference b/tests/queries/0_stateless/02787_transform_null.reference index a650dbbd173..a84dd83d712 100644 --- a/tests/queries/0_stateless/02787_transform_null.reference +++ b/tests/queries/0_stateless/02787_transform_null.reference @@ -5,5 +5,5 @@ ONE a a \N 0 \N 0 \N -1 1 1 \N 1 1 +1 1 1 1 1 1 a \N 3 3 3 3 diff --git a/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.reference b/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.reference new file mode 100644 index 00000000000..d19222b55ec --- /dev/null +++ b/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.reference @@ -0,0 +1,31 @@ +--- Based on https://github.com/ClickHouse/ClickHouse/issues/49685 +--- Verify that ReplacingMergeTree properly handles 
_is_deleted: +--- SELECT FINAL should take `_is_deleted` into consideration when there is only one partition. +-- { echoOn } + +DROP TABLE IF EXISTS t; +CREATE TABLE t +( + `account_id` UInt64, + `_is_deleted` UInt8, + `_version` UInt64 +) +ENGINE = ReplacingMergeTree(_version, _is_deleted) +ORDER BY (account_id); +INSERT INTO t SELECT number, 0, 1 FROM numbers(1e3); +-- Mark the first 100 rows as deleted. +INSERT INTO t SELECT number, 1, 1 FROM numbers(1e2); +-- Put everything in one partition +OPTIMIZE TABLE t FINAL; +SELECT count() FROM t; +1000 +SELECT count() FROM t FINAL; +900 +-- Both should produce the same number of rows. +-- Previously, `do_not_merge_across_partitions_select_final = 1` showed more rows, +-- as if no rows were deleted. +SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 1; +900 +SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 0; +900 +DROP TABLE t; diff --git a/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.sql b/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.sql new file mode 100644 index 00000000000..a89a1ff590a --- /dev/null +++ b/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.sql @@ -0,0 +1,32 @@ +--- Based on https://github.com/ClickHouse/ClickHouse/issues/49685 +--- Verify that ReplacingMergeTree properly handles _is_deleted: +--- SELECT FINAL should take `_is_deleted` into consideration when there is only one partition. +-- { echoOn } + +DROP TABLE IF EXISTS t; +CREATE TABLE t +( + `account_id` UInt64, + `_is_deleted` UInt8, + `_version` UInt64 +) +ENGINE = ReplacingMergeTree(_version, _is_deleted) +ORDER BY (account_id); + +INSERT INTO t SELECT number, 0, 1 FROM numbers(1e3); +-- Mark the first 100 rows as deleted. +INSERT INTO t SELECT number, 1, 1 FROM numbers(1e2); + +-- Put everything in one partition +OPTIMIZE TABLE t FINAL; + +SELECT count() FROM t; +SELECT count() FROM t FINAL; + +-- Both should produce the same number of rows. +-- Previously, `do_not_merge_across_partitions_select_final = 1` showed more rows, +-- as if no rows were deleted. +SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 1; +SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 0; + +DROP TABLE t; diff --git a/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.reference b/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.reference index 1e3b82ac136..8f2c820522c 100644 --- a/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.reference +++ b/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.reference @@ -25,3 +25,37 @@ Test 3a: check literal no parsing overflow 1 Test 3b: check literal empty 1 +Test 4: select using * wildcard +30 +30 +30 +30 +30 +10 +30 +10 +Test 4b: select using ? wildcard +20 +10 +20 +10 +20 +Test 4c: select using '{' + '}' wildcards +20 +20 +1 +Test 4d: select using ? 
and * wildcards +30 +30 +30 +1 +30 +30 +Test 4e: select using ?, * and '{' + '}' wildcards +10 +20 +20 +20 +Test 4f: recursive search +2 +1 diff --git a/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.sh b/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.sh index 5cf3b1c88fd..40b936481e7 100755 --- a/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.sh +++ b/tests/queries/0_stateless/02816_clickhouse_local_table_name_expressions.sh @@ -7,6 +7,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) dir=${CLICKHOUSE_TEST_UNIQUE_NAME} [[ -d $dir ]] && rm -rd $dir mkdir $dir +mkdir $dir/nested +mkdir $dir/nested/nested # Create temporary csv file for tests echo '"id","str","int","text"' > $dir/tmp.csv @@ -15,6 +17,14 @@ echo '2,"def",456,"bacabaa"' >> $dir/tmp.csv echo '3,"story",78912,"acabaab"' >> $dir/tmp.csv echo '4,"history",21321321,"cabaaba"' >> $dir/tmp.csv +$CLICKHOUSE_LOCAL -q "insert into function file('$dir/tmp_numbers_1.jsonl') select * from numbers(1, 10)" +$CLICKHOUSE_LOCAL -q "insert into function file('$dir/tmp_numbers_2.jsonl') select * from numbers(11, 10)" + +$CLICKHOUSE_LOCAL -q "insert into function file('$dir/tmp_numbers_30.jsonl') select * from numbers(21, 10)" + +$CLICKHOUSE_LOCAL -q "insert into function file('$dir/nested/nested_numbers.jsonl') select * from numbers(1)" +$CLICKHOUSE_LOCAL -q "insert into function file('$dir/nested/nested/nested_nested_numbers.jsonl') select * from numbers(1)" + ################# echo "Test 1: check double quotes" @@ -52,5 +62,48 @@ echo "Test 3b: check literal empty" $CLICKHOUSE_LOCAL -q "SELECT * FROM ''" 2>&1 | grep -c "SYNTAX_ERROR" +echo "Test 4: select using * wildcard" +# Extension is required for auto table structure detection +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/tmp_numbers_*.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/**.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/**********************.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*_numbers_*.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*_nu*ers_*.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*_nu*ers_2.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*tmp_numbers_*.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*tmp_numbers_1*.jsonl'" + +echo "Test 4b: select using ? wildcard" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/tmp_numbers_?.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/tmp_numbers_??.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/??p_numbers??.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/tmp_n?mbers_1.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/t?p_n?mbers_?.jsonl'" + +echo "Test 4c: select using '{' + '}' wildcards" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/tmp_numbers_{1..3}.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/tmp_numbers_{1,2}.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/tmp_numbers__{1,2}.jsonl'" 2>&1 | grep -c "CANNOT_EXTRACT_TABLE_STRUCTURE" + +echo "Test 4d: select using ? 
and * wildcards" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*?.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/?*.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/?*????.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/?*?***_.jsonl'" 2>&1 | grep -c "CANNOT_EXTRACT_TABLE_STRUCTURE" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/?*????_*.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*?*_num*e?s_*.jsonl'" + +echo "Test 4e: select using ?, * and '{' + '}' wildcards" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*?{1,3}.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*?{1..3}.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*?*_num*e?s_{1..3}.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/*?*_num*e?s_{1,2}.jsonl'" + +echo "Test 4f: recursive search" +# /**/* pattern does not look in current directory +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/**/*.jsonl'" +$CLICKHOUSE_LOCAL -q "SELECT count(*) FROM '$dir/nested/**/*.jsonl'" + + # Remove temporary dir with files rm -rd $dir diff --git a/tests/queries/0_stateless/02845_domain_rfc_support_ipv6.reference b/tests/queries/0_stateless/02845_domain_rfc_support_ipv6.reference new file mode 100644 index 00000000000..26ba1a3422b --- /dev/null +++ b/tests/queries/0_stateless/02845_domain_rfc_support_ipv6.reference @@ -0,0 +1,32 @@ +"2001:db8::1" +"2001:db8::1" +"::200" +"2001:db8::1" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" +"" diff --git a/tests/queries/0_stateless/02845_domain_rfc_support_ipv6.sql b/tests/queries/0_stateless/02845_domain_rfc_support_ipv6.sql new file mode 100644 index 00000000000..e590064af44 --- /dev/null +++ b/tests/queries/0_stateless/02845_domain_rfc_support_ipv6.sql @@ -0,0 +1,33 @@ +SELECT domainRFC('http://[2001:db8::1]:80') FORMAT CSV; +SELECT domainRFC('[2001:db8::1]:80') FORMAT CSV; +SELECT domainRFC('[::200]:80') FORMAT CSV; +SELECT domainRFC('[2001:db8::1]') FORMAT CSV; +-- Does not conform to the IPv6 format. 
+SELECT domainRFC('[2001db81]:80') FORMAT CSV; +SELECT domainRFC('[20[01:db8::1]:80') FORMAT CSV; +SELECT domainRFC('[20[01:db]8::1]:80') FORMAT CSV; +SELECT domainRFC('[2001:db8::1') FORMAT CSV; +SELECT domainRFC('2001:db8::1]:80') FORMAT CSV; +SELECT domainRFC('[2001db81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db.81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db/81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db?81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db#81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db@81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db;81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db=81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db&81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db~81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db%81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db<81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db>81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db{81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db}81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db|81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db\81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db^81]:80') FORMAT CSV; +SELECT domainRFC('[2001::db 81]:80') FORMAT CSV; +SELECT domainRFC('[[]:80') FORMAT CSV; +SELECT domainRFC('[]]:80') FORMAT CSV; +SELECT domainRFC('[]:80') FORMAT CSV; +SELECT domainRFC('[ ]:80') FORMAT CSV; diff --git a/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference new file mode 100644 index 00000000000..428ba88bff0 --- /dev/null +++ b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference @@ -0,0 +1,8 @@ +1 +1 +1 +1 1 +1 +1 +1 +1 1 diff --git a/tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql new file mode 100644 index 00000000000..a5ddf830d48 --- /dev/null +++ b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS t1; + +CREATE TABLE t1 (key UInt8) ENGINE = Memory; +INSERT INTO t1 VALUES (1),(2); + +SET join_algorithm = 'full_sorting_merge'; + +SELECT key FROM ( SELECT key FROM t1 ) AS t1 JOIN ( SELECT key FROM t1 ) AS t2 ON t1.key = t2.key WHERE key; +SELECT key FROM ( SELECT 1 AS key ) AS t1 JOIN ( SELECT 1 AS key ) AS t2 ON t1.key = t2.key WHERE key; +SELECT * FROM ( SELECT 1 AS key GROUP BY NULL ) AS t1 INNER JOIN (SELECT 1 AS key) AS t2 ON t1.key = t2.key WHERE t1.key ORDER BY key; + +SET max_rows_in_set_to_optimize_join = 0; + +SELECT key FROM ( SELECT key FROM t1 ) AS t1 JOIN ( SELECT key FROM t1 ) AS t2 ON t1.key = t2.key WHERE key; +SELECT key FROM ( SELECT 1 AS key ) AS t1 JOIN ( SELECT 1 AS key ) AS t2 ON t1.key = t2.key WHERE key; +SELECT * FROM ( SELECT 1 AS key GROUP BY NULL ) AS t1 INNER JOIN (SELECT 1 AS key) AS t2 ON t1.key = t2.key WHERE t1.key ORDER BY key; + +DROP TABLE IF EXISTS t1; diff --git a/tests/queries/0_stateless/02863_ignore_foreign_keys_in_tables_definition.reference b/tests/queries/0_stateless/02863_ignore_foreign_keys_in_tables_definition.reference new file mode 100644 index 00000000000..6e82dd5d023 --- /dev/null +++ b/tests/queries/0_stateless/02863_ignore_foreign_keys_in_tables_definition.reference @@ -0,0 +1,3 @@ +CREATE TABLE default.child\n(\n `id` Int32,\n `pid` Int32\n)\nENGINE = MergeTree\nPRIMARY KEY id\nORDER BY id\nSETTINGS index_granularity = 8192 +CREATE TABLE default.child2\n(\n `id` Int32,\n `pid` Int32\n)\nENGINE = MergeTree\nPRIMARY KEY id\nORDER BY id\nSETTINGS index_granularity = 8192 
+CREATE TABLE default.child3\n(\n `id` Int32,\n `pid` Int32\n)\nENGINE = MergeTree\nPRIMARY KEY id\nORDER BY id\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02863_ignore_foreign_keys_in_tables_definition.sql b/tests/queries/0_stateless/02863_ignore_foreign_keys_in_tables_definition.sql new file mode 100644 index 00000000000..2d814643116 --- /dev/null +++ b/tests/queries/0_stateless/02863_ignore_foreign_keys_in_tables_definition.sql @@ -0,0 +1,29 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/53380 + + +drop table if exists parent; +drop table if exists child; + +create table parent (id int, primary key(id)) engine MergeTree; +create table child (id int, pid int, primary key(id), foreign key(pid)) engine MergeTree; -- { clientError SYNTAX_ERROR } +create table child (id int, pid int, primary key(id), foreign key(pid) references) engine MergeTree; -- { clientError SYNTAX_ERROR } +create table child (id int, pid int, primary key(id), foreign key(pid) references parent(pid)) engine MergeTree; + +show create table child; + +create table child2 (id int, pid int, primary key(id), + foreign key(pid) references parent(pid) on delete) engine MergeTree; -- { clientError SYNTAX_ERROR } +create table child2 (id int, pid int, primary key(id), + foreign key(pid) references parent(pid) on delete cascade) engine MergeTree; + +show create table child2; + +create table child3 (id int, pid int, primary key(id), + foreign key(pid) references parent(pid) on delete cascade on update restrict) engine MergeTree; + +show create table child3; + +drop table child3; +drop table child2; +drop table child; +drop table parent; \ No newline at end of file diff --git a/tests/queries/0_stateless/02868_select_support_from_keywords.reference b/tests/queries/0_stateless/02868_select_support_from_keywords.reference new file mode 100644 index 00000000000..d2dcb047cf0 --- /dev/null +++ b/tests/queries/0_stateless/02868_select_support_from_keywords.reference @@ -0,0 +1 @@ +CREATE VIEW default.test_view\n(\n `date` Date,\n `__sign` Int8,\n `from` Float64,\n `to` Float64\n) AS\nWITH cte AS\n (\n SELECT\n date,\n __sign,\n from,\n to\n FROM default.test_table\n FINAL\n )\nSELECT\n date,\n __sign,\n from,\n to\nFROM cte diff --git a/tests/queries/0_stateless/02868_select_support_from_keywords.sql b/tests/queries/0_stateless/02868_select_support_from_keywords.sql new file mode 100644 index 00000000000..dc06651a8eb --- /dev/null +++ b/tests/queries/0_stateless/02868_select_support_from_keywords.sql @@ -0,0 +1,5 @@ +create table test_table ( `date` Date, `__sign` Int8, `from` Float64, `to` Float64 ) ENGINE = CollapsingMergeTree(__sign) PARTITION BY toYYYYMM(date) ORDER BY (date) SETTINGS index_granularity = 8192; +create VIEW test_view AS WITH cte AS (SELECT date, __sign, "from", "to" FROM test_table FINAL) SELECT date, __sign, "from", "to" FROM cte; +show create table test_view; +drop table test_view; +drop table test_table; diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 0f15c761d2a..6d09f8501b3 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,9 +1,12 @@ +v23.7.5.30-stable 2023-08-28 v23.7.4.5-stable 2023-08-08 v23.7.3.14-stable 2023-08-05 v23.7.2.25-stable 2023-08-03 v23.7.1.2470-stable 2023-07-27 +v23.6.3.87-stable 2023-08-28 v23.6.2.18-stable 2023-07-09 v23.6.1.1524-stable 2023-06-30 +v23.5.5.92-stable 2023-08-28 v23.5.4.25-stable 2023-06-29 v23.5.3.24-stable 2023-06-17 v23.5.2.7-stable 2023-06-10 @@ 
-14,6 +17,7 @@ v23.4.4.16-stable 2023-06-17 v23.4.3.48-stable 2023-06-12 v23.4.2.11-stable 2023-05-02 v23.4.1.1943-stable 2023-04-27 +v23.3.11.5-lts 2023-08-28 v23.3.10.5-lts 2023-08-23 v23.3.9.55-lts 2023-08-21 v23.3.8.21-lts 2023-07-13 @@ -64,6 +68,7 @@ v22.9.4.32-stable 2022-10-26 v22.9.3.18-stable 2022-09-30 v22.9.2.7-stable 2022-09-23 v22.9.1.2603-stable 2022-09-22 +v22.8.21.38-lts 2023-08-28 v22.8.20.11-lts 2023-07-09 v22.8.19.10-lts 2023-06-17 v22.8.18.31-lts 2023-06-12