Merge remote-tracking branch 'origin/master' into revert-53782-revert-52762-planner-prepare-filters-for-analysis

This commit is contained in:
Dmitry Novik 2023-08-31 14:40:53 +00:00
commit 5266511281
472 changed files with 9271 additions and 2654 deletions

View File

@ -21,8 +21,7 @@ assignees: ''
**Enable crash reporting**
> If possible, change "enabled" to true in "send_crash_reports" section in `config.xml`:
> Change "enabled" to true in "send_crash_reports" section in `config.xml`:
```
<send_crash_reports>
<!-- Changing <enabled> to true allows sending crash reports to -->

View File

@ -3,9 +3,6 @@ name: BackportPR
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
# Export system tables to ClickHouse Cloud
CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }}
CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }}
on: # yamllint disable-line rule:truthy
push:

View File

@ -3,9 +3,6 @@ name: MasterCI
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
# Export system tables to ClickHouse Cloud
CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }}
CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }}
on: # yamllint disable-line rule:truthy
push:

View File

@ -3,9 +3,6 @@ name: PullRequestCI
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
# Export system tables to ClickHouse Cloud
CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }}
CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }}
on: # yamllint disable-line rule:truthy
pull_request:

View File

@ -3,9 +3,6 @@ name: ReleaseBranchCI
env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
# Export system tables to ClickHouse Cloud
CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }}
CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }}
on: # yamllint disable-line rule:truthy
push:

13
.gitmodules vendored
View File

@ -3,7 +3,7 @@
url = https://github.com/facebook/zstd
[submodule "contrib/lz4"]
path = contrib/lz4
url = https://github.com/lz4/lz4
url = https://github.com/ClickHouse/lz4
[submodule "contrib/librdkafka"]
path = contrib/librdkafka
url = https://github.com/ClickHouse/librdkafka
@ -13,7 +13,6 @@
[submodule "contrib/zlib-ng"]
path = contrib/zlib-ng
url = https://github.com/ClickHouse/zlib-ng
branch = clickhouse-2.0.x
[submodule "contrib/googletest"]
path = contrib/googletest
url = https://github.com/google/googletest
@ -47,7 +46,6 @@
[submodule "contrib/arrow"]
path = contrib/arrow
url = https://github.com/ClickHouse/arrow
branch = blessed/release-6.0.1
[submodule "contrib/thrift"]
path = contrib/thrift
url = https://github.com/apache/thrift
@ -93,7 +91,6 @@
[submodule "contrib/grpc"]
path = contrib/grpc
url = https://github.com/ClickHouse/grpc
branch = v1.33.2
[submodule "contrib/aws"]
path = contrib/aws
url = https://github.com/ClickHouse/aws-sdk-cpp
@ -140,11 +137,9 @@
[submodule "contrib/cassandra"]
path = contrib/cassandra
url = https://github.com/ClickHouse/cpp-driver
branch = clickhouse
[submodule "contrib/libuv"]
path = contrib/libuv
url = https://github.com/ClickHouse/libuv
branch = clickhouse
[submodule "contrib/fmtlib"]
path = contrib/fmtlib
url = https://github.com/fmtlib/fmt
@ -157,11 +152,9 @@
[submodule "contrib/cyrus-sasl"]
path = contrib/cyrus-sasl
url = https://github.com/ClickHouse/cyrus-sasl
branch = cyrus-sasl-2.1
[submodule "contrib/croaring"]
path = contrib/croaring
url = https://github.com/RoaringBitmap/CRoaring
branch = v0.2.66
[submodule "contrib/miniselect"]
path = contrib/miniselect
url = https://github.com/danlark1/miniselect
@ -174,7 +167,6 @@
[submodule "contrib/abseil-cpp"]
path = contrib/abseil-cpp
url = https://github.com/abseil/abseil-cpp
branch = lts_2021_11_02
[submodule "contrib/dragonbox"]
path = contrib/dragonbox
url = https://github.com/ClickHouse/dragonbox
@ -187,7 +179,6 @@
[submodule "contrib/boringssl"]
path = contrib/boringssl
url = https://github.com/ClickHouse/boringssl
branch = unknown_branch_from_artur
[submodule "contrib/NuRaft"]
path = contrib/NuRaft
url = https://github.com/ClickHouse/NuRaft
@ -248,7 +239,6 @@
[submodule "contrib/annoy"]
path = contrib/annoy
url = https://github.com/ClickHouse/annoy
branch = ClickHouse-master
[submodule "contrib/qpl"]
path = contrib/qpl
url = https://github.com/intel/qpl
@ -282,7 +272,6 @@
[submodule "contrib/openssl"]
path = contrib/openssl
url = https://github.com/openssl/openssl
branch = openssl-3.0
[submodule "contrib/google-benchmark"]
path = contrib/google-benchmark
url = https://github.com/google/benchmark

View File

@ -1,4 +1,5 @@
### Table of Contents
**[ClickHouse release v23.8 LTS, 2023-08-31](#238)**<br/>
**[ClickHouse release v23.7, 2023-07-27](#237)**<br/>
**[ClickHouse release v23.6, 2023-06-30](#236)**<br/>
**[ClickHouse release v23.5, 2023-06-08](#235)**<br/>
@ -10,6 +11,228 @@
# 2023 Changelog
### <a id="238"></a> ClickHouse release 23.8 LTS, 2023-08-31
#### Backward Incompatible Change
* If a dynamic disk contains a name, it should be specified as `disk = disk(name = 'disk_name'`, ...) in disk function arguments. In previous version it could be specified as `disk = disk_<disk_name>(...)`, which is no longer supported. [#52820](https://github.com/ClickHouse/ClickHouse/pull/52820) ([Kseniia Sumarokova](https://github.com/kssenii)).
* `clickhouse-benchmark` will establish connections in parallel when invoked with `--concurrency` more than one. Previously it was unusable if you ran it with 1000 concurrent connections from Europe to the US. Correct calculation of QPS for connections with high latency. Backward incompatible change: the option for JSON output of `clickhouse-benchmark` is removed. If you've used this option, you can also extract data from the `system.query_log` in JSON format as a workaround. [#53293](https://github.com/ClickHouse/ClickHouse/pull/53293) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The `microseconds` column is removed from the `system.text_log`, and the `milliseconds` column is removed from the `system.metric_log`, because they are redundant in the presence of the `event_time_microseconds` column. [#53601](https://github.com/ClickHouse/ClickHouse/pull/53601) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Deprecate the metadata cache feature. It is experimental and we have never used it. The feature is dangerous: [#51182](https://github.com/ClickHouse/ClickHouse/issues/51182). Remove the `system.merge_tree_metadata_cache` system table. The metadata cache is still available in this version but will be removed soon. This closes [#39197](https://github.com/ClickHouse/ClickHouse/issues/39197). [#51303](https://github.com/ClickHouse/ClickHouse/pull/51303) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Disable support for 3DES in TLS connections. [#52893](https://github.com/ClickHouse/ClickHouse/pull/52893) ([Kenji Noguchi](https://github.com/knoguchi)).
#### New Feature
* Direct import from zip/7z/tar archives. Example: `file('*.zip :: *.csv')`. [#50321](https://github.com/ClickHouse/ClickHouse/pull/50321) ([nikitakeba](https://github.com/nikitakeba)).
* Add column `ptr` to `system.trace_log` for `trace_type = 'MemorySample'`. This column contains an address of allocation. Added function `flameGraph` which can build flamegraph containing allocated and not released memory. Reworking of [#38391](https://github.com/ClickHouse/ClickHouse/issues/38391). [#45322](https://github.com/ClickHouse/ClickHouse/pull/45322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Added table function `azureBlobStorageCluster`. The supported set of features is very similar to table function `s3Cluster`. [#50795](https://github.com/ClickHouse/ClickHouse/pull/50795) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Allow using `cluster`, `clusterAllReplicas`, `remote`, and `remoteSecure` without table name in issue [#50808](https://github.com/ClickHouse/ClickHouse/issues/50808). [#50848](https://github.com/ClickHouse/ClickHouse/pull/50848) ([Yangkuan Liu](https://github.com/LiuYangkuan)).
* A system table to monitor kafka consumers. [#50999](https://github.com/ClickHouse/ClickHouse/pull/50999) ([Ilya Golshtein](https://github.com/ilejn)).
* Added `max_sessions_for_user` setting. [#51724](https://github.com/ClickHouse/ClickHouse/pull/51724) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
* New functions `toUTCTimestamp/fromUTCTimestamp` to act same as spark's `to_utc_timestamp/from_utc_timestamp`. [#52117](https://github.com/ClickHouse/ClickHouse/pull/52117) ([KevinyhZou](https://github.com/KevinyhZou)).
* Add new functions `structureToCapnProtoSchema`/`structureToProtobufSchema` that convert ClickHouse table structure to CapnProto/Protobuf format schema. Allow to input/output data in CapnProto/Protobuf format without external format schema using autogenerated schema from table structure (controled by settings `format_capn_proto_use_autogenerated_schema`/`format_protobuf_use_autogenerated_schema`). Allow to export autogenerated schema while input/outoput using setting `output_format_schema`. [#52278](https://github.com/ClickHouse/ClickHouse/pull/52278) ([Kruglov Pavel](https://github.com/Avogar)).
* A new field `query_cache_usage` in `system.query_log` now shows if and how the query cache was used. [#52384](https://github.com/ClickHouse/ClickHouse/pull/52384) ([Robert Schulze](https://github.com/rschu1ze)).
* Add new function `startsWithUTF8` and `endsWithUTF8`. [#52555](https://github.com/ClickHouse/ClickHouse/pull/52555) ([李扬](https://github.com/taiyang-li)).
* Allow variable number of columns in TSV/CuatomSeprarated/JSONCompactEachRow, make schema inference work with variable number of columns. Add settings `input_format_tsv_allow_variable_number_of_columns`, `input_format_custom_allow_variable_number_of_columns`, `input_format_json_compact_allow_variable_number_of_columns`. [#52692](https://github.com/ClickHouse/ClickHouse/pull/52692) ([Kruglov Pavel](https://github.com/Avogar)).
* Added `SYSTEM STOP/START PULLING REPLICATION LOG` queries (for testing `ReplicatedMergeTree`). [#52881](https://github.com/ClickHouse/ClickHouse/pull/52881) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Allow to execute constant non-deterministic functions in mutations on initiator. [#53129](https://github.com/ClickHouse/ClickHouse/pull/53129) ([Anton Popov](https://github.com/CurtizJ)).
* Add input format `One` that doesn't read any data and always returns single row with column `dummy` with type `UInt8` and value `0` like `system.one`. It can be used together with `_file/_path` virtual columns to list files in file/s3/url/hdfs/etc table functions without reading any data. [#53209](https://github.com/ClickHouse/ClickHouse/pull/53209) ([Kruglov Pavel](https://github.com/Avogar)).
* Add `tupleConcat` function. Closes [#52759](https://github.com/ClickHouse/ClickHouse/issues/52759). [#53239](https://github.com/ClickHouse/ClickHouse/pull/53239) ([Nikolay Degterinsky](https://github.com/evillique)).
* Support `TRUNCATE DATABASE` operation. [#53261](https://github.com/ClickHouse/ClickHouse/pull/53261) ([Bharat Nallan](https://github.com/bharatnc)).
* Add `max_threads_for_indexes` setting to limit number of threads used for primary key processing. [#53313](https://github.com/ClickHouse/ClickHouse/pull/53313) ([jorisgio](https://github.com/jorisgio)).
* Re-add SipHash keyed functions. [#53525](https://github.com/ClickHouse/ClickHouse/pull/53525) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* ([#52755](https://github.com/ClickHouse/ClickHouse/issues/52755) , [#52895](https://github.com/ClickHouse/ClickHouse/issues/52895)) Added functions `arrayRotateLeft`, `arrayRotateRight`, `arrayShiftLeft`, `arrayShiftRight`. [#53557](https://github.com/ClickHouse/ClickHouse/pull/53557) ([Mikhail Koviazin](https://github.com/mkmkme)).
* Add column `name` to `system.clusters` as an alias to cluster. [#53605](https://github.com/ClickHouse/ClickHouse/pull/53605) ([irenjj](https://github.com/irenjj)).
* The advanced dashboard now allows mass editing (save/load). [#53608](https://github.com/ClickHouse/ClickHouse/pull/53608) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The advanced dashboard now has an option to maximize charts and move them around. [#53622](https://github.com/ClickHouse/ClickHouse/pull/53622) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* TODO: edit it: Add support for plural units. [#53641](https://github.com/ClickHouse/ClickHouse/pull/53641) ([irenjj](https://github.com/irenjj)).
* TODO: edit it: Added server setting validate_tcp_client_information determines whether validation of client information enabled when query packet is received. [#53907](https://github.com/ClickHouse/ClickHouse/pull/53907) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
* Added support for adding and subtracting arrays: `[5,2] + [1,7]`. Division and multiplication were not implemented due to confusion between pointwise multiplication and the scalar product of arguments. Closes [#49939](https://github.com/ClickHouse/ClickHouse/issues/49939). [#52625](https://github.com/ClickHouse/ClickHouse/pull/52625) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Add support for string literals as table names. Closes [#52178](https://github.com/ClickHouse/ClickHouse/issues/52178). [#52635](https://github.com/ClickHouse/ClickHouse/pull/52635) ([hendrik-m](https://github.com/hendrik-m)).
#### Experimental Feature
* Add new table engine `S3Queue` for streaming data import from s3. Closes [#37012](https://github.com/ClickHouse/ClickHouse/issues/37012). [#49086](https://github.com/ClickHouse/ClickHouse/pull/49086) ([s-kat](https://github.com/s-kat)). It is not ready to use. Do not use it.
* Enable parallel reading from replicas over distributed table. Related to [#49708](https://github.com/ClickHouse/ClickHouse/issues/49708). [#53005](https://github.com/ClickHouse/ClickHouse/pull/53005) ([Igor Nikonov](https://github.com/devcrafter)).
* Add experimental support for HNSW as approximate neighbor search method. [#53447](https://github.com/ClickHouse/ClickHouse/pull/53447) ([Davit Vardanyan](https://github.com/davvard)). This is currently intended for those who continue working on the implementation. Do not use it.
#### Performance Improvement
* Parquet filter pushdown. I.e. when reading Parquet files, row groups (chunks of the file) are skipped based on the WHERE condition and the min/max values in each column. In particular, if the file is roughly sorted by some column, queries that filter by a short range of that column will be much faster. [#52951](https://github.com/ClickHouse/ClickHouse/pull/52951) ([Michael Kolupaev](https://github.com/al13n321)).
* Optimize reading small row groups by batching them together in Parquet. Closes [#53069](https://github.com/ClickHouse/ClickHouse/issues/53069). [#53281](https://github.com/ClickHouse/ClickHouse/pull/53281) ([Kruglov Pavel](https://github.com/Avogar)).
* Optimize count from files in most input formats. Closes [#44334](https://github.com/ClickHouse/ClickHouse/issues/44334). [#53637](https://github.com/ClickHouse/ClickHouse/pull/53637) ([Kruglov Pavel](https://github.com/Avogar)).
* Use filter by file/path before reading in `url`/`file`/`hdfs` table functins. [#53529](https://github.com/ClickHouse/ClickHouse/pull/53529) ([Kruglov Pavel](https://github.com/Avogar)).
* Enable JIT compilation for AArch64, PowerPC, SystemZ, RISC-V. [#38217](https://github.com/ClickHouse/ClickHouse/pull/38217) ([Maksim Kita](https://github.com/kitaisreal)).
* Add setting `rewrite_count_distinct_if_with_count_distinct_implementation` to rewrite `countDistinctIf` with `count_distinct_implementation`. Closes [#30642](https://github.com/ClickHouse/ClickHouse/issues/30642). [#46051](https://github.com/ClickHouse/ClickHouse/pull/46051) ([flynn](https://github.com/ucasfl)).
* TODO: edit it: This patch will provide a method to deal with all the hashsets in parallel before merge. [#50748](https://github.com/ClickHouse/ClickHouse/pull/50748) ([Jiebin Sun](https://github.com/jiebinn)).
* Optimize aggregation performance of nullable string key when using a large number of variable length keys. [#51399](https://github.com/ClickHouse/ClickHouse/pull/51399) ([LiuNeng](https://github.com/liuneng1994)).
* Add a pass in Analyzer for time filter optimization with preimage. The performance experiments of SSB on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of 8.5% to the geomean QPS when the experimental analyzer is enabled. [#52091](https://github.com/ClickHouse/ClickHouse/pull/52091) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
* Optimize the merge if all hash sets are single-level in the `uniqExact` (COUNT DISTINCT) function. [#52973](https://github.com/ClickHouse/ClickHouse/pull/52973) ([Jiebin Sun](https://github.com/jiebinn)).
* `Join` table engine: do not clone hash join data structure with all columns. [#53046](https://github.com/ClickHouse/ClickHouse/pull/53046) ([Duc Canh Le](https://github.com/canhld94)).
* Implement native `ORC` input format without the "apache arrow" library to improve performance. [#53324](https://github.com/ClickHouse/ClickHouse/pull/53324) ([李扬](https://github.com/taiyang-li)).
* The dashboard will tell the server to compress the data, which is useful for large time frames over slow internet connections. For example, one chart with 86400 points can be 1.5 MB uncompressed and 60 KB compressed with `br`. [#53569](https://github.com/ClickHouse/ClickHouse/pull/53569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Better utilization of thread pool for BACKUPs and RESTOREs. [#53649](https://github.com/ClickHouse/ClickHouse/pull/53649) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Load filesystem cache metadata on startup in parallel. Configured by `load_metadata_threads` (default: 1) cache config setting. Related to [#52037](https://github.com/ClickHouse/ClickHouse/issues/52037). [#52943](https://github.com/ClickHouse/ClickHouse/pull/52943) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Improve `move_primary_key_columns_to_end_of_prewhere`. [#53337](https://github.com/ClickHouse/ClickHouse/pull/53337) ([Han Fei](https://github.com/hanfei1991)).
* This optimizes the interaction with ClickHouse Keeper. Previously the caller could register the same watch callback multiple times. In that case each entry was consuming memory and the same callback was called multiple times which didn't make much sense. In order to avoid this the caller could have some logic to not add the same watch multiple times. With this change this deduplication is done internally if the watch callback is passed via shared_ptr. [#53452](https://github.com/ClickHouse/ClickHouse/pull/53452) ([Alexander Gololobov](https://github.com/davenger)).
* Cache number of rows in files for count in file/s3/url/hdfs/azure functions. The cache can be enabled/disabled by setting `use_cache_for_count_from_files` (enabled by default). Continuation of https://github.com/ClickHouse/ClickHouse/pull/53637. [#53692](https://github.com/ClickHouse/ClickHouse/pull/53692) ([Kruglov Pavel](https://github.com/Avogar)).
* More careful thread management will improve the speed of the S3 table function over a large number of files by more than ~25%. [#53668](https://github.com/ClickHouse/ClickHouse/pull/53668) ([pufit](https://github.com/pufit)).
#### Improvement
* Add `stderr_reaction` configuration/setting to control the reaction (none, log or throw) when external command stderr has data. This helps make debugging external command easier. [#43210](https://github.com/ClickHouse/ClickHouse/pull/43210) ([Amos Bird](https://github.com/amosbird)).
* Add `partition` column to the `system part_log` and merge table. [#48990](https://github.com/ClickHouse/ClickHouse/pull/48990) ([Jianfei Hu](https://github.com/incfly)).
* The sizes of the (index) uncompressed/mark, mmap and query caches can now be configured dynamically at runtime (without server restart). [#51446](https://github.com/ClickHouse/ClickHouse/pull/51446) ([Robert Schulze](https://github.com/rschu1ze)).
* If a dictionary is created with a complex key, automatically choose the "complex key" layout variant. [#49587](https://github.com/ClickHouse/ClickHouse/pull/49587) ([xiebin](https://github.com/xbthink)).
* Add setting `use_concurrency_control` for better testing of the new concurrency control feature. [#49618](https://github.com/ClickHouse/ClickHouse/pull/49618) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Added suggestions for mistyped names for databases and tables. [#49801](https://github.com/ClickHouse/ClickHouse/pull/49801) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* While read small files from HDFS by Gluten, we found that it will cost more times when compare to directly query by Spark. And we did something with that. [#50063](https://github.com/ClickHouse/ClickHouse/pull/50063) ([KevinyhZou](https://github.com/KevinyhZou)).
* There were too many worthless error logs after session expiration, which we didn't like. [#50171](https://github.com/ClickHouse/ClickHouse/pull/50171) ([helifu](https://github.com/helifu)).
* Introduce fallback ZooKeeper sessions which are time-bound. Fixed `index` column in system.zookeeper_connection for DNS addresses. [#50424](https://github.com/ClickHouse/ClickHouse/pull/50424) ([Anton Kozlov](https://github.com/tonickkozlov)).
* Add ability to log when max_partitions_per_insert_block is reached. [#50948](https://github.com/ClickHouse/ClickHouse/pull/50948) ([Sean Haynes](https://github.com/seandhaynes)).
* Added a bunch of custom commands to clickhouse-keeper-client (mostly to make ClickHouse debugging easier). [#51117](https://github.com/ClickHouse/ClickHouse/pull/51117) ([pufit](https://github.com/pufit)).
* Updated check for connection string in `azureBlobStorage` table function as connection string with "sas" does not always begin with the default endpoint and updated connection URL to include "sas" token after adding Azure's container to URL. [#51141](https://github.com/ClickHouse/ClickHouse/pull/51141) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix description for filtering sets in the `full_sorting_merge` JOIN algorithm. [#51329](https://github.com/ClickHouse/ClickHouse/pull/51329) ([Tanay Tummalapalli](https://github.com/ttanay)).
* Fixed memory consumption in `Aggregator` when `max_block_size` is huge. [#51566](https://github.com/ClickHouse/ClickHouse/pull/51566) ([Nikita Taranov](https://github.com/nickitat)).
* Add `SYSTEM SYNC FILESYSTEM CACHE` command. It will compare in-memory state of filesystem cache with what it has on disk and fix in-memory state if needed. This is only needed if you are making manual interventions in on-disk data, which is highly discouraged. [#51622](https://github.com/ClickHouse/ClickHouse/pull/51622) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Attempt to create a generic proxy resolver for CH while keeping backwards compatibility with existing S3 storage conf proxy resolver. [#51749](https://github.com/ClickHouse/ClickHouse/pull/51749) ([Arthur Passos](https://github.com/arthurpassos)).
* Support reading tuple subcolumns from file/s3/hdfs/url/azureBlobStorage table functions. [#51806](https://github.com/ClickHouse/ClickHouse/pull/51806) ([Kruglov Pavel](https://github.com/Avogar)).
* Function `arrayIntersect` now returns the values in the order, corresponding to the first argument. Closes [#27622](https://github.com/ClickHouse/ClickHouse/issues/27622). [#51850](https://github.com/ClickHouse/ClickHouse/pull/51850) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Add new queries, which allow to create/drop of access entities in specified access storage or move access entities from one access storage to another. [#51912](https://github.com/ClickHouse/ClickHouse/pull/51912) ([pufit](https://github.com/pufit)).
* Make `ALTER TABLE FREEZE` queries not replicated in the Replicated database engine. [#52064](https://github.com/ClickHouse/ClickHouse/pull/52064) ([Mike Kot](https://github.com/myrrc)).
* Added possibility to flush system tables on unexpected shutdown. [#52174](https://github.com/ClickHouse/ClickHouse/pull/52174) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
* Fix the case when `s3` table function refused to work with pre-signed URLs. close [#50846](https://github.com/ClickHouse/ClickHouse/issues/50846). [#52310](https://github.com/ClickHouse/ClickHouse/pull/52310) ([chen](https://github.com/xiedeyantu)).
* Add column `name` as an alias to `event` and `metric` in the `system.events` and `system.metrics` tables. Closes [#51257](https://github.com/ClickHouse/ClickHouse/issues/51257). [#52315](https://github.com/ClickHouse/ClickHouse/pull/52315) ([chen](https://github.com/xiedeyantu)).
* Added support of syntax `CREATE UNIQUE INDEX` in parser as a no-op for better SQL compatibility. `UNIQUE` index is not supported. Set `create_index_ignore_unique = 1` to ignore UNIQUE keyword in queries. [#52320](https://github.com/ClickHouse/ClickHouse/pull/52320) ([Ilya Yatsishin](https://github.com/qoega)).
* Add support of predefined macro (`{database}` and `{table}`) in some Kafka engine settings: topic, consumer, client_id, etc. [#52386](https://github.com/ClickHouse/ClickHouse/pull/52386) ([Yury Bogomolov](https://github.com/ybogo)).
* Disable updating the filesystem cache during backup/restore. Filesystem cache must not be updated during backup/restore, it seems it just slows down the process without any profit (because the BACKUP command can read a lot of data and it's no use to put all the data to the filesystem cache and immediately evict it). [#52402](https://github.com/ClickHouse/ClickHouse/pull/52402) ([Vitaly Baranov](https://github.com/vitlibar)).
* The configuration of S3 endpoint allow using it from the root, and append '/' automatically if needed. [#47809](https://github.com/ClickHouse/ClickHouse/issues/47809). [#52600](https://github.com/ClickHouse/ClickHouse/pull/52600) ([xiaolei565](https://github.com/xiaolei565)).
* For clickhouse-local allow positional options and populate global UDF settings (user_scripts_path and user_defined_executable_functions_config). [#52643](https://github.com/ClickHouse/ClickHouse/pull/52643) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* `system.asynchronous_metrics` now includes metrics "QueryCacheEntries" and "QueryCacheBytes" to inspect the query cache. [#52650](https://github.com/ClickHouse/ClickHouse/pull/52650) ([Robert Schulze](https://github.com/rschu1ze)).
* Added possibility to use `s3_storage_class` parameter in the `SETTINGS` clause of the `BACKUP` statement for backups to S3. [#52658](https://github.com/ClickHouse/ClickHouse/pull/52658) ([Roman Vasin](https://github.com/rvasin)).
* Add utility `print-backup-info.py` which parses a backup metadata file and prints information about the backup. [#52690](https://github.com/ClickHouse/ClickHouse/pull/52690) ([Vitaly Baranov](https://github.com/vitlibar)).
* Closes [#49510](https://github.com/ClickHouse/ClickHouse/issues/49510). Currently we have database and table names case-sensitive, but BI tools query `information_schema` sometimes in lowercase, sometimes in uppercase. For this reason we have `information_schema` database, containing lowercase tables, such as `information_schema.tables` and `INFORMATION_SCHEMA` database, containing uppercase tables, such as `INFORMATION_SCHEMA.TABLES`. But some tools are querying `INFORMATION_SCHEMA.tables` and `information_schema.TABLES`. The proposed solution is to duplicate both lowercase and uppercase tables in lowercase and uppercase `information_schema` database. [#52695](https://github.com/ClickHouse/ClickHouse/pull/52695) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Query`CHECK TABLE` has better performance and usability (sends progress updates, cancellable). [#52745](https://github.com/ClickHouse/ClickHouse/pull/52745) ([vdimir](https://github.com/vdimir)).
* Add support for `modulo`, `intDiv`, `intDivOrZero` for tuples by distributing them across tuple's elements. [#52758](https://github.com/ClickHouse/ClickHouse/pull/52758) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Search for default `yaml` and `yml` configs in clickhouse-client after `xml`. [#52767](https://github.com/ClickHouse/ClickHouse/pull/52767) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* When merging into non-'clickhouse' rooted configuration, configs with different root node name just bypassed without exception. [#52770](https://github.com/ClickHouse/ClickHouse/pull/52770) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Now it's possible to specify min (`memory_profiler_sample_min_allocation_size`) and max (`memory_profiler_sample_max_allocation_size`) size for allocations to be tracked with sampling memory profiler. [#52779](https://github.com/ClickHouse/ClickHouse/pull/52779) ([alesapin](https://github.com/alesapin)).
* Add `precise_float_parsing` setting to switch float parsing methods (fast/precise). [#52791](https://github.com/ClickHouse/ClickHouse/pull/52791) ([Andrey Zvonov](https://github.com/zvonand)).
* Use the same default paths for `clickhouse-keeper` (symlink) as for `clickhouse-keeper` (executable). [#52861](https://github.com/ClickHouse/ClickHouse/pull/52861) ([Vitaly Baranov](https://github.com/vitlibar)).
* Improve error message for table function `remote`. Closes [#40220](https://github.com/ClickHouse/ClickHouse/issues/40220). [#52959](https://github.com/ClickHouse/ClickHouse/pull/52959) ([jiyoungyoooo](https://github.com/jiyoungyoooo)).
* Added the possibility to specify custom storage policy in the `SETTINGS` clause of `RESTORE` queries. [#52970](https://github.com/ClickHouse/ClickHouse/pull/52970) ([Victor Krasnov](https://github.com/sirvickr)).
* Add the ability to throttle the S3 requests on backup operations (`BACKUP` and `RESTORE` commands now honor `s3_max_[get/put]_[rps/burst]`). [#52974](https://github.com/ClickHouse/ClickHouse/pull/52974) ([Daniel Pozo Escalona](https://github.com/danipozo)).
* Add settings to ignore ON CLUSTER clause in queries for management of replicated user-defined functions or access control entities with replicated storage. [#52975](https://github.com/ClickHouse/ClickHouse/pull/52975) ([Aleksei Filatov](https://github.com/aalexfvk)).
* EXPLAIN actions for JOIN step. [#53006](https://github.com/ClickHouse/ClickHouse/pull/53006) ([Maksim Kita](https://github.com/kitaisreal)).
* Make `hasTokenOrNull` and `hasTokenCaseInsensitiveOrNull` return null for empty needles. [#53059](https://github.com/ClickHouse/ClickHouse/pull/53059) ([ltrk2](https://github.com/ltrk2)).
* Allow to restrict allowed paths for filesystem caches. Mainly useful for dynamic disks. If in server config `filesystem_caches_path` is specified, all filesystem caches' paths will be restricted to this directory. E.g. if the `path` in cache config is relative - it will be put in `filesystem_caches_path`; if `path` in cache config is absolute, it will be required to lie inside `filesystem_caches_path`. If `filesystem_caches_path` is not specified in config, then behaviour will be the same as in earlier versions. [#53124](https://github.com/ClickHouse/ClickHouse/pull/53124) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Added a bunch of custom commands (mostly to make ClickHouse debugging easier). [#53127](https://github.com/ClickHouse/ClickHouse/pull/53127) ([pufit](https://github.com/pufit)).
* Add diagnostic info about file name during schema inference - it helps when you process multiple files with globs. [#53135](https://github.com/ClickHouse/ClickHouse/pull/53135) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Client will load suggestions using the main connection if the second connection is not allowed to create a session. [#53177](https://github.com/ClickHouse/ClickHouse/pull/53177) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
* Add EXCEPT clause to `SYSTEM STOP/START LISTEN QUERIES [ALL/DEFAULT/CUSTOM]` query, for example `SYSTEM STOP LISTEN QUERIES ALL EXCEPT TCP, HTTP`. [#53280](https://github.com/ClickHouse/ClickHouse/pull/53280) ([Nikolay Degterinsky](https://github.com/evillique)).
* Change the default of `max_concurrent_queries` from 100 to 1000. It's ok to have many concurrent queries if they are not heavy, and mostly waiting for the network. Note: don't confuse concurrent queries and QPS: for example, ClickHouse server can do tens of thousands of QPS with less than 100 concurrent queries. [#53285](https://github.com/ClickHouse/ClickHouse/pull/53285) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Limit number of concurrent background partition optimize merges. [#53405](https://github.com/ClickHouse/ClickHouse/pull/53405) ([Duc Canh Le](https://github.com/canhld94)).
* Added a setting `allow_moving_table_directory_to_trash` that allows to ignore `Directory for table data already exists` error when replicating/recovering a `Replicated` database. [#53425](https://github.com/ClickHouse/ClickHouse/pull/53425) ([Alexander Tokmakov](https://github.com/tavplubix)).
* If server settings `asynchronous_metrics_update_period_s` and `asynchronous_heavy_metrics_update_period_s` are misconfigured to 0, it will now fail gracefully instead of terminating the application. [#53428](https://github.com/ClickHouse/ClickHouse/pull/53428) ([Robert Schulze](https://github.com/rschu1ze)).
* The ClickHouse server now respects memory limits changed via cgroups when reloading its configuration. [#53455](https://github.com/ClickHouse/ClickHouse/pull/53455) ([Robert Schulze](https://github.com/rschu1ze)).
* Add ability to turn off flush of Distributed tables on `DETACH`, `DROP`, or server shutdown. [#53501](https://github.com/ClickHouse/ClickHouse/pull/53501) ([Azat Khuzhin](https://github.com/azat)).
* The `domainRFC` function now supports IPv6 in square brackets. [#53506](https://github.com/ClickHouse/ClickHouse/pull/53506) ([Chen768959](https://github.com/Chen768959)).
* Use longer timeout for S3 CopyObject requests, which are used in backups. [#53533](https://github.com/ClickHouse/ClickHouse/pull/53533) ([Michael Kolupaev](https://github.com/al13n321)).
* Added server setting `aggregate_function_group_array_max_element_size`. This setting is used to limit array size for `groupArray` function at serialization. The default value is `16777215`. [#53550](https://github.com/ClickHouse/ClickHouse/pull/53550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* `SCHEMA()` was added as alias for `DATABASE()` to improve MySQL compatibility. [#53587](https://github.com/ClickHouse/ClickHouse/pull/53587) ([Daniël van Eeden](https://github.com/dveeden)).
* Add asynchronous metrics about tables in the system database. For example, `TotalBytesOfMergeTreeTablesSystem`. This closes [#53603](https://github.com/ClickHouse/ClickHouse/issues/53603). [#53604](https://github.com/ClickHouse/ClickHouse/pull/53604) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* SQL editor in the Play UI and Dashboard will not use Grammarly. [#53614](https://github.com/ClickHouse/ClickHouse/pull/53614) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* As expert-level settings, it is now possible to (1) configure the size_ratio (i.e. the relative size of the protected queue) of the [index] mark/uncompressed caches, (2) configure the cache policy of the index mark and index uncompressed caches. [#53657](https://github.com/ClickHouse/ClickHouse/pull/53657) ([Robert Schulze](https://github.com/rschu1ze)).
* Added client info validation to the query packet in TCPHandler. [#53673](https://github.com/ClickHouse/ClickHouse/pull/53673) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
* Retry loading parts in case of network errors while interaction with Microsoft Azure. [#53750](https://github.com/ClickHouse/ClickHouse/pull/53750) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Stacktrace for exceptions, Materailized view exceptions are propagated. [#53766](https://github.com/ClickHouse/ClickHouse/pull/53766) ([Ilya Golshtein](https://github.com/ilejn)).
* If no hostname or port were specified, keeper client will try to search for a connection string in the ClickHouse's config.xml. [#53769](https://github.com/ClickHouse/ClickHouse/pull/53769) ([pufit](https://github.com/pufit)).
* Add profile event `PartsLockMicroseconds` which shows the amount of microseconds we hold the data parts lock in MergeTree table engine family. [#53797](https://github.com/ClickHouse/ClickHouse/pull/53797) ([alesapin](https://github.com/alesapin)).
* Make reconnect limit in RAFT limits configurable for keeper. This configuration can help to make keeper to rebuild connection with peers quicker if the current connection is broken. [#53817](https://github.com/ClickHouse/ClickHouse/pull/53817) ([Pengyuan Bian](https://github.com/bianpengyuan)).
* Ignore foreign keys in tables definition to improve compatibility with MySQL, so a user wouldn't need to rewrite his SQL of the foreign key part, ref [#53380](https://github.com/ClickHouse/ClickHouse/issues/53380). [#53864](https://github.com/ClickHouse/ClickHouse/pull/53864) ([jsc0218](https://github.com/jsc0218)).
#### Build/Testing/Packaging Improvement
* Don't expose symbols from ClickHouse binary to dynamic linker. It might fix [#43933](https://github.com/ClickHouse/ClickHouse/issues/43933). [#47475](https://github.com/ClickHouse/ClickHouse/pull/47475) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add https://github.com/elliotchance/sqltest to CI to report the SQL 2016 conformance. [#52293](https://github.com/ClickHouse/ClickHouse/pull/52293) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Upgrade PRQL to 0.9.3. [#53060](https://github.com/ClickHouse/ClickHouse/pull/53060) ([Maximilian Roos](https://github.com/max-sixty)).
* System tables from CI checks are exported to ClickHouse Cloud. [#53086](https://github.com/ClickHouse/ClickHouse/pull/53086) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud. [#53100](https://github.com/ClickHouse/ClickHouse/pull/53100) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Speed up Debug and Tidy builds. [#53178](https://github.com/ClickHouse/ClickHouse/pull/53178) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Speed up the build by removing tons and tonnes of garbage. One of the frequently included headers was poisoned by boost. [#53180](https://github.com/ClickHouse/ClickHouse/pull/53180) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove even more garbage. [#53182](https://github.com/ClickHouse/ClickHouse/pull/53182) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The function `arrayAUC` was using heavy C++ templates - ditched them. [#53183](https://github.com/ClickHouse/ClickHouse/pull/53183) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Some translation units were always rebuilt regardless of ccache. The culprit is found and fixed. [#53184](https://github.com/ClickHouse/ClickHouse/pull/53184) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud., the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Export logs from CI in stateful tests to ClickHouse Cloud. [#53351](https://github.com/ClickHouse/ClickHouse/pull/53351) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Export logs from CI in stress tests. [#53353](https://github.com/ClickHouse/ClickHouse/pull/53353) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Export logs from CI in fuzzer. [#53354](https://github.com/ClickHouse/ClickHouse/pull/53354) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Follow up for [#53418](https://github.com/ClickHouse/ClickHouse/issues/53418). Small improvements for install_check.py, adding tests for proper ENV parameters passing to the main process on `init.d start`. [#53457](https://github.com/ClickHouse/ClickHouse/pull/53457) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Reorganize file management in CMake to prevent potential duplications. For instance, `indexHint.cpp` is duplicated in both `dbms_sources` and `clickhouse_functions_sources`. [#53621](https://github.com/ClickHouse/ClickHouse/pull/53621) ([Amos Bird](https://github.com/amosbird)).
* Upgrade snappy to 1.1.10. [#53672](https://github.com/ClickHouse/ClickHouse/pull/53672) ([李扬](https://github.com/taiyang-li)).
* Slightly improve cmake build by sanitizing some dependencies and removing some duplicates. Each commit includes a short description of the changes made. [#53759](https://github.com/ClickHouse/ClickHouse/pull/53759) ([Amos Bird](https://github.com/amosbird)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Do not reset (experimental) Annoy index during build-up with more than one mark [#51325](https://github.com/ClickHouse/ClickHouse/pull/51325) ([Tian Xinhui](https://github.com/xinhuitian)).
* Fix usage of temporary directories during RESTORE [#51493](https://github.com/ClickHouse/ClickHouse/pull/51493) ([Azat Khuzhin](https://github.com/azat)).
* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Support IPv4 and IPv6 data types as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* A fix for checksum of compress marks [#51777](https://github.com/ClickHouse/ClickHouse/pull/51777) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix mistakenly comma parsing as part of datetime in CSV best effort parsing [#51950](https://github.com/ClickHouse/ClickHouse/pull/51950) ([Kruglov Pavel](https://github.com/Avogar)).
* Don't throw exception when executable UDF has parameters [#51961](https://github.com/ClickHouse/ClickHouse/pull/51961) ([Nikita Taranov](https://github.com/nickitat)).
* Fix recalculation of skip indexes and projections in `ALTER DELETE` queries [#52530](https://github.com/ClickHouse/ClickHouse/pull/52530) ([Anton Popov](https://github.com/CurtizJ)).
* MaterializedMySQL: Fix the infinite loop in ReadBuffer::read [#52621](https://github.com/ClickHouse/ClickHouse/pull/52621) ([Val Doroshchuk](https://github.com/valbok)).
* Load suggestion only with `clickhouse` dialect [#52628](https://github.com/ClickHouse/ClickHouse/pull/52628) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Init and destroy ares channel on demand. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)).
* Fix filtering by virtual columns with OR expression [#52653](https://github.com/ClickHouse/ClickHouse/pull/52653) ([Azat Khuzhin](https://github.com/azat)).
* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)).
* Fix named collections on cluster [#52687](https://github.com/ClickHouse/ClickHouse/pull/52687) ([Al Korgun](https://github.com/alkorgun)).
* Fix reading of unnecessary column in case of multistage `PREWHERE` [#52689](https://github.com/ClickHouse/ClickHouse/pull/52689) ([Anton Popov](https://github.com/CurtizJ)).
* Fix unexpected sort result on multi columns with nulls first direction [#52761](https://github.com/ClickHouse/ClickHouse/pull/52761) ([copperybean](https://github.com/copperybean)).
* Fix data race in Keeper reconfiguration [#52804](https://github.com/ClickHouse/ClickHouse/pull/52804) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix sorting of sparse columns with large limit [#52827](https://github.com/ClickHouse/ClickHouse/pull/52827) ([Anton Popov](https://github.com/CurtizJ)).
* clickhouse-keeper: fix implementation of server with poll. [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)).
* Make regexp analyzer recognize named capturing groups [#52840](https://github.com/ClickHouse/ClickHouse/pull/52840) ([Han Fei](https://github.com/hanfei1991)).
* Fix possible assert in `~PushingAsyncPipelineExecutor` in clickhouse-local [#52862](https://github.com/ClickHouse/ClickHouse/pull/52862) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix reading of empty `Nested(Array(LowCardinality(...)))` [#52949](https://github.com/ClickHouse/ClickHouse/pull/52949) ([Anton Popov](https://github.com/CurtizJ)).
* Added new tests for session_log and fixed the inconsistency between login and logout. [#52958](https://github.com/ClickHouse/ClickHouse/pull/52958) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)).
* Convert sparse column format to full in CreateSetAndFilterOnTheFlyStep [#53000](https://github.com/ClickHouse/ClickHouse/pull/53000) ([vdimir](https://github.com/vdimir)).
* Fix rare race condition with empty key prefix directory deletion in fs cache [#53055](https://github.com/ClickHouse/ClickHouse/pull/53055) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix ZstdDeflatingWriteBuffer truncating the output sometimes [#53064](https://github.com/ClickHouse/ClickHouse/pull/53064) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix query_id in part_log with async flush queries [#53103](https://github.com/ClickHouse/ClickHouse/pull/53103) ([Raúl Marín](https://github.com/Algunenano)).
* Fix possible error from cache "Read unexpected size" [#53121](https://github.com/ClickHouse/ClickHouse/pull/53121) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Disable the new parquet encoder [#53130](https://github.com/ClickHouse/ClickHouse/pull/53130) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix "Not-ready Set" exception [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix character escaping in the PostgreSQL engine [#53250](https://github.com/ClickHouse/ClickHouse/pull/53250) ([Nikolay Degterinsky](https://github.com/evillique)).
* Experimental session_log table: Added new tests for session_log and fixed the inconsistency between login and logout. [#53255](https://github.com/ClickHouse/ClickHouse/pull/53255) ([Alexey Gerasimchuck](https://github.com/Demilivor)). Fixed inconsistency between login success and logout [#53302](https://github.com/ClickHouse/ClickHouse/pull/53302) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
* Fix adding sub-second intervals to DateTime [#53309](https://github.com/ClickHouse/ClickHouse/pull/53309) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix "Context has expired" error in dictionaries [#53342](https://github.com/ClickHouse/ClickHouse/pull/53342) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)).
* Forbid use_structure_from_insertion_table_in_table_functions when execute Scalar [#53348](https://github.com/ClickHouse/ClickHouse/pull/53348) ([flynn](https://github.com/ucasfl)).
* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fixed system.data_skipping_indices for MaterializedMySQL [#53381](https://github.com/ClickHouse/ClickHouse/pull/53381) ([Filipp Ozinov](https://github.com/bakwc)).
* Fix processing single carriage return in TSV file segmentation engine [#53407](https://github.com/ClickHouse/ClickHouse/pull/53407) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix `Context has expired` error properly [#53433](https://github.com/ClickHouse/ClickHouse/pull/53433) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix `timeout_overflow_mode` when having subquery in the rhs of IN [#53439](https://github.com/ClickHouse/ClickHouse/pull/53439) ([Duc Canh Le](https://github.com/canhld94)).
* Fix an unexpected behavior in [#53152](https://github.com/ClickHouse/ClickHouse/issues/53152) [#53440](https://github.com/ClickHouse/ClickHouse/pull/53440) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
* Fix JSON_QUERY Function parse error while path is all number [#53470](https://github.com/ClickHouse/ClickHouse/pull/53470) ([KevinyhZou](https://github.com/KevinyhZou)).
* Fix wrong columns order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fixed SELECTing from ReplacingMergeTree with do_not_merge_across_partitions_select_final [#53511](https://github.com/ClickHouse/ClickHouse/pull/53511) ([Vasily Nemkov](https://github.com/Enmk)).
* Flush async insert queue first on shutdown [#53547](https://github.com/ClickHouse/ClickHouse/pull/53547) ([joelynch](https://github.com/joelynch)).
* Fix crash in join on sparse columna [#53548](https://github.com/ClickHouse/ClickHouse/pull/53548) ([vdimir](https://github.com/vdimir)).
* Fix possible UB in Set skipping index for functions with incorrect args [#53559](https://github.com/ClickHouse/ClickHouse/pull/53559) ([Azat Khuzhin](https://github.com/azat)).
* Fix possible UB in inverted indexes (experimental feature) [#53560](https://github.com/ClickHouse/ClickHouse/pull/53560) ([Azat Khuzhin](https://github.com/azat)).
* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix number of dropped granules in EXPLAIN PLAN index=1 [#53616](https://github.com/ClickHouse/ClickHouse/pull/53616) ([wangxiaobo](https://github.com/wzb5212)).
* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)).
* Prepared set cache in mutation pipeline stuck [#53645](https://github.com/ClickHouse/ClickHouse/pull/53645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix bug on mutations with subcolumns of type JSON in predicates of UPDATE and DELETE queries. [#53677](https://github.com/ClickHouse/ClickHouse/pull/53677) ([VanDarkholme7](https://github.com/VanDarkholme7)).
* Fix filter pushdown for full_sorting_merge join [#53699](https://github.com/ClickHouse/ClickHouse/pull/53699) ([vdimir](https://github.com/vdimir)).
* Try to fix bug with `NULL::LowCardinality(Nullable(...)) NOT IN` [#53706](https://github.com/ClickHouse/ClickHouse/pull/53706) ([Andrey Zvonov](https://github.com/zvonand)).
* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)).
* `transform`: correctly handle default column with multiple rows [#53742](https://github.com/ClickHouse/ClickHouse/pull/53742) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Fix fuzzer crash in parseDateTime [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)).
* MaterializedPostgreSQL: fix uncaught exception in getCreateTableQueryImpl [#53832](https://github.com/ClickHouse/ClickHouse/pull/53832) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix possible segfault while using PostgreSQL engine [#53847](https://github.com/ClickHouse/ClickHouse/pull/53847) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix named_collection_admin alias [#54066](https://github.com/ClickHouse/ClickHouse/pull/54066) ([Kseniia Sumarokova](https://github.com/kssenii)).
### <a id="237"></a> ClickHouse release 23.7, 2023-07-27
#### Backward Incompatible Change

View File

@ -35,10 +35,6 @@ find_package(Threads REQUIRED)
include (cmake/unwind.cmake)
include (cmake/cxx.cmake)
# Delay the call to link the global interface after the libc++ libraries are included to avoid circular dependencies
# which are ok with static libraries but not with dynamic ones
link_libraries(global-group)
if (NOT OS_ANDROID)
if (NOT USE_MUSL)
# Our compatibility layer doesn't build under Android, many errors in musl.
@ -47,6 +43,8 @@ if (NOT OS_ANDROID)
add_subdirectory(base/harmful)
endif ()
link_libraries(global-group)
target_link_libraries(global-group INTERFACE
-Wl,--start-group
$<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES>

View File

@ -136,9 +136,7 @@ add_contrib (aws-cmake
)
add_contrib (base64-cmake base64)
if (NOT ARCH_S390X)
add_contrib (simdjson-cmake simdjson)
endif()
add_contrib (rapidjson-cmake rapidjson)
add_contrib (fastops-cmake fastops)
add_contrib (libuv-cmake libuv)

View File

@ -334,20 +334,36 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/api_vector.cc"
"${LIBRARY_DIR}/compute/cast.cc"
"${LIBRARY_DIR}/compute/exec.cc"
"${LIBRARY_DIR}/compute/exec/accumulation_queue.cc"
"${LIBRARY_DIR}/compute/exec/accumulation_queue.h"
"${LIBRARY_DIR}/compute/exec/aggregate.cc"
"${LIBRARY_DIR}/compute/exec/aggregate_node.cc"
"${LIBRARY_DIR}/compute/exec/asof_join_node.cc"
"${LIBRARY_DIR}/compute/exec/bloom_filter.cc"
"${LIBRARY_DIR}/compute/exec/exec_plan.cc"
"${LIBRARY_DIR}/compute/exec/expression.cc"
"${LIBRARY_DIR}/compute/exec/filter_node.cc"
"${LIBRARY_DIR}/compute/exec/project_node.cc"
"${LIBRARY_DIR}/compute/exec/source_node.cc"
"${LIBRARY_DIR}/compute/exec/sink_node.cc"
"${LIBRARY_DIR}/compute/exec/hash_join.cc"
"${LIBRARY_DIR}/compute/exec/hash_join_dict.cc"
"${LIBRARY_DIR}/compute/exec/hash_join_node.cc"
"${LIBRARY_DIR}/compute/exec/key_hash.cc"
"${LIBRARY_DIR}/compute/exec/key_map.cc"
"${LIBRARY_DIR}/compute/exec/map_node.cc"
"${LIBRARY_DIR}/compute/exec/options.cc"
"${LIBRARY_DIR}/compute/exec/order_by_impl.cc"
"${LIBRARY_DIR}/compute/exec/partition_util.cc"
"${LIBRARY_DIR}/compute/exec/project_node.cc"
"${LIBRARY_DIR}/compute/exec/query_context.cc"
"${LIBRARY_DIR}/compute/exec/sink_node.cc"
"${LIBRARY_DIR}/compute/exec/source_node.cc"
"${LIBRARY_DIR}/compute/exec/swiss_join.cc"
"${LIBRARY_DIR}/compute/exec/task_util.cc"
"${LIBRARY_DIR}/compute/exec/tpch_node.cc"
"${LIBRARY_DIR}/compute/exec/union_node.cc"
"${LIBRARY_DIR}/compute/exec/util.cc"
"${LIBRARY_DIR}/compute/function.cc"
"${LIBRARY_DIR}/compute/function_internal.cc"
"${LIBRARY_DIR}/compute/kernel.cc"
"${LIBRARY_DIR}/compute/light_array.cc"
"${LIBRARY_DIR}/compute/registry.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc"
"${LIBRARY_DIR}/compute/kernels/aggregate_quantile.cc"
@ -355,49 +371,43 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/kernels/aggregate_var_std.cc"
"${LIBRARY_DIR}/compute/kernels/codegen_internal.cc"
"${LIBRARY_DIR}/compute/kernels/hash_aggregate.cc"
"${LIBRARY_DIR}/compute/kernels/row_encoder.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_arithmetic.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_boolean.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_dictionary.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_extension.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_nested.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_numeric.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_string.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_compare.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_nested.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_random.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_round.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_string_ascii.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_string_utf8.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_temporal_binary.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_temporal_unary.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_validity.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_string_ascii.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_string_utf8.cc"
"${LIBRARY_DIR}/compute/kernels/util_internal.cc"
"${LIBRARY_DIR}/compute/kernels/vector_array_sort.cc"
"${LIBRARY_DIR}/compute/kernels/vector_cumulative_ops.cc"
"${LIBRARY_DIR}/compute/kernels/vector_hash.cc"
"${LIBRARY_DIR}/compute/kernels/vector_rank.cc"
"${LIBRARY_DIR}/compute/kernels/vector_select_k.cc"
"${LIBRARY_DIR}/compute/kernels/vector_nested.cc"
"${LIBRARY_DIR}/compute/kernels/vector_rank.cc"
"${LIBRARY_DIR}/compute/kernels/vector_replace.cc"
"${LIBRARY_DIR}/compute/kernels/vector_select_k.cc"
"${LIBRARY_DIR}/compute/kernels/vector_selection.cc"
"${LIBRARY_DIR}/compute/kernels/vector_sort.cc"
"${LIBRARY_DIR}/compute/kernels/row_encoder.cc"
"${LIBRARY_DIR}/compute/exec/union_node.cc"
"${LIBRARY_DIR}/compute/exec/key_hash.cc"
"${LIBRARY_DIR}/compute/exec/key_map.cc"
"${LIBRARY_DIR}/compute/exec/util.cc"
"${LIBRARY_DIR}/compute/exec/hash_join_dict.cc"
"${LIBRARY_DIR}/compute/exec/hash_join.cc"
"${LIBRARY_DIR}/compute/exec/hash_join_node.cc"
"${LIBRARY_DIR}/compute/exec/task_util.cc"
"${LIBRARY_DIR}/compute/light_array.cc"
"${LIBRARY_DIR}/compute/registry.cc"
"${LIBRARY_DIR}/compute/row/compare_internal.cc"
"${LIBRARY_DIR}/compute/row/encode_internal.cc"
"${LIBRARY_DIR}/compute/row/grouper.cc"
"${LIBRARY_DIR}/compute/row/compare_internal.cc"
"${LIBRARY_DIR}/compute/row/row_internal.cc"
"${LIBRARY_DIR}/ipc/dictionary.cc"

2
contrib/boost vendored

@ -1 +1 @@
Subproject commit 063a9372b4ae304e869a5c5724971d0501552731
Subproject commit ae94606a70f1e298ce2a5718db858079185c4d9c

View File

@ -25,18 +25,16 @@ if (OS_LINUX)
)
endif ()
# headers-only
# headers-only
add_library (_boost_headers_only INTERFACE)
add_library (boost::headers_only ALIAS _boost_headers_only)
target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${LIBRARY_DIR})
# asio
target_compile_definitions (_boost_headers_only INTERFACE
BOOST_ASIO_STANDALONE=1
# Avoid using of deprecated in c++ > 17 std::result_of
BOOST_ASIO_HAS_STD_INVOKE_RESULT=1
BOOST_ASIO_HAS_STD_INVOKE_RESULT=1 # Avoid using of deprecated in c++ > 17 std::result_of
BOOST_TIMER_ENABLE_DEPRECATED=1 # wordnet-blast (enabled via USE_NLP) uses Boost legacy timer classes
)
# iostreams

2
contrib/libpqxx vendored

@ -1 +1 @@
Subproject commit bdd6540fb95ff56c813691ceb5da5a3266cf235d
Subproject commit 791d68fd89902835133c50435e380ec7a73271b7

2
contrib/lz4 vendored

@ -1 +1 @@
Subproject commit e82198428c8061372d5adef1f9bfff4203f6081e
Subproject commit 92ebf1870b9acbefc0e7970409a181954a10ff40

View File

@ -13,6 +13,11 @@ add_library (ch_contrib::lz4 ALIAS _lz4)
target_compile_definitions (_lz4 PUBLIC LZ4_DISABLE_DEPRECATE_WARNINGS=1)
target_compile_definitions (_lz4 PUBLIC LZ4_FAST_DEC_LOOP=1)
if(ARCH_S390X)
target_compile_definitions(_lz4 PRIVATE LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT)
endif()
if (SANITIZE STREQUAL "undefined")
target_compile_options (_lz4 PRIVATE -fno-sanitize=undefined)
endif ()

2
contrib/openssl vendored

@ -1 +1 @@
Subproject commit 19cc035b6c6f2283573d29c7ea7f7d675cf750ce
Subproject commit 245cb0291e0db99d9ccf3692fa76f440b2b054c2

View File

@ -1,8 +1,8 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/cmp.h.in
* Generated by Makefile from include/openssl/cmp.h.in
*
* Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved.
* Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved.
* Copyright Nokia 2007-2019
* Copyright Siemens AG 2015-2019
*
@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO;
* -- CertReqMsg
* }
*/
# define OSSL_CMP_PKISTATUS_accepted 0
# define OSSL_CMP_PKISTATUS_grantedWithMods 1
# define OSSL_CMP_PKISTATUS_rejection 2
# define OSSL_CMP_PKISTATUS_waiting 3
# define OSSL_CMP_PKISTATUS_revocationWarning 4
# define OSSL_CMP_PKISTATUS_request -3
# define OSSL_CMP_PKISTATUS_trans -2
# define OSSL_CMP_PKISTATUS_unspecified -1
# define OSSL_CMP_PKISTATUS_accepted 0
# define OSSL_CMP_PKISTATUS_grantedWithMods 1
# define OSSL_CMP_PKISTATUS_rejection 2
# define OSSL_CMP_PKISTATUS_waiting 3
# define OSSL_CMP_PKISTATUS_revocationWarning 4
# define OSSL_CMP_PKISTATUS_revocationNotification 5
# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6
# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6
typedef ASN1_INTEGER OSSL_CMP_PKISTATUS;
DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS)
@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted,
int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey);
int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx,
const unsigned char *ref, int len);
int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec,
const int len);
int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx,
const unsigned char *sec, int len);
/* CMP message header and extra certificates: */
int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name);
int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav);
int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx);
int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx,
STACK_OF(X509) *extraCertsOut);
/* certificate template: */
@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr);
OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg);
OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid);
OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx,
const char *propq);

View File

@ -1,6 +1,6 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/opensslv.h.in
* Generated by Makefile from include/openssl/opensslv.h.in
*
* Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved.
*
@ -29,7 +29,7 @@ extern "C" {
*/
# define OPENSSL_VERSION_MAJOR 3
# define OPENSSL_VERSION_MINOR 0
# define OPENSSL_VERSION_PATCH 7
# define OPENSSL_VERSION_PATCH 10
/*
* Additional version information
@ -74,21 +74,21 @@ extern "C" {
* longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and
* OPENSSL_VERSION_BUILD_METADATA_STR appended.
*/
# define OPENSSL_VERSION_STR "3.0.7"
# define OPENSSL_FULL_VERSION_STR "3.0.7"
# define OPENSSL_VERSION_STR "3.0.10"
# define OPENSSL_FULL_VERSION_STR "3.0.10"
/*
* SECTION 3: ADDITIONAL METADATA
*
* These strings are defined separately to allow them to be parsable.
*/
# define OPENSSL_RELEASE_DATE "1 Nov 2022"
# define OPENSSL_RELEASE_DATE "1 Aug 2023"
/*
* SECTION 4: BACKWARD COMPATIBILITY
*/
# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022"
# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023"
/* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */
# ifdef OPENSSL_VERSION_PRE_RELEASE

View File

@ -1,8 +1,8 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/x509v3.h.in
* Generated by Makefile from include/openssl/x509v3.h.in
*
* Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved.
* Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st {
OTHERNAME *otherName; /* otherName */
ASN1_IA5STRING *rfc822Name;
ASN1_IA5STRING *dNSName;
ASN1_TYPE *x400Address;
ASN1_STRING *x400Address;
X509_NAME *directoryName;
EDIPARTYNAME *ediPartyName;
ASN1_IA5STRING *uniformResourceIdentifier;

View File

@ -1,8 +1,8 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/cmp.h.in
* Generated by Makefile from include/openssl/cmp.h.in
*
* Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved.
* Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved.
* Copyright Nokia 2007-2019
* Copyright Siemens AG 2015-2019
*
@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO;
* -- CertReqMsg
* }
*/
# define OSSL_CMP_PKISTATUS_accepted 0
# define OSSL_CMP_PKISTATUS_grantedWithMods 1
# define OSSL_CMP_PKISTATUS_rejection 2
# define OSSL_CMP_PKISTATUS_waiting 3
# define OSSL_CMP_PKISTATUS_revocationWarning 4
# define OSSL_CMP_PKISTATUS_request -3
# define OSSL_CMP_PKISTATUS_trans -2
# define OSSL_CMP_PKISTATUS_unspecified -1
# define OSSL_CMP_PKISTATUS_accepted 0
# define OSSL_CMP_PKISTATUS_grantedWithMods 1
# define OSSL_CMP_PKISTATUS_rejection 2
# define OSSL_CMP_PKISTATUS_waiting 3
# define OSSL_CMP_PKISTATUS_revocationWarning 4
# define OSSL_CMP_PKISTATUS_revocationNotification 5
# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6
# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6
typedef ASN1_INTEGER OSSL_CMP_PKISTATUS;
DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS)
@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted,
int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey);
int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx,
const unsigned char *ref, int len);
int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec,
const int len);
int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx,
const unsigned char *sec, int len);
/* CMP message header and extra certificates: */
int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name);
int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav);
int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx);
int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx,
STACK_OF(X509) *extraCertsOut);
/* certificate template: */
@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr);
OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg);
OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid);
OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx,
const char *propq);

View File

@ -1,6 +1,6 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/opensslv.h.in
* Generated by Makefile from include/openssl/opensslv.h.in
*
* Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved.
*
@ -29,7 +29,7 @@ extern "C" {
*/
# define OPENSSL_VERSION_MAJOR 3
# define OPENSSL_VERSION_MINOR 0
# define OPENSSL_VERSION_PATCH 7
# define OPENSSL_VERSION_PATCH 10
/*
* Additional version information
@ -74,21 +74,21 @@ extern "C" {
* longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and
* OPENSSL_VERSION_BUILD_METADATA_STR appended.
*/
# define OPENSSL_VERSION_STR "3.0.7"
# define OPENSSL_FULL_VERSION_STR "3.0.7"
# define OPENSSL_VERSION_STR "3.0.10"
# define OPENSSL_FULL_VERSION_STR "3.0.10"
/*
* SECTION 3: ADDITIONAL METADATA
*
* These strings are defined separately to allow them to be parsable.
*/
# define OPENSSL_RELEASE_DATE "1 Nov 2022"
# define OPENSSL_RELEASE_DATE "1 Aug 2023"
/*
* SECTION 4: BACKWARD COMPATIBILITY
*/
# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022"
# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023"
/* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */
# ifdef OPENSSL_VERSION_PRE_RELEASE

View File

@ -1,8 +1,8 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/x509v3.h.in
* Generated by Makefile from include/openssl/x509v3.h.in
*
* Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved.
* Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st {
OTHERNAME *otherName; /* otherName */
ASN1_IA5STRING *rfc822Name;
ASN1_IA5STRING *dNSName;
ASN1_TYPE *x400Address;
ASN1_STRING *x400Address;
X509_NAME *directoryName;
EDIPARTYNAME *ediPartyName;
ASN1_IA5STRING *uniformResourceIdentifier;

View File

@ -1,8 +1,8 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/cmp.h.in
* Generated by Makefile from include/openssl/cmp.h.in
*
* Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved.
* Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved.
* Copyright Nokia 2007-2019
* Copyright Siemens AG 2015-2019
*
@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO;
* -- CertReqMsg
* }
*/
# define OSSL_CMP_PKISTATUS_accepted 0
# define OSSL_CMP_PKISTATUS_grantedWithMods 1
# define OSSL_CMP_PKISTATUS_rejection 2
# define OSSL_CMP_PKISTATUS_waiting 3
# define OSSL_CMP_PKISTATUS_revocationWarning 4
# define OSSL_CMP_PKISTATUS_request -3
# define OSSL_CMP_PKISTATUS_trans -2
# define OSSL_CMP_PKISTATUS_unspecified -1
# define OSSL_CMP_PKISTATUS_accepted 0
# define OSSL_CMP_PKISTATUS_grantedWithMods 1
# define OSSL_CMP_PKISTATUS_rejection 2
# define OSSL_CMP_PKISTATUS_waiting 3
# define OSSL_CMP_PKISTATUS_revocationWarning 4
# define OSSL_CMP_PKISTATUS_revocationNotification 5
# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6
# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6
typedef ASN1_INTEGER OSSL_CMP_PKISTATUS;
DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS)
@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted,
int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey);
int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx,
const unsigned char *ref, int len);
int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec,
const int len);
int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx,
const unsigned char *sec, int len);
/* CMP message header and extra certificates: */
int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name);
int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav);
int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx);
int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx,
STACK_OF(X509) *extraCertsOut);
/* certificate template: */
@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr);
OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg);
OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid);
OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx,
const char *propq);

View File

@ -1,6 +1,6 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/opensslv.h.in
* Generated by Makefile from include/openssl/opensslv.h.in
*
* Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved.
*
@ -29,7 +29,7 @@ extern "C" {
*/
# define OPENSSL_VERSION_MAJOR 3
# define OPENSSL_VERSION_MINOR 0
# define OPENSSL_VERSION_PATCH 7
# define OPENSSL_VERSION_PATCH 10
/*
* Additional version information
@ -74,21 +74,21 @@ extern "C" {
* longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and
* OPENSSL_VERSION_BUILD_METADATA_STR appended.
*/
# define OPENSSL_VERSION_STR "3.0.7"
# define OPENSSL_FULL_VERSION_STR "3.0.7"
# define OPENSSL_VERSION_STR "3.0.10"
# define OPENSSL_FULL_VERSION_STR "3.0.10"
/*
* SECTION 3: ADDITIONAL METADATA
*
* These strings are defined separately to allow them to be parsable.
*/
# define OPENSSL_RELEASE_DATE "1 Nov 2022"
# define OPENSSL_RELEASE_DATE "1 Aug 2023"
/*
* SECTION 4: BACKWARD COMPATIBILITY
*/
# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022"
# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023"
/* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */
# ifdef OPENSSL_VERSION_PRE_RELEASE

View File

@ -1,8 +1,8 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/x509v3.h.in
* Generated by Makefile from include/openssl/x509v3.h.in
*
* Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved.
* Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st {
OTHERNAME *otherName; /* otherName */
ASN1_IA5STRING *rfc822Name;
ASN1_IA5STRING *dNSName;
ASN1_TYPE *x400Address;
ASN1_STRING *x400Address;
X509_NAME *directoryName;
EDIPARTYNAME *ediPartyName;
ASN1_IA5STRING *uniformResourceIdentifier;

View File

@ -1,8 +1,8 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/cmp.h.in
* Generated by Makefile from include/openssl/cmp.h.in
*
* Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved.
* Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved.
* Copyright Nokia 2007-2019
* Copyright Siemens AG 2015-2019
*
@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO;
* -- CertReqMsg
* }
*/
# define OSSL_CMP_PKISTATUS_accepted 0
# define OSSL_CMP_PKISTATUS_grantedWithMods 1
# define OSSL_CMP_PKISTATUS_rejection 2
# define OSSL_CMP_PKISTATUS_waiting 3
# define OSSL_CMP_PKISTATUS_revocationWarning 4
# define OSSL_CMP_PKISTATUS_request -3
# define OSSL_CMP_PKISTATUS_trans -2
# define OSSL_CMP_PKISTATUS_unspecified -1
# define OSSL_CMP_PKISTATUS_accepted 0
# define OSSL_CMP_PKISTATUS_grantedWithMods 1
# define OSSL_CMP_PKISTATUS_rejection 2
# define OSSL_CMP_PKISTATUS_waiting 3
# define OSSL_CMP_PKISTATUS_revocationWarning 4
# define OSSL_CMP_PKISTATUS_revocationNotification 5
# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6
# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6
typedef ASN1_INTEGER OSSL_CMP_PKISTATUS;
DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS)
@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted,
int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey);
int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx,
const unsigned char *ref, int len);
int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec,
const int len);
int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx,
const unsigned char *sec, int len);
/* CMP message header and extra certificates: */
int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name);
int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav);
int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx);
int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx,
STACK_OF(X509) *extraCertsOut);
/* certificate template: */
@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr);
OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg);
int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg);
OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid);
OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx,
const char *propq);

View File

@ -1,6 +1,6 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/opensslv.h.in
* Generated by Makefile from include/openssl/opensslv.h.in
*
* Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved.
*
@ -29,7 +29,7 @@ extern "C" {
*/
# define OPENSSL_VERSION_MAJOR 3
# define OPENSSL_VERSION_MINOR 0
# define OPENSSL_VERSION_PATCH 7
# define OPENSSL_VERSION_PATCH 10
/*
* Additional version information
@ -74,21 +74,21 @@ extern "C" {
* longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and
* OPENSSL_VERSION_BUILD_METADATA_STR appended.
*/
# define OPENSSL_VERSION_STR "3.0.7"
# define OPENSSL_FULL_VERSION_STR "3.0.7"
# define OPENSSL_VERSION_STR "3.0.10"
# define OPENSSL_FULL_VERSION_STR "3.0.10"
/*
* SECTION 3: ADDITIONAL METADATA
*
* These strings are defined separately to allow them to be parsable.
*/
# define OPENSSL_RELEASE_DATE "1 Nov 2022"
# define OPENSSL_RELEASE_DATE "1 Aug 2023"
/*
* SECTION 4: BACKWARD COMPATIBILITY
*/
# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022"
# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023"
/* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */
# ifdef OPENSSL_VERSION_PRE_RELEASE

View File

@ -1,8 +1,8 @@
/*
* WARNING: do not edit!
* Generated by Makefile from ../include/openssl/x509v3.h.in
* Generated by Makefile from include/openssl/x509v3.h.in
*
* Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved.
* Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st {
OTHERNAME *otherName; /* otherName */
ASN1_IA5STRING *rfc822Name;
ASN1_IA5STRING *dNSName;
ASN1_TYPE *x400Address;
ASN1_STRING *x400Address;
X509_NAME *directoryName;
EDIPARTYNAME *ediPartyName;
ASN1_IA5STRING *uniformResourceIdentifier;

View File

@ -32,7 +32,6 @@ set(RE2_SOURCES
${SRC_DIR}/re2/tostring.cc
${SRC_DIR}/re2/unicode_casefold.cc
${SRC_DIR}/re2/unicode_groups.cc
${SRC_DIR}/util/pcre.cc
${SRC_DIR}/util/rune.cc
${SRC_DIR}/util/strutil.cc
)

View File

@ -20,6 +20,7 @@ echo '/boost/context/*' >> $FILES_TO_CHECKOUT
echo '/boost/convert/*' >> $FILES_TO_CHECKOUT
echo '/boost/coroutine/*' >> $FILES_TO_CHECKOUT
echo '/boost/core/*' >> $FILES_TO_CHECKOUT
echo '/boost/describe/*' >> $FILES_TO_CHECKOUT
echo '/boost/detail/*' >> $FILES_TO_CHECKOUT
echo '/boost/dynamic_bitset/*' >> $FILES_TO_CHECKOUT
echo '/boost/exception/*' >> $FILES_TO_CHECKOUT
@ -82,4 +83,4 @@ echo '/libs/*' >> $FILES_TO_CHECKOUT
git config core.sparsecheckout true
git checkout $1
git read-tree -mu HEAD
git read-tree -mu HEAD

2
contrib/usearch vendored

@ -1 +1 @@
Subproject commit 387b78b28b17b8954024ffc81e97cbcfa10d1f30
Subproject commit f942b6f334b31716f9bdb02eb6a25fa6b222f5ba

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
esac
ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
ARG VERSION="23.7.4.5"
ARG VERSION="23.7.5.30"
ARG PACKAGES="clickhouse-keeper"
# user/group precreated explicitly with fixed uid/gid on purpose.

View File

@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.7.4.5"
ARG VERSION="23.7.5.30"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.

View File

@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.7.4.5"
ARG VERSION="23.7.5.30"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -5,61 +5,166 @@
# and their names will contain a hash of the table structure,
# which allows exporting tables from servers of different versions.
# Config file contains KEY=VALUE pairs with any necessary parameters like:
# CLICKHOUSE_CI_LOGS_HOST - remote host
# CLICKHOUSE_CI_LOGS_USER - password for user
# CLICKHOUSE_CI_LOGS_PASSWORD - password for user
CLICKHOUSE_CI_LOGS_CREDENTIALS=${CLICKHOUSE_CI_LOGS_CREDENTIALS:-/tmp/export-logs-config.sh}
CLICKHOUSE_CI_LOGS_USER=${CLICKHOUSE_CI_LOGS_USER:-ci}
# Pre-configured destination cluster, where to export the data
CLUSTER=${CLUSTER:=system_logs_export}
CLICKHOUSE_CI_LOGS_CLUSTER=${CLICKHOUSE_CI_LOGS_CLUSTER:-system_logs_export}
EXTRA_COLUMNS=${EXTRA_COLUMNS:="pull_request_number UInt32, commit_sha String, check_start_time DateTime, check_name LowCardinality(String), instance_type LowCardinality(String), "}
EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:="0 AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type"}
EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:="check_name, "}
EXTRA_COLUMNS=${EXTRA_COLUMNS:-"pull_request_number UInt32, commit_sha String, check_start_time DateTime, check_name LowCardinality(String), instance_type LowCardinality(String), "}
EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:-"0 AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type"}
EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:-"check_name, "}
CONNECTION_PARAMETERS=${CONNECTION_PARAMETERS:=""}
function __set_connection_args
{
# It's impossible to use generous $CONNECTION_ARGS string, it's unsafe from word splitting perspective.
# That's why we must stick to the generated option
CONNECTION_ARGS=(
--receive_timeout=10 --send_timeout=10 --secure
--user "${CLICKHOUSE_CI_LOGS_USER}" --host "${CLICKHOUSE_CI_LOGS_HOST}"
--password "${CLICKHOUSE_CI_LOGS_PASSWORD}"
)
}
# Create all configured system logs:
clickhouse-client --query "SYSTEM FLUSH LOGS"
function __shadow_credentials
{
# The function completely screws the output, it shouldn't be used in normal functions, only in ()
# The only way to substitute the env as a plain text is using perl 's/\Qsomething\E/another/
exec &> >(perl -pe '
s(\Q$ENV{CLICKHOUSE_CI_LOGS_HOST}\E)[CLICKHOUSE_CI_LOGS_HOST]g;
s(\Q$ENV{CLICKHOUSE_CI_LOGS_USER}\E)[CLICKHOUSE_CI_LOGS_USER]g;
s(\Q$ENV{CLICKHOUSE_CI_LOGS_PASSWORD}\E)[CLICKHOUSE_CI_LOGS_PASSWORD]g;
')
}
# It's doesn't make sense to try creating tables if SYNC fails
echo "SYSTEM SYNC DATABASE REPLICA default" | clickhouse-client --receive_timeout 180 $CONNECTION_PARAMETERS || exit 0
function check_logs_credentials
(
# The function connects with given credentials, and if it's unable to execute the simplest query, returns exit code
# For each system log table:
clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table
do
# Calculate hash of its structure:
hash=$(clickhouse-client --query "
SELECT sipHash64(groupArray((name, type)))
FROM (SELECT name, type FROM system.columns
WHERE database = 'system' AND table = '$table'
ORDER BY position)
")
# First check, if all necessary parameters are set
set +x
for parameter in CLICKHOUSE_CI_LOGS_HOST CLICKHOUSE_CI_LOGS_USER CLICKHOUSE_CI_LOGS_PASSWORD; do
export -p | grep -q "$parameter" || {
echo "Credentials parameter $parameter is unset"
return 1
}
done
# Create the destination table with adapted name and structure:
statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e '
s/^\($/('"$EXTRA_COLUMNS"'/;
s/ORDER BY \(/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"'/;
s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/;
/^TTL /d
')
__shadow_credentials
__set_connection_args
local code
# Catch both success and error to not fail on `set -e`
clickhouse-client "${CONNECTION_ARGS[@]}" -q 'SELECT 1 FORMAT Null' && return 0 || code=$?
if [ "$code" != 0 ]; then
echo 'Failed to connect to CI Logs cluster'
return $code
fi
)
echo "Creating destination table ${table}_${hash}" >&2
function config_logs_export_cluster
(
# The function is launched in a separate shell instance to not expose the
# exported values from CLICKHOUSE_CI_LOGS_CREDENTIALS
set +x
if ! [ -r "${CLICKHOUSE_CI_LOGS_CREDENTIALS}" ]; then
echo "File $CLICKHOUSE_CI_LOGS_CREDENTIALS does not exist, do not setup"
return
fi
set -a
# shellcheck disable=SC1090
source "${CLICKHOUSE_CI_LOGS_CREDENTIALS}"
set +a
__shadow_credentials
echo "Checking if the credentials work"
check_logs_credentials || return 0
cluster_config="${1:-/etc/clickhouse-server/config.d/system_logs_export.yaml}"
mkdir -p "$(dirname "$cluster_config")"
echo "remote_servers:
${CLICKHOUSE_CI_LOGS_CLUSTER}:
shard:
replica:
secure: 1
user: '${CLICKHOUSE_CI_LOGS_USER}'
host: '${CLICKHOUSE_CI_LOGS_HOST}'
port: 9440
password: '${CLICKHOUSE_CI_LOGS_PASSWORD}'
" > "$cluster_config"
echo "Cluster ${CLICKHOUSE_CI_LOGS_CLUSTER} is confugured in ${cluster_config}"
)
echo "$statement" | clickhouse-client --distributed_ddl_task_timeout=10 $CONNECTION_PARAMETERS || continue
function setup_logs_replication
(
# The function is launched in a separate shell instance to not expose the
# exported values from CLICKHOUSE_CI_LOGS_CREDENTIALS
set +x
# disable output
if ! [ -r "${CLICKHOUSE_CI_LOGS_CREDENTIALS}" ]; then
echo "File $CLICKHOUSE_CI_LOGS_CREDENTIALS does not exist, do not setup"
return 0
fi
set -a
# shellcheck disable=SC1090
source "${CLICKHOUSE_CI_LOGS_CREDENTIALS}"
set +a
__shadow_credentials
echo "Checking if the credentials work"
check_logs_credentials || return 0
__set_connection_args
echo "Creating table system.${table}_sender" >&2
echo 'Create all configured system logs'
clickhouse-client --query "SYSTEM FLUSH LOGS"
# Create Distributed table and materialized view to watch on the original table:
clickhouse-client --query "
CREATE TABLE system.${table}_sender
ENGINE = Distributed(${CLUSTER}, default, ${table}_${hash})
SETTINGS flush_on_detach=0
EMPTY AS
SELECT ${EXTRA_COLUMNS_EXPRESSION}, *
FROM system.${table}
"
# It's doesn't make sense to try creating tables if SYNC fails
echo "SYSTEM SYNC DATABASE REPLICA default" | clickhouse-client --receive_timeout 180 "${CONNECTION_ARGS[@]}" || return 0
echo "Creating materialized view system.${table}_watcher" >&2
# For each system log table:
echo 'Create %_log tables'
clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table
do
# Calculate hash of its structure:
hash=$(clickhouse-client --query "
SELECT sipHash64(groupArray((name, type)))
FROM (SELECT name, type FROM system.columns
WHERE database = 'system' AND table = '$table'
ORDER BY position)
")
clickhouse-client --query "
CREATE MATERIALIZED VIEW system.${table}_watcher TO system.${table}_sender AS
SELECT ${EXTRA_COLUMNS_EXPRESSION}, *
FROM system.${table}
"
done
# Create the destination table with adapted name and structure:
statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e '
s/^\($/('"$EXTRA_COLUMNS"'/;
s/ORDER BY \(/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"'/;
s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/;
/^TTL /d
')
echo -e "Creating remote destination table ${table}_${hash} with statement:\n${statement}" >&2
echo "$statement" | clickhouse-client --database_replicated_initial_query_timeout_sec=10 \
--distributed_ddl_task_timeout=10 --receive_timeout=10 --send_timeout=10 \
"${CONNECTION_ARGS[@]}" || continue
echo "Creating table system.${table}_sender" >&2
# Create Distributed table and materialized view to watch on the original table:
clickhouse-client --query "
CREATE TABLE system.${table}_sender
ENGINE = Distributed(${CLICKHOUSE_CI_LOGS_CLUSTER}, default, ${table}_${hash})
SETTINGS flush_on_detach=0
EMPTY AS
SELECT ${EXTRA_COLUMNS_EXPRESSION}, *
FROM system.${table}
" || continue
echo "Creating materialized view system.${table}_watcher" >&2
clickhouse-client --query "
CREATE MATERIALIZED VIEW system.${table}_watcher TO system.${table}_sender AS
SELECT ${EXTRA_COLUMNS_EXPRESSION}, *
FROM system.${table}
" || continue
done
)

View File

@ -1,6 +1,8 @@
#!/bin/bash
# shellcheck disable=SC2086,SC2001,SC2046,SC2030,SC2031,SC2010,SC2015
# shellcheck disable=SC1091
source /setup_export_logs.sh
set -x
# core.COMM.PID-TID
@ -123,22 +125,7 @@ EOL
</clickhouse>
EOL
# Setup a cluster for logs export to ClickHouse Cloud
# Note: these variables are provided to the Docker run command by the Python script in tests/ci
if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ]
then
echo "
remote_servers:
system_logs_export:
shard:
replica:
secure: 1
user: ci
host: '${CLICKHOUSE_CI_LOGS_HOST}'
port: 9440
password: '${CLICKHOUSE_CI_LOGS_PASSWORD}'
" > db/config.d/system_logs_export.yaml
fi
config_logs_export_cluster db/config.d/system_logs_export.yaml
}
function filter_exists_and_template
@ -242,20 +229,7 @@ quit
kill -0 $server_pid # This checks that it is our server that is started and not some other one
echo 'Server started and responded'
# Initialize export of system logs to ClickHouse Cloud
if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ]
then
export EXTRA_COLUMNS_EXPRESSION="$PR_TO_TEST AS pull_request_number, '$SHA_TO_TEST' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type"
# TODO: Check if the password will appear in the logs.
export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}"
/setup_export_logs.sh
# Unset variables after use
export CONNECTION_PARAMETERS=''
export CLICKHOUSE_CI_LOGS_HOST=''
export CLICKHOUSE_CI_LOGS_PASSWORD=''
fi
setup_logs_replication
# SC2012: Use find instead of ls to better handle non-alphanumeric filenames. They are all alphanumeric.
# SC2046: Quote this to prevent word splitting. Actually I need word splitting.

View File

@ -6,7 +6,7 @@ RUN apt-get install -y wget openjdk-8-jre
RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-3.1.0/hadoop-3.1.0.tar.gz && \
tar -xf hadoop-3.1.0.tar.gz && rm -rf hadoop-3.1.0.tar.gz
RUN wget https://dlcdn.apache.org/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz && \
RUN wget https://apache.apache.org/dist/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz && \
tar -xf apache-hive-2.3.9-bin.tar.gz && rm -rf apache-hive-2.3.9-bin.tar.gz
RUN apt install -y vim

View File

@ -103,7 +103,7 @@ RUN python3 -m pip install --no-cache-dir \
urllib3
# Hudi supports only spark 3.3.*, not 3.4
RUN curl -fsSL -O https://dlcdn.apache.org/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \
RUN curl -fsSL -O https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \
&& tar xzvf spark-3.3.2-bin-hadoop3.tgz -C / \
&& rm spark-3.3.2-bin-hadoop3.tgz

View File

@ -1,5 +1,7 @@
#!/bin/bash
# shellcheck disable=SC1091
source /setup_export_logs.sh
set -e -x
# Choose random timezone for this test run
@ -20,21 +22,7 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
./setup_minio.sh stateful
# Setup a cluster for logs export to ClickHouse Cloud
# Note: these variables are provided to the Docker run command by the Python script in tests/ci
if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ]
then
echo "
remote_servers:
system_logs_export:
shard:
replica:
secure: 1
user: ci
host: '${CLICKHOUSE_CI_LOGS_HOST}'
password: '${CLICKHOUSE_CI_LOGS_PASSWORD}'
" > /etc/clickhouse-server/config.d/system_logs_export.yaml
fi
config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
function start()
{
@ -82,20 +70,7 @@ function start()
start
# Initialize export of system logs to ClickHouse Cloud
if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ]
then
export EXTRA_COLUMNS_EXPRESSION="$PULL_REQUEST_NUMBER AS pull_request_number, '$COMMIT_SHA' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type"
# TODO: Check if the password will appear in the logs.
export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}"
./setup_export_logs.sh
# Unset variables after use
export CONNECTION_PARAMETERS=''
export CLICKHOUSE_CI_LOGS_HOST=''
export CLICKHOUSE_CI_LOGS_PASSWORD=''
fi
setup_logs_replication
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
/s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS

View File

@ -73,7 +73,7 @@ RUN arch=${TARGETARCH:-amd64} \
&& chmod +x ./mc ./minio
RUN wget --no-verbose 'https://dlcdn.apache.org/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \
RUN wget --no-verbose 'https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \
&& tar -xvf hadoop-3.3.1.tar.gz \
&& rm -rf hadoop-3.3.1.tar.gz

View File

@ -1,5 +1,8 @@
#!/bin/bash
# shellcheck disable=SC1091
source /setup_export_logs.sh
# fail on errors, verbose and export all env variables
set -e -x -a
@ -36,21 +39,7 @@ fi
./setup_minio.sh stateless
./setup_hdfs_minicluster.sh
# Setup a cluster for logs export to ClickHouse Cloud
# Note: these variables are provided to the Docker run command by the Python script in tests/ci
if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ]
then
echo "
remote_servers:
system_logs_export:
shard:
replica:
secure: 1
user: ci
host: '${CLICKHOUSE_CI_LOGS_HOST}'
password: '${CLICKHOUSE_CI_LOGS_PASSWORD}'
" > /etc/clickhouse-server/config.d/system_logs_export.yaml
fi
config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
# For flaky check we also enable thread fuzzer
if [ "$NUM_TRIES" -gt "1" ]; then
@ -116,20 +105,7 @@ do
sleep 1
done
# Initialize export of system logs to ClickHouse Cloud
if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ]
then
export EXTRA_COLUMNS_EXPRESSION="$PULL_REQUEST_NUMBER AS pull_request_number, '$COMMIT_SHA' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type"
# TODO: Check if the password will appear in the logs.
export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}"
./setup_export_logs.sh
# Unset variables after use
export CONNECTION_PARAMETERS=''
export CLICKHOUSE_CI_LOGS_HOST=''
export CLICKHOUSE_CI_LOGS_PASSWORD=''
fi
setup_logs_replication
attach_gdb_to_clickhouse || true # FIXME: to not break old builds, clean on 2023-09-01

View File

@ -5,6 +5,8 @@
# Avoid overlaps with previous runs
dmesg --clear
# shellcheck disable=SC1091
source /setup_export_logs.sh
set -x
@ -51,38 +53,11 @@ configure
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
./setup_minio.sh stateless # to have a proper environment
# Setup a cluster for logs export to ClickHouse Cloud
# Note: these variables are provided to the Docker run command by the Python script in tests/ci
if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ]
then
echo "
remote_servers:
system_logs_export:
shard:
replica:
secure: 1
user: ci
host: '${CLICKHOUSE_CI_LOGS_HOST}'
password: '${CLICKHOUSE_CI_LOGS_PASSWORD}'
" > /etc/clickhouse-server/config.d/system_logs_export.yaml
fi
config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
start
# Initialize export of system logs to ClickHouse Cloud
if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ]
then
export EXTRA_COLUMNS_EXPRESSION="$PULL_REQUEST_NUMBER AS pull_request_number, '$COMMIT_SHA' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type"
# TODO: Check if the password will appear in the logs.
export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}"
./setup_export_logs.sh
# Unset variables after use
export CONNECTION_PARAMETERS=''
export CLICKHOUSE_CI_LOGS_HOST=''
export CLICKHOUSE_CI_LOGS_PASSWORD=''
fi
setup_logs_replication
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
/s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS

View File

@ -63,7 +63,6 @@ configure
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
start
@ -94,7 +93,6 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
start
@ -131,6 +129,7 @@ sudo cat /etc/clickhouse-server/config.d/lost_forever_check.xml \
| sed "s|>1<|>0<|g" \
> /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp
sudo mv /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp /etc/clickhouse-server/config.d/lost_forever_check.xml
rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml
start 500
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \

View File

@ -0,0 +1,36 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v22.8.21.38-lts (70872e9859e) FIXME as compared to v22.8.20.11-lts (c9ca79e24e8)
#### Build/Testing/Packaging Improvement
* Backported in [#53017](https://github.com/ClickHouse/ClickHouse/issues/53017): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#53459](https://github.com/ClickHouse/ClickHouse/issues/53459): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)).
* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)).
* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)).
* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)).
* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)).
* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix broken `02862_sorted_distinct_sparse_fix` [#53738](https://github.com/ClickHouse/ClickHouse/pull/53738) ([Antonio Andelic](https://github.com/antonio2368)).
* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,17 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.3.11.5-lts (5762a23a76d) FIXME as compared to v23.3.10.5-lts (d8737007f9e)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,62 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.5.5.92-stable (557edaddace) FIXME as compared to v23.5.4.25-stable (190f962abcf)
#### Performance Improvement
* Backported in [#52749](https://github.com/ClickHouse/ClickHouse/issues/52749): Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1` . This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823) . This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173) . [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)).
#### Build/Testing/Packaging Improvement
* Backported in [#51886](https://github.com/ClickHouse/ClickHouse/issues/51886): Update cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#52909](https://github.com/ClickHouse/ClickHouse/issues/52909): Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#53021](https://github.com/ClickHouse/ClickHouse/issues/53021): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#53289](https://github.com/ClickHouse/ClickHouse/issues/53289): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud., the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#53463](https://github.com/ClickHouse/ClickHouse/issues/53463): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix backward compatibility for IP types hashing in aggregate functions [#50551](https://github.com/ClickHouse/ClickHouse/pull/50551) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix segfault in MathUnary [#51499](https://github.com/ClickHouse/ClickHouse/pull/51499) ([Ilya Yatsishin](https://github.com/qoega)).
* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)).
* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)).
* Fix async connect to hosts with multiple ips [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)).
* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)).
* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)).
* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)).
* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)).
* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)).
* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)).
* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)).
* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)).
* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix fuzzer crash in parseDateTime() [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Decoupled commits from [#51180](https://github.com/ClickHouse/ClickHouse/issues/51180) for backports [#51561](https://github.com/ClickHouse/ClickHouse/pull/51561) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix MergeTreeMarksLoader segfaulting if marks file is longer than expected [#51636](https://github.com/ClickHouse/ClickHouse/pull/51636) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix source image for sqllogic [#51728](https://github.com/ClickHouse/ClickHouse/pull/51728) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Pin rust nightly (to make it stable) [#51903](https://github.com/ClickHouse/ClickHouse/pull/51903) ([Azat Khuzhin](https://github.com/azat)).
* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Less replication errors [#52382](https://github.com/ClickHouse/ClickHouse/pull/52382) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Improve logging macros [#52519](https://github.com/ClickHouse/ClickHouse/pull/52519) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,58 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.6.3.87-stable (36911c17d0f) FIXME as compared to v23.6.2.18-stable (89f39a7ccfe)
#### Performance Improvement
* Backported in [#52751](https://github.com/ClickHouse/ClickHouse/issues/52751): Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1` . This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823) . This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173) . [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)).
#### Build/Testing/Packaging Improvement
* Backported in [#52911](https://github.com/ClickHouse/ClickHouse/issues/52911): Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#53023](https://github.com/ClickHouse/ClickHouse/issues/53023): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#53290](https://github.com/ClickHouse/ClickHouse/issues/53290): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud., the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#53465](https://github.com/ClickHouse/ClickHouse/issues/53465): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)).
* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)).
* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)).
* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)).
* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)).
* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)).
* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)).
* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)).
* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)).
* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)).
* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix fuzzer crash in parseDateTime() [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix: logical error in grace hash join [#51737](https://github.com/ClickHouse/ClickHouse/pull/51737) ([Igor Nikonov](https://github.com/devcrafter)).
* Pin rust nightly (to make it stable) [#51903](https://github.com/ClickHouse/ClickHouse/pull/51903) ([Azat Khuzhin](https://github.com/azat)).
* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Less replication errors [#52382](https://github.com/ClickHouse/ClickHouse/pull/52382) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Improve logging macros [#52519](https://github.com/ClickHouse/ClickHouse/pull/52519) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Increase min protocol version for sparse serialization [#52835](https://github.com/ClickHouse/ClickHouse/pull/52835) ([Anton Popov](https://github.com/CurtizJ)).
* Docker improvements [#52869](https://github.com/ClickHouse/ClickHouse/pull/52869) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,31 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.7.5.30-stable (e86c21fb922) FIXME as compared to v23.7.4.5-stable (bd2fcd44553)
#### Build/Testing/Packaging Improvement
* Backported in [#53291](https://github.com/ClickHouse/ClickHouse/issues/53291): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud., the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#53467](https://github.com/ClickHouse/ClickHouse/issues/53467): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)).
* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix wrong columns order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix fuzzer crash in parseDateTime() [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix broken `02862_sorted_distinct_sparse_fix` [#53738](https://github.com/ClickHouse/ClickHouse/pull/53738) ([Antonio Andelic](https://github.com/antonio2368)).
* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -39,7 +39,7 @@ CREATE TABLE s3_queue_engine_table (name String, value UInt32)
CREATE TABLE s3queue_engine_table (name String, value UInt32)
ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip')
SETTINGS
mode = 'ordred';
mode = 'ordered';
```
Using named collections:
@ -60,7 +60,7 @@ Using named collections:
CREATE TABLE s3queue_engine_table (name String, value UInt32)
ENGINE=S3Queue(s3queue_conf, format = 'CSV', compression_method = 'gzip')
SETTINGS
mode = 'ordred';
mode = 'ordered';
```
## Settings {#s3queue-settings}
@ -188,7 +188,7 @@ Example:
CREATE TABLE s3queue_engine_table (name String, value UInt32)
ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip')
SETTINGS
mode = 'unordred',
mode = 'unordered',
keeper_path = '/clickhouse/s3queue/';
CREATE TABLE stats (name String, value UInt32)

View File

@ -1,23 +1,21 @@
# Laion-400M dataset
The dataset contains 400 million images with English text. For more information follow this [link](https://laion.ai/blog/laion-400-open-dataset/). Laion provides even larger datasets (e.g. [5 billion](https://laion.ai/blog/laion-5b/)). Working with them will be similar.
The [Laion-400M dataset](https://laion.ai/blog/laion-400-open-dataset/) contains 400 million images with English image captions. Laion nowadays provides [an even larger dataset](https://laion.ai/blog/laion-5b/) but working with it will be similar.
The dataset has prepared embeddings for texts and images. This will be used to demonstrate [Approximate nearest neighbor search indexes](../../engines/table-engines/mergetree-family/annindexes.md).
The dataset contains the image URL, embeddings for both the image and the image caption, a similarity score between the image and the image caption, as well as metadata, e.g. the image width/height, the licence and a NSFW flag. We can use the dataset to demonstrate [approximate nearest neighbor search](../../engines/table-engines/mergetree-family/annindexes.md) in ClickHouse.
## Prepare data
## Data preparation
Embeddings are stored in `.npy` files, so we have to read them with python and merge with other data.
Download data and process it with simple `download.sh` script:
The embeddings and the metadata are stored in separate files in the raw data. A data preparation step downloads the data, merges the files,
converts them to CSV and imports them into ClickHouse. You can use the following `download.sh` script for that:
```bash
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy
python3 process.py ${1}
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy # download image embedding
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy # download text embedding
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet # download metadata
python3 process.py ${1} # merge files and convert to CSV
```
Where `process.py`:
Script `process.py` is defined as follows:
```python
import pandas as pd
@ -35,11 +33,11 @@ im_emb = np.load(npy_file)
text_emb = np.load(text_npy)
data = pd.read_parquet(metadata_file)
# combine them
# combine files
data = pd.concat([data, pd.DataFrame({"image_embedding" : [*im_emb]}), pd.DataFrame({"text_embedding" : [*text_emb]})], axis=1, copy=False)
# you can save more columns
data = data[['url', 'caption', 'similarity', "image_embedding", "text_embedding"]]
# columns to be imported into ClickHouse
data = data[['url', 'caption', 'NSFW', 'similarity', "image_embedding", "text_embedding"]]
# transform np.arrays to lists
data['image_embedding'] = data['image_embedding'].apply(lambda x: list(x))
@ -48,30 +46,32 @@ data['text_embedding'] = data['text_embedding'].apply(lambda x: list(x))
# this small hack is needed becase caption sometimes contains all kind of quotes
data['caption'] = data['caption'].apply(lambda x: x.replace("'", " ").replace('"', " "))
# save data to file
# export data as CSV file
data.to_csv(str_i + '.csv', header=False)
# previous files can be removed
# removed raw data files
os.system(f"rm {npy_file} {metadata_file} {text_npy}")
```
You can download data with
To start the data preparation pipeline, run:
```bash
seq 0 409 | xargs -P100 -I{} bash -c './download.sh {}'
```
The dataset is divided into 409 files. If you want to work only with a certain part of the dataset, just change the limits.
The dataset is split into 410 files, each file contains ca. 1 million rows. If you like to work with a smaller subset of the data, simply adjust the limits, e.g. `seq 0 9 | ...`.
## Create table for laion
## Create table
Without indexes table can be created by
To create a table without indexes, run:
```sql
CREATE TABLE laion_dataset
CREATE TABLE laion
(
`id` Int64,
`url` String,
`caption` String,
`NSFW` String,
`similarity` Float32,
`image_embedding` Array(Float32),
`text_embedding` Array(Float32)
@ -81,23 +81,23 @@ ORDER BY id
SETTINGS index_granularity = 8192
```
Fill table with data:
To import the CSV files into ClickHouse:
```sql
INSERT INTO laion_dataset FROM INFILE '{path_to_csv_files}/*.csv'
INSERT INTO laion FROM INFILE '{path_to_csv_files}/*.csv'
```
## Check data in table without indexes
## Run a brute-force ANN search (without ANN index)
Let's check the work of the following query on the part of the dataset (8 million records):
To run a brute-force approximate nearest neighbor search, run:
```sql
select url, caption from test_laion where similarity > 0.2 order by L2Distance(image_embedding, {target:Array(Float32)}) limit 30
SELECT url, caption FROM laion WHERE similarity > 0.2 ORDER BY L2Distance(image_embedding, {target:Array(Float32)}) LIMIT 30
```
Since the embeddings for images and texts may not match, let's also require a certain threshold of matching accuracy to get images that are more likely to satisfy our queries. The client parameter `target`, which is an array of 512 elements. See later in this article for a convenient way of obtaining such vectors. I used a random picture of a cat from the Internet as a target vector.
The filter on `similarity` makes sure that the images correspond to the image captions in the query results. `target` is an array of 512 elements and a client parameter. A convenient way to obtain such arrays will be presented at the end of the article. For now, we can run the embedding of a random cat picture as `target`.
**The result**
**Result**
```
┌─url───────────────────────────────────────────────────────────────────────────────────────────────────────────┬─caption────────────────────────────────────────────────────────────────┐
@ -114,32 +114,32 @@ Since the embeddings for images and texts may not match, let's also require a ce
8 rows in set. Elapsed: 6.432 sec. Processed 19.65 million rows, 43.96 GB (3.06 million rows/s., 6.84 GB/s.)
```
## Add indexes
## Run a ANN with an ANN index
Create a new table or follow instructions from [alter documentation](../../sql-reference/statements/alter/skipping-index.md).
Either create a new table or use [ALTER TABLE ADD INDEX](../../sql-reference/statements/alter/skipping-index.md) to add an ANN index:
```sql
CREATE TABLE laion_dataset
CREATE TABLE laion
(
`id` Int64,
`url` String,
`caption` String,
`NSFW` String,
`similarity` Float32,
`image_embedding` Array(Float32),
`text_embedding` Array(Float32),
INDEX annoy_image image_embedding TYPE annoy(1000) GRANULARITY 1000,
INDEX annoy_text text_embedding TYPE annoy(1000) GRANULARITY 1000
INDEX annoy_image image_embedding TYPE annoy(1000),
INDEX annoy_text text_embedding TYPE annoy(1000)
)
ENGINE = MergeTree
ORDER BY id
SETTINGS index_granularity = 8192
```
When created, the index will be built by L2Distance. You can read more about the parameters in the [annoy documentation](../../engines/table-engines/mergetree-family/annindexes.md#annoy-annoy). It makes sense to build indexes for a large number of granules. If you need good speed, then GRANULARITY should be several times larger than the expected number of results in the search.
Now let's check again with the same query:
By default, Annoy indexes use the L2 distance as metric. Further tuning knobs for index creation and search are described in the Annoy index [documentation](../../engines/table-engines/mergetree-family/annindexes.md). Let's check now again with the same query:
```sql
select url, caption from test_indexes_laion where similarity > 0.2 order by L2Distance(image_embedding, {target:Array(Float32)}) limit 8
SELECT url, caption FROM test_indexes_laion WHERE similarity > 0.2 ORDER BY l2Distance(image_embedding, {target:Array(Float32)}) LIMIT 8
```
**Result**
@ -159,15 +159,18 @@ select url, caption from test_indexes_laion where similarity > 0.2 order by L2Di
8 rows in set. Elapsed: 0.641 sec. Processed 22.06 thousand rows, 49.36 MB (91.53 thousand rows/s., 204.81 MB/s.)
```
The speed has increased significantly. But now, the results sometimes differ from what you are looking for. This is due to the approximation of the search and the quality of the constructed embedding. Note that the example was given for picture embeddings, but there are also text embeddings in the dataset, which can also be used for searching.
The speed increased significantly at the cost of less accurate results. This is because the ANN index only provide approximate search results. Note the example searched for similar image embeddings, yet it is also possible to search for positive image caption embeddings.
## Scripts for embeddings
## Creating embeddings with UDFs
Usually, we do not want to get embeddings from existing data, but to get them for new data and look for similar ones in old data. We can use [UDF](../../sql-reference/functions/index.md#sql-user-defined-functions) for this purpose. They will allow you to set the `target` vector without leaving the client. All of the following scripts will be written for the `ViT-B/32` model, as it was used for this dataset. You can use any model, but it is necessary to build embeddings in the dataset and for new objects using the same model.
One usually wants to create embeddings for new images or new image captions and search for similar image / image caption pairs in the data. We can use [UDF](../../sql-reference/functions/index.md#sql-user-defined-functions) to create the `target` vector without leaving the client. It is important to use the same model to create the data and new embeddings for searches. The following scripts utilize the `ViT-B/32` model which also underlies the dataset.
### Text embeddings
First, store the following Python script in the `user_scripts/` directory of your ClickHouse data path and make it executable (`chmod +x encode_text.py`).
`encode_text.py`:
```python
#!/usr/bin/python3
import clip
@ -182,10 +185,12 @@ if __name__ == '__main__':
inputs = clip.tokenize(text)
with torch.no_grad():
text_features = model.encode_text(inputs)[0].tolist()
print(text_features)
sys.stdout.flush()
```
`encode_text_function.xml`:
Then create `encode_text_function.xml` in a location referenced by `<user_defined_executable_functions_config>/path/to/*_function.xml</user_defined_executable_functions_config>` in your ClickHouse server configuration file.
```xml
<functions>
<function>
@ -203,19 +208,19 @@ if __name__ == '__main__':
</functions>
```
Now we can simply use:
You can now simply use:
```sql
SELECT encode_text('cat');
```
The first use will be slow because the model needs to be loaded. But repeated queries will be fast. Then we copy the results to ``set param_target=...`` and can easily write queries
The first run will be slow because it loads the model, but repeated runs will be fast. We can then copy the output to `SET param_target=...` and can easily write queries.
### Image embeddings
For pictures, the process is similar, but you send the path instead of the picture (if necessary, you can implement a download picture with processing, but it will take longer)
Image embeddings can be created similarly but we will provide the Python script the path to a local image instead of the image caption text.
`encode_image.py`
`encode_picture.py`
```python
#!/usr/bin/python3
import clip
@ -231,29 +236,31 @@ if __name__ == '__main__':
image = preprocess(Image.open(text.strip())).unsqueeze(0).to(device)
with torch.no_grad():
image_features = model.encode_image(image)[0].tolist()
print(image_features)
print(image_features)
sys.stdout.flush()
```
`encode_picture_function.xml`
`encode_image_function.xml`
```xml
<functions>
<function>
<type>executable_pool</type>
<name>encode_picture</name>
<name>encode_image</name>
<return_type>Array(Float32)</return_type>
<argument>
<type>String</type>
<name>path</name>
</argument>
<format>TabSeparated</format>
<command>encode_picture.py</command>
<command>encode_image.py</command>
<command_read_timeout>1000000</command_read_timeout>
</function>
</functions>
```
The query:
Then run this query:
```sql
SELECT encode_picture('some/path/to/your/picture');
SELECT encode_image('/path/to/your/image');
```

View File

@ -10,6 +10,10 @@ ClickHouse supports the MySQL wire protocol. This allow tools that are MySQL-com
## Enabling the MySQL Interface On ClickHouse Cloud
:::note
The MySQL interface for ClickHouse Cloud is currently in private preview. Please contact support@clickhouse.com to enable this feature for your ClickHouse Cloud service.
:::
1. After creating your ClickHouse Cloud Service, on the credentials screen, select the MySQL tab
![Credentials screen - Prompt](./images/mysql1.png)

View File

@ -228,8 +228,8 @@ For most input formats schema inference reads some data to determine its structu
To prevent inferring the same schema every time ClickHouse read the data from the same file, the inferred schema is cached and when accessing the same file again, ClickHouse will use the schema from the cache.
There are special settings that control this cache:
- `schema_inference_cache_max_elements_for_{file/s3/hdfs/url}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config.
- `schema_inference_use_cache_for_{file,s3,hdfs,url}` - allows turning on/off using cache for schema inference. These settings can be used in queries.
- `schema_inference_cache_max_elements_for_{file/s3/hdfs/url/azure}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config.
- `schema_inference_use_cache_for_{file,s3,hdfs,url,azure}` - allows turning on/off using cache for schema inference. These settings can be used in queries.
The schema of the file can be changed by modifying the data or by changing format settings.
For this reason, the schema inference cache identifies the schema by file source, format name, used format settings, and the last modification time of the file.

View File

@ -65,9 +65,9 @@ XML substitution example:
Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element.
## Encrypting Configuration {#encryption}
## Encrypting and Hiding Configuration {#encryption}
You can use symmetric encryption to encrypt a configuration element, for example, a password field. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add attribute `encrypted_by` with the name of the encryption codec as value to the element to encrypt.
You can use symmetric encryption to encrypt a configuration element, for example, a plaintext password or private key. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add attribute `encrypted_by` with the name of the encryption codec as value to the element to encrypt.
Unlike attributes `from_zk`, `from_env` and `incl` (or element `include`), no substitution, i.e. decryption of the encrypted value, is performed in the preprocessed file. Decryption happens only at runtime in the server process.
@ -102,6 +102,21 @@ Example:
961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
```
Even with encrypted configuration elements, encrypted elements still appear in the preprocessed configuration file. If this is a problem for your ClickHouse deployment, we suggest two alternatives: either set file permissions of the preprocessed file to 600 or use the `hide_in_preprocessed` attribute.
Example:
```xml
<clickhouse>
<interserver_http_credentials hide_in_preprocessed="true">
<user>admin</user>
<password>secret</password>
</interserver_http_credentials>
</clickhouse>
```
## User Settings {#user-settings}
The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the `users_config` element. By default, it is `users.xml`. If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`.

View File

@ -623,6 +623,19 @@ Possible values:
Default value: false
## number_of_free_entries_in_pool_to_execute_optimize_entire_partition {#number_of_free_entries_in_pool_to_execute_optimize_entire_partition}
When there is less than specified number of free entries in pool, do not execute optimizing entire partition in the background (this task generated when set `min_age_to_force_merge_seconds` and enable `min_age_to_force_merge_on_partition_only`). This is to leave free threads for regular merges and avoid "Too many parts".
Possible values:
- Positive integer.
Default value: 25
The value of the `number_of_free_entries_in_pool_to_execute_optimize_entire_partition` setting should be less than the value of the [background_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_pool_size) * [background_merges_mutations_concurrency_ratio](/docs/en/operations/server-configuration-parameters/settings.md/#background_merges_mutations_concurrency_ratio). Otherwise, ClickHouse throws an exception.
## allow_floating_point_partition_key {#allow_floating_point_partition_key}
Enables to allow floating-point number as a partition key.

View File

@ -2394,6 +2394,12 @@ Possible values:
Default value: `1`.
## use_cache_for_count_from_files {#use_cache_for_count_from_files}
Enables caching of rows number during count from files in table functions `file`/`s3`/`url`/`hdfs`/`azureBlobStorage`.
Enabled by default.
## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life}
- Type: seconds
@ -4637,3 +4643,19 @@ SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_pars
│ 1.7091 │ 15008753 │
└─────────────────────┴──────────────────────────┘
```
## validate_tcp_client_information {#validate-tcp-client-information}
Determines whether validation of client information enabled when query packet is received from a client using a TCP connection.
If `true`, an exception will be thrown on invalid client information from the TCP client.
If `false`, the data will not be validated. The server will work with clients of all versions.
The default value is `false`.
**Example**
``` xml
<validate_tcp_client_information>true</validate_tcp_client_information>
```

View File

@ -30,7 +30,7 @@ curl https://clickhouse.com/ | sh
The binary you just downloaded can run all sorts of ClickHouse tools and utilities. If you want to run ClickHouse as a database server, check out the [Quick Start](../../quick-start.mdx).
:::
## Query data in a CSV file using SQL
## Query data in a file using SQL {#query_data_in_file}
A common use of `clickhouse-local` is to run ad-hoc queries on files: where you don't have to insert the data into a table. `clickhouse-local` can stream the data from a file into a temporary table and execute your SQL.
@ -57,6 +57,19 @@ The `file` table function creates a table, and you can use `DESCRIBE` to see the
./clickhouse local -q "DESCRIBE file('reviews.tsv')"
```
:::tip
You are allowed to use globs in file name (See [glob substitutions](/docs/en/sql-reference/table-functions/file.md/#globs-in-path)).
Examples:
```bash
./clickhouse local -q "SELECT * FROM 'reviews*.jsonl'"
./clickhouse local -q "SELECT * FROM 'review_?.csv'"
./clickhouse local -q "SELECT * FROM 'review_{1..3}.csv'"
```
:::
```response
marketplace Nullable(String)
customer_id Nullable(Int64)

View File

@ -1794,6 +1794,330 @@ Return value type is always [Float64](../../sql-reference/data-types/float.md).
└─────┴──────────────────────────────────────────────────────────────────────────────────────────┘
```
## arrayRotateLeft
Rotates an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements.
If the number of elements is negative, the array is rotated to the right.
**Syntax**
``` sql
arrayRotateLeft(arr, n)
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to rotate.
**Returned value**
- An array rotated to the left by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayRotateLeft([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,1,2] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateLeft([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [5,6,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateLeft(['a','b','c','d','e'], 3) as res;
```
Result:
``` text
┌─res───────────────────┐
│ ['d','e','a','b','c'] │
└───────────────────────┘
```
## arrayRotateRight
Rotates an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements.
If the number of elements is negative, the array is rotated to the left.
**Syntax**
``` sql
arrayRotateRight(arr, n)
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to rotate.
**Returned value**
- An array rotated to the right by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayRotateRight([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [5,6,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateRight([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,1,2] │
└───────────────┘
```
Query:
``` sql
SELECT arrayRotateRight(['a','b','c','d','e'], 3) as res;
```
Result:
``` text
┌─res───────────────────┐
│ ['c','d','e','a','b'] │
└───────────────────────┘
```
## arrayShiftLeft
Shifts an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements.
New elements are filled with the provided argument or the default value of the array element type.
If the number of elements is negative, the array is shifted to the right.
**Syntax**
``` sql
arrayShiftLeft(arr, n[, default])
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to shift.
- `default` — Optional. Default value for new elements.
**Returned value**
- An array shifted to the left by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,0,0] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [0,0,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], 2, 42) as res;
```
Result:
``` text
┌─res─────────────┐
│ [3,4,5,6,42,42] │
└─────────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft(['a','b','c','d','e','f'], 3, 'foo') as res;
```
Result:
``` text
┌─res─────────────────────────────┐
│ ['d','e','f','foo','foo','foo'] │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res;
```
Result:
``` text
┌─res─────────────────┐
│ [3,4,5,6,4242,4242] │
└─────────────────────┘
```
## arrayShiftRight
Shifts an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements.
New elements are filled with the provided argument or the default value of the array element type.
If the number of elements is negative, the array is shifted to the left.
**Syntax**
``` sql
arrayShiftRight(arr, n[, default])
```
**Arguments**
- `arr` — [Array](../../sql-reference/data-types/array.md).
- `n` — Number of elements to shift.
- `default` — Optional. Default value for new elements.
**Returned value**
- An array shifted to the right by the specified number of elements.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], 2) as res;
```
Result:
``` text
┌─res───────────┐
│ [0,0,1,2,3,4] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], -2) as res;
```
Result:
``` text
┌─res───────────┐
│ [3,4,5,6,0,0] │
└───────────────┘
```
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], 2, 42) as res;
```
Result:
``` text
┌─res─────────────┐
│ [42,42,1,2,3,4] │
└─────────────────┘
```
Query:
``` sql
SELECT arrayShiftRight(['a','b','c','d','e','f'], 3, 'foo') as res;
```
Result:
``` text
┌─res─────────────────────────────┐
│ ['foo','foo','foo','a','b','c'] │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res;
```
Result:
``` text
┌─res─────────────────┐
│ [4242,4242,1,2,3,4] │
└─────────────────────┘
```
## Distance functions
All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md).

View File

@ -815,16 +815,16 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
Possible values:
- `microsecond` (possible abbreviations: `microseconds`, `us`, `u`)
- `millisecond` (possible abbreviations: `milliseconds`, `ms`)
- `second` (possible abbreviations: `seconds`, `ss`, `s`)
- `minute` (possible abbreviations: `minutes`, `mi`, `n`)
- `hour` (possible abbreviations: `hours`, `hh`, `h`)
- `day` (possible abbreviations: `days`, `dd`, `d`)
- `week` (possible abbreviations: `weeks`, `wk`, `ww`)
- `month` (possible abbreviations: `months`, `mm`, `m`)
- `quarter` (possible abbreviations: `quarters`, `qq`, `q`)
- `year` (possible abbreviations: `years`, `yyyy`, `yy`)
- `microsecond` (possible abbreviations: `us`, `u`)
- `millisecond` (possible abbreviations: `ms`)
- `second` (possible abbreviations: `ss`, `s`)
- `minute` (possible abbreviations: `mi`, `n`)
- `hour` (possible abbreviations: `hh`, `h`)
- `day` (possible abbreviations: `dd`, `d`)
- `week` (possible abbreviations: `wk`, `ww`)
- `month` (possible abbreviations: `mm`, `m`)
- `quarter` (possible abbreviations: `qq`, `q`)
- `year` (possible abbreviations: `yyyy`, `yy`)
- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

View File

@ -40,6 +40,32 @@ There are multiple ways of user identification:
- `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'`
- `IDENTIFIED BY 'qwerty'`
Password complexity requirements can be edited in [config.xml](/docs/en/operations/configuration-files). Below is an example configuration that requires passwords to be at least 12 characters long and contain 1 number. Each password complexity rule requires a regex to match against passwords and a description of the rule.
```xml
<clickhouse>
<password_complexity>
<rule>
<pattern>.{12}</pattern>
<message>be at least 12 characters long</message>
</rule>
<rule>
<pattern>\p{N}</pattern>
<message>contain at least 1 numeric character</message>
</rule>
</password_complexity>
</clickhouse>
```
:::note
In ClickHouse Cloud, by default, passwords must meet the following complexity requirements:
- Be at least 12 characters long
- Contain at least 1 numeric character
- Contain at least 1 uppercase character
- Contain at least 1 lowercase character
- Contain at least 1 special character
:::
## Examples
1. The following username is `name1` and does not require a password - which obviously doesn't provide much security:

View File

@ -60,9 +60,9 @@ Specifics of each optional clause are covered in separate sections, which are li
If you want to include all columns in the result, use the asterisk (`*`) symbol. For example, `SELECT * FROM ...`.
### COLUMNS expression
### Dynamic column selection
To match some columns in the result with a [re2](https://en.wikipedia.org/wiki/RE2_(software)) regular expression, you can use the `COLUMNS` expression.
Dynamic column selection (also known as a COLUMNS expression) allows you to match some columns in a result with a [re2](https://en.wikipedia.org/wiki/RE2_(software)) regular expression.
``` sql
COLUMNS('regexp')

View File

@ -91,7 +91,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv
## Import Sample Dataset {#import-sample-dataset}
Now its time to fill our ClickHouse server with some sample data. In this tutorial, well use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, well go with the most realistic one.
Now its time to fill our ClickHouse server with some sample data. In this tutorial, well use some anonymized metric data. There are [multiple ways to import the dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, well go with the most realistic one.
### Download and Extract Table Data {#download-and-extract-table-data}
@ -116,7 +116,7 @@ Syntax for creating tables is way more complicated compared to databases (see [r
2. Table schema, i.e. list of columns and their [data types](../sql-reference/data-types/index.md).
3. [Table engine](../engines/table-engines/index.md) and its settings, which determines all the details on how queries to this table will be physically executed.
Yandex.Metrica is a web analytics service, and sample dataset doesnt cover its full functionality, so there are only two tables to create:
There are only two tables to create:
- `hits` is a table with each action done by all users on all websites covered by the service.
- `visits` is a table that contains pre-built sessions instead of individual actions.
@ -523,7 +523,7 @@ SELECT
sumIf(Sign, has(Goals.ID, 1105530)) AS goal_visits,
(100. * goal_visits) / visits AS goal_percent
FROM tutorial.visits_v1
WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartURL) = 'yandex.ru')
WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403)
```
## Cluster Deployment {#cluster-deployment}
@ -544,19 +544,19 @@ Example config for a cluster with three shards, one replica each:
<perftest_3shards_1replicas>
<shard>
<replica>
<host>example-perftest01j.yandex.ru</host>
<host>example-perftest01j.clickhouse.com</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>example-perftest02j.yandex.ru</host>
<host>example-perftest02j.clickhouse.com</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>example-perftest03j.yandex.ru</host>
<host>example-perftest03j.clickhouse.com</host>
<port>9000</port>
</replica>
</shard>
@ -602,15 +602,15 @@ Example config for a cluster of one shard containing three replicas:
<perftest_1shards_3replicas>
<shard>
<replica>
<host>example-perftest01j.yandex.ru</host>
<host>example-perftest01j.clickhouse.com</host>
<port>9000</port>
</replica>
<replica>
<host>example-perftest02j.yandex.ru</host>
<host>example-perftest02j.clickhouse.com</host>
<port>9000</port>
</replica>
<replica>
<host>example-perftest03j.yandex.ru</host>
<host>example-perftest03j.clickhouse.com</host>
<port>9000</port>
</replica>
</shard>
@ -628,15 +628,15 @@ ZooKeeper locations are specified in the configuration file:
``` xml
<zookeeper>
<node>
<host>zoo01.yandex.ru</host>
<host>zoo01.clickhouse.com</host>
<port>2181</port>
</node>
<node>
<host>zoo02.yandex.ru</host>
<host>zoo02.clickhouse.com</host>
<port>2181</port>
</node>
<node>
<host>zoo03.yandex.ru</host>
<host>zoo03.clickhouse.com</host>
<port>2181</port>
</node>
</zookeeper>

View File

@ -85,7 +85,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
Сервер следит за изменениями конфигурационных файлов, а также файлов и ZooKeeper-узлов, которые были использованы при выполнении подстановок и переопределений, и перезагружает настройки пользователей и кластеров на лету. То есть, можно изменять кластера, пользователей и их настройки без перезапуска сервера.
## Шифрование {#encryption}
## Шифрование и Скрытие {#encryption}
Вы можете использовать симметричное шифрование для зашифровки элемента конфигурации, например, поля password. Чтобы это сделать, сначала настройте [кодек шифрования](../sql-reference/statements/create/table.md#encryption-codecs), затем добавьте аттибут`encrypted_by` с именем кодека шифрования как значение к элементу, который надо зашифровать.
@ -122,6 +122,21 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
```
Даже с применённым шифрованием в файле предобработки элементы все равно сохраняются в незашифрованном виде. В случае если это является проблемой, мы предлагаем две альтернативы: или установить разрешения на файл предобработки 600 или использовать аттрибут `hide_in_preprocessed`.
Пример:
```xml
<clickhouse>
<interserver_http_credentials hide_in_preprocessed="true">
<user>admin</user>
<password>secret</password>
</interserver_http_credentials>
</clickhouse>
```
## Примеры записи конфигурации на YAML {#example}
Здесь можно рассмотреть пример реальной конфигурации записанной на YAML: [config.yaml.example](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.yaml.example).

View File

@ -1923,3 +1923,19 @@ ClickHouse использует ZooKeeper для хранения метадан
- Положительное целое число.
Значение по умолчанию: `10000`.
## validate_tcp_client_information {#validate-tcp-client-information}
Включена ли валидация данных о клиенте при запросе от клиента, использующего TCP соединение.
Если `true`, то на неверные данные от клиента будет выброшено исключение.
Если `false`, то данные не будут валидироваться. Сервер будет работать с клиентами всех версий.
Значение по умолчанию: `false`.
**Пример**
``` xml
<validate_tcp_client_information>true</validate_tcp_client_information>
```

View File

@ -110,3 +110,42 @@ Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec.
├──────────┼──────────┤
...
```
## Запрос данных в файле с помощью SQL {#query_data_in_file}
Часто `clickhouse-local` используется для выполнения специальных запросов к файлам, когда не нужно вставлять данные в таблицу. `clickhouse-local` может транслировать данные из файла во временную таблицу и выполнить ваш SQL.
Если файл находится на той же машине, что и `clickhouse-local`, то можно просто указать файл для загрузки. Следующий файл `reviews.tsv` содержит выборку отзывов о товарах Amazon:
```bash
./clickhouse local -q "SELECT * FROM 'reviews.tsv'"
```
Эта команда является сокращением команды:
```bash
./clickhouse local -q "SELECT * FROM file('reviews.tsv')"
```
ClickHouse знает, что файл использует формат, разделенный табуляцией, из расширения имени файла. Если необходимо явно указать формат, просто добавьте один из [множества входных форматов ClickHouse](../../interfaces/formats.md):
```bash
./clickhouse local -q "SELECT * FROM file('reviews.tsv', 'TabSeparated')"
```
Функция таблицы `file` создает таблицу, и вы можете использовать `DESCRIBE` для просмотра предполагаемой схемы:
```bash
./clickhouse local -q "DESCRIBE file('reviews.tsv')"
```
:::tip
В имени файла разрешается использовать [Шаблоны поиска](/docs/ru/sql-reference/table-functions/file.md/#globs-in-path).
Примеры:
```bash
./clickhouse local -q "SELECT * FROM 'reviews*.jsonl'"
./clickhouse local -q "SELECT * FROM 'review_?.csv'"
./clickhouse local -q "SELECT * FROM 'review_{1..3}.csv'"
```

View File

@ -1703,3 +1703,327 @@ SELECT arrayProduct([toDecimal64(1,8), toDecimal64(2,8), toDecimal64(3,8)]) as r
│ 6 │ Float64 │
└─────┴──────────────────────────────────────────────────────────────────────────────────────────┘
```
## arrayRotateLeft
Поворачивает [массив](../../sql-reference/data-types/array.md) влево на заданное число элементов.
Если количество элементов отрицательно, то массив поворачивается вправо.
**Синтаксис**
``` sql
arrayRotateLeft(arr, n)
```
**Аргументы**
- `arr` — [Массив](../../sql-reference/data-types/array.md).
- `n` — Число элементов, на которое нужно повернуть массив.
**Возвращаемое значение**
- Массив, повернутый на заданное число элементов влево.
Тип: [Массив](../../sql-reference/data-types/array.md).
**Примеры**
Запрос:
``` sql
SELECT arrayRotateLeft([1,2,3,4,5,6], 2) as res;
```
Результат:
``` text
┌─res───────────┐
│ [3,4,5,6,1,2] │
└───────────────┘
```
Запрос:
``` sql
SELECT arrayRotateLeft([1,2,3,4,5,6], -2) as res;
```
Результат:
``` text
┌─res───────────┐
│ [5,6,1,2,3,4] │
└───────────────┘
```
Запрос:
``` sql
SELECT arrayRotateLeft(['a','b','c','d','e'], 3) as res;
```
Результат:
``` text
┌─res───────────────────┐
│ ['d','e','a','b','c'] │
└───────────────────────┘
```
## arrayRotateRight
Поворачивает [массив](../../sql-reference/data-types/array.md) вправо на заданное число элементов.
Если количество элементов отрицательно, то массив поворачивается влево.
**Синтаксис**
``` sql
arrayRotateRight(arr, n)
```
**Аргументы**
- `arr` — [Массив](../../sql-reference/data-types/array.md).
- `n` — Число элементов, на которое нужно повернуть массив.
**Возвращаемое значение**
- Массив, повернутый на заданное число элементов вправо.
Тип: [Массив](../../sql-reference/data-types/array.md).
**Примеры**
Запрос:
``` sql
SELECT arrayRotateRight([1,2,3,4,5,6], 2) as res;
```
Результат:
``` text
┌─res───────────┐
│ [5,6,1,2,3,4] │
└───────────────┘
```
Запрос:
``` sql
SELECT arrayRotateRight([1,2,3,4,5,6], -2) as res;
```
Результат:
``` text
┌─res───────────┐
│ [3,4,5,6,1,2] │
└───────────────┘
```
Запрос:
``` sql
SELECT arrayRotateRight(['a','b','c','d','e'], 3) as res;
```
Результат:
``` text
┌─res───────────────────┐
│ ['c','d','e','a','b'] │
└───────────────────────┘
```
## arrayShiftLeft
Сдвигает [массив](../../sql-reference/data-types/array.md) влево на заданное число элементов.
Новые элементы заполняются переданным аргументом или значением по умолчанию для типа элементов массива.
Если количество элементов отрицательно, то массив сдвигается вправо.
**Синтаксис**
``` sql
arrayShiftLeft(arr, n[, default])
```
**Аргументы**
- `arr` — [Массив](../../sql-reference/data-types/array.md).
- `n` — Число элементов, на которое нужно сдвинуть массив.
- `default` — Опциональный. Значение по умолчанию для новых элементов.
**Возвращаемое значение**
- Массив, сдвинутый на заданное число элементов влево.
Тип: [Массив](../../sql-reference/data-types/array.md).
**Примеры**
Запрос:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], 2) as res;
```
Результат:
``` text
┌─res───────────┐
│ [3,4,5,6,0,0] │
└───────────────┘
```
Запрос:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], -2) as res;
```
Результат:
``` text
┌─res───────────┐
│ [0,0,1,2,3,4] │
└───────────────┘
```
Запрос:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6], 2, 42) as res;
```
Результат:
``` text
┌─res─────────────┐
│ [3,4,5,6,42,42] │
└─────────────────┘
```
Запрос:
``` sql
SELECT arrayShiftLeft(['a','b','c','d','e','f'], 3, 'foo') as res;
```
Результат:
``` text
┌─res─────────────────────────────┐
│ ['d','e','f','foo','foo','foo'] │
└─────────────────────────────────┘
```
Запрос:
``` sql
SELECT arrayShiftLeft([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res;
```
Результат:
``` text
┌─res─────────────────┐
│ [3,4,5,6,4242,4242] │
└─────────────────────┘
```
## arrayShiftRight
Сдвигает [массив](../../sql-reference/data-types/array.md) вправо на заданное число элементов.
Новые элементы заполняются переданным аргументом или значением по умолчанию для типа элементов массива.
Если количество элементов отрицательно, то массив сдвигается влево.
**Синтаксис**
``` sql
arrayShiftRight(arr, n[, default])
```
**Аргументы**
- `arr` — [Массив](../../sql-reference/data-types/array.md).
- `n` — Число элементов, на которое нужно сдвинуть массив.
- `default` — Опциональный. Значение по умолчанию для новых элементов.
**Возвращаемое значение**
- Массив, сдвинутый на заданное число элементов вправо.
Тип: [Массив](../../sql-reference/data-types/array.md).
**Примеры**
Запрос:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], 2) as res;
```
Результат:
``` text
┌─res───────────┐
│ [0,0,1,2,3,4] │
└───────────────┘
```
Запрос:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], -2) as res;
```
Результат:
``` text
┌─res───────────┐
│ [3,4,5,6,0,0] │
└───────────────┘
```
Запрос:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6], 2, 42) as res;
```
Результат:
``` text
┌─res─────────────┐
│ [42,42,1,2,3,4] │
└─────────────────┘
```
Запрос:
``` sql
SELECT arrayShiftRight(['a','b','c','d','e','f'], 3, 'foo') as res;
```
Результат:
``` text
┌─res─────────────────────────────┐
│ ['foo','foo','foo','a','b','c'] │
└─────────────────────────────────┘
```
Запрос:
``` sql
SELECT arrayShiftRight([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res;
```
Результат:
``` text
┌─res─────────────────┐
│ [4242,4242,1,2,3,4] │
└─────────────────────┘
```

View File

@ -0,0 +1,103 @@
---
slug: /zh/engines/table-engines/mergetree-family/sharedmergetree
---
# SharedMergeTree {#sharedmergetree}
仅在ClickHouse Cloud以及第一方合作伙伴云服务中提供
SharedMergeTree表引擎系列是ReplicatedMergeTree引擎的云原生替代方案经过优化适用于共享对象存储例如Amazon S3、Google Cloud Storage、MinIO、Azure Blob Storage。每个特定的MergeTree引擎类型都有对应的SharedMergeTree引擎例如ReplacingSharedMergeTree替代ReplacingReplicatedMergeTree。
SharedMergeTree表引擎为ClickHouse Cloud的性能带来了显著提升。对于终端用户来说无需做任何改变即可开始使用SharedMergeTree引擎系列而不是基于ReplicatedMergeTree的引擎。它提供的好处包括
- 更高的插入吞吐量
- 后台合并的吞吐量提高
- Mutation操作的吞吐量提高
- 更快的扩容和缩容操作
- 用于选择查询的更轻量级强一致性
SharedMergeTree带来的一个重要改进是与ReplicatedMergeTree相比它提供了更彻底的计算和存储分离。下图展示了ReplicatedMergeTree如何分离计算和存储
![ReplicatedMergeTree Diagram](../../../images/shared-merge-tree-1.png)
正如您所见尽管存储在ReplicatedMergeTree中的数据位于对象存储中但元数据仍存储在每个clickhouse-server上。这意味着对于每个复制操作元数据也需要在所有副本上进行复制。
![ReplicatedMergeTree Diagram](../../../images/shared-merge-tree-2.png)
与ReplicatedMergeTree不同SharedMergeTree不需要副本之间进行通信。相反所有通信都通过共享存储和clickhouse-keeper进行。SharedMergeTree实现了异步无领导复制并使用clickhouse-keeper进行协调和元数据存储。这意味着随着服务的扩展不需要复制元数据。这可以加快复制、变更、合并和扩展操作。SharedMergeTree允许每个表有数百个副本使得无需分片即可进行动态扩展。这也意味着在ClickHouse Cloud中使用分布式查询执行方法可以利用更多的计算资源来执行查询。
## 系统监控
用于系统监控的ReplicatedMergeTree的大部分系统表system table在SharedMergeTree中也存在唯独没有`system.replication_queue`和`system.replicated_fetches`因为没有数据和元数据的复制。然而SharedMergeTree对这两个表有相应的替代表。
`system.virtual_parts`
这个表作为SharedMergeTree对 `system.replication_queue` 的替代存储关于最新的一组data parts以及未来正在进行的合并、变更和删除parts。
`system.shared_merge_tree_fetches`
这个表是SharedMergeTree对`system.replicated_fetches`的替代。它包含关于正在加载入内存的主键和校验码信息。
## 使用SharedMergeTree
SharedMergeTree已经是所有开发实例development tier中的默认表引擎并且可以通过提交支持工单在生产环境实例product tier中启用https://clickhouse.cloud/support。
对于支持SharedMergeTree表引擎的实例您不需要做任何额外变更。您可以像以前一样创建表它会自动使用基于SharedMergeTree的表引擎该引擎与您在CREATE TABLE查询中指定的引擎相对应。
通过使用 SharedMergeTree 表引擎可以创建 my_table 表。
```sql
CREATE TABLE my_table(
key UInt64,
value String
)
ENGINE = MergeTree
ORDER BY key
```
在ClickHouse Cloud中由于 `default_table_engine=MergeTree`,用户不必再特别设置`ENGINE=MergeTree`。下面的查询语句和上面的完全一样。
```sql
CREATE TABLE my_table(
key UInt64,
value String
)
ORDER BY key
```
如果您使用Replacing、Collapsing、Aggregating、Summing、VersionedCollapsing、Graphite MergeTree表它们将自动转换为相应的基于SharedMergeTree的表引擎。
```sql
CREATE TABLE myFirstReplacingMT
(
`key` Int64,
`someCol` String,
`eventTime` DateTime
)
ENGINE = ReplacingMergeTree
ORDER BY key;
```
您可以使用SHOW CREATE TABLE查看用于创建表的语句。
``` sql
SHOW CREATE TABLE myFirstReplacingMT;
```
```sql
CREATE TABLE default.myFirstReplacingMT
( `key` Int64, `someCol` String, `eventTime` DateTime )
ENGINE = SharedReplacingMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')
ORDER BY key
SETTINGS index_granularity = 8192
```
## 配置
一些配置的行为发生了显著的改变:
- `insert_quorum` -- 所有对SharedMergeTree的insert都是quorum insert写入共享对象存储因此在使用SharedMergeTree表引擎时不需要此设置。
- `insert_quorum_parallel` -- 所有对SharedMergeTree的insert都是quorum insert写入共享对象存储
- `select_sequential_consistency` -- 不需要quorum inserts会引起在SELECT查询中向clickhouse-keeper增加附加的请求。

Binary file not shown.

After

Width:  |  Height:  |  Size: 695 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 700 KiB

View File

@ -125,7 +125,7 @@ SELECT max2(-1, 2);
**语法**
```sql
max2(value1, value2)
min2(value1, value2)
```
**参数**

View File

@ -5,6 +5,7 @@
#include <fstream>
#include <iomanip>
#include <random>
#include <string_view>
#include <pcg_random.hpp>
#include <Poco/Util/Application.h>
#include <Common/Stopwatch.h>
@ -48,6 +49,7 @@ namespace DB
{
using Ports = std::vector<UInt16>;
static constexpr std::string_view DEFAULT_CLIENT_NAME = "benchmark";
namespace ErrorCodes
{
@ -122,7 +124,7 @@ public:
default_database_, user_, password_, quota_key_,
/* cluster_= */ "",
/* cluster_secret_= */ "",
/* client_name_= */ "benchmark",
/* client_name_= */ std::string(DEFAULT_CLIENT_NAME),
Protocol::Compression::Enable,
secure));
@ -135,6 +137,8 @@ public:
global_context->makeGlobalContext();
global_context->setSettings(settings);
global_context->setClientName(std::string(DEFAULT_CLIENT_NAME));
global_context->setQueryKindInitial();
std::cerr << std::fixed << std::setprecision(3);

View File

@ -1243,6 +1243,7 @@ void Client::processConfig()
global_context->getSettingsRef().max_insert_block_size);
}
global_context->setClientName(std::string(DEFAULT_CLIENT_NAME));
global_context->setQueryKindInitial();
global_context->setQuotaClientKey(config().getString("quota_key", ""));
global_context->setQueryKind(query_kind);

View File

@ -2,6 +2,7 @@
#include "Commands.h"
#include <Client/ReplxxLineReader.h>
#include <Client/ClientBase.h>
#include <Common/Config/ConfigProcessor.h>
#include <Common/EventNotifier.h>
#include <Common/filesystemHelpers.h>
#include <Common/ZooKeeper/ZooKeeper.h>
@ -155,6 +156,11 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options)
.argument("<seconds>")
.binding("operation-timeout"));
options.addOption(
Poco::Util::Option("config-file", "c", "if set, will try to get a connection string from clickhouse config. default `config.xml`")
.argument("<file>")
.binding("config-file"));
options.addOption(
Poco::Util::Option("history-file", "", "set path of history file. default `~/.keeper-client-history`")
.argument("<file>")
@ -211,7 +217,14 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */)
}
}
Poco::Logger::root().setLevel(config().getString("log-level", "error"));
String default_log_level;
if (config().has("query"))
/// We don't want to see any information log in query mode, unless it was set explicitly
default_log_level = "error";
else
default_log_level = "information";
Poco::Logger::root().setLevel(config().getString("log-level", default_log_level));
EventNotifier::init();
}
@ -311,9 +324,39 @@ int KeeperClient::main(const std::vector<String> & /* args */)
return 0;
}
auto host = config().getString("host", "localhost");
auto port = config().getString("port", "9181");
zk_args.hosts = {host + ":" + port};
DB::ConfigProcessor config_processor(config().getString("config-file", "config.xml"));
/// This will handle a situation when clickhouse is running on the embedded config, but config.d folder is also present.
config_processor.registerEmbeddedConfig("config.xml", "<clickhouse/>");
auto clickhouse_config = config_processor.loadConfig();
Poco::Util::AbstractConfiguration::Keys keys;
clickhouse_config.configuration->keys("zookeeper", keys);
if (!config().has("host") && !config().has("port") && !keys.empty())
{
LOG_INFO(&Poco::Logger::get("KeeperClient"), "Found keeper node in the config.xml, will use it for connection");
for (const auto & key : keys)
{
String prefix = "zookeeper." + key;
String host = clickhouse_config.configuration->getString(prefix + ".host");
String port = clickhouse_config.configuration->getString(prefix + ".port");
if (clickhouse_config.configuration->has(prefix + ".secure"))
host = "secure://" + host;
zk_args.hosts.push_back(host + ":" + port);
}
}
else
{
String host = config().getString("host", "localhost");
String port = config().getString("port", "9181");
zk_args.hosts.push_back(host + ":" + port);
}
zk_args.connection_timeout_ms = config().getInt("connection-timeout", 10) * 1000;
zk_args.session_timeout_ms = config().getInt("session-timeout", 10) * 1000;
zk_args.operation_timeout_ms = config().getInt("operation-timeout", 10) * 1000;

View File

@ -133,8 +133,6 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3Capabilities.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/diskSettings.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/ProxyListConfiguration.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/ProxyResolverConfiguration.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp

View File

@ -1302,18 +1302,14 @@ try
if (structure.empty())
{
ReadBufferIterator read_buffer_iterator = [&](ColumnsDescription &)
{
auto file = std::make_unique<ReadBufferFromFileDescriptor>(STDIN_FILENO);
auto file = std::make_unique<ReadBufferFromFileDescriptor>(STDIN_FILENO);
/// stdin must be seekable
auto res = lseek(file->getFD(), 0, SEEK_SET);
if (-1 == res)
throwFromErrno("Input must be seekable file (it will be read twice).", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
return file;
};
/// stdin must be seekable
auto res = lseek(file->getFD(), 0, SEEK_SET);
if (-1 == res)
throwFromErrno("Input must be seekable file (it will be read twice).", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
SingleReadBufferIterator read_buffer_iterator(std::move(file));
schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, false, context_const);
}
else

View File

@ -450,11 +450,11 @@ void checkForUsersNotInMainConfig(
/// Unused in other builds
#if defined(OS_LINUX)
static String readString(const String & path)
static String readLine(const String & path)
{
ReadBufferFromFile in(path);
String contents;
readStringUntilEOF(contents, in);
readStringUntilNewlineInto(contents, in);
return contents;
}
@ -479,9 +479,16 @@ static void sanityChecks(Server & server)
#if defined(OS_LINUX)
try
{
const std::unordered_set<std::string> fastClockSources = {
// ARM clock
"arch_sys_counter",
// KVM guest clock
"kvm-clock",
// X86 clock
"tsc",
};
const char * filename = "/sys/devices/system/clocksource/clocksource0/current_clocksource";
String clocksource = readString(filename);
if (clocksource.find("tsc") == std::string::npos && clocksource.find("kvm-clock") == std::string::npos)
if (!fastClockSources.contains(readLine(filename)))
server.context()->addWarningMessage("Linux is not using a fast clock source. Performance can be degraded. Check " + String(filename));
}
catch (...)
@ -501,7 +508,7 @@ static void sanityChecks(Server & server)
try
{
const char * filename = "/sys/kernel/mm/transparent_hugepage/enabled";
if (readString(filename).find("[always]") != std::string::npos)
if (readLine(filename).find("[always]") != std::string::npos)
server.context()->addWarningMessage("Linux transparent hugepages are set to \"always\". Check " + String(filename));
}
catch (...)

View File

@ -419,6 +419,8 @@
<!-- Cache size in elements for compiled expressions.-->
<compiled_expression_cache_elements_size>10000</compiled_expression_cache_elements_size>
<validate_tcp_client_information>false</validate_tcp_client_information>
<!-- Path to data directory, with trailing slash. -->
<path>/var/lib/clickhouse/</path>

View File

@ -100,8 +100,8 @@ private:
struct Constraint
{
SettingConstraintWritability writability = SettingConstraintWritability::WRITABLE;
Field min_value;
Field max_value;
Field min_value{};
Field max_value{};
bool operator ==(const Constraint & other) const;
bool operator !=(const Constraint & other) const { return !(*this == other); }

View File

@ -289,7 +289,7 @@ namespace
}
bool access_management = config.getBool(user_config + ".access_management", false);
bool named_collection_control = config.getBool(user_config + ".named_collection_control", false);
bool named_collection_control = config.getBool(user_config + ".named_collection_control", false) || config.getBool(user_config + ".named_collection_admin", false);
bool show_named_collections_secrets = config.getBool(user_config + ".show_named_collections_secrets", false);
if (grant_queries)

View File

@ -169,6 +169,10 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "merge() with thread pool parameter isn't implemented for {} ", getName());
}
/// Merges states (on which src places points to) with other states (on which dst places points to) of current aggregation function
/// then destroy states (on which src places points to).
virtual void mergeAndDestroyBatch(AggregateDataPtr * dst_places, AggregateDataPtr * src_places, size_t size, size_t offset, Arena * arena) const = 0;
/// Serializes state (to transmit it over the network, for example).
virtual void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> version = std::nullopt) const = 0; /// NOLINT
@ -506,6 +510,15 @@ public:
static_cast<const Derived *>(this)->merge(places[i] + place_offset, rhs[i], arena);
}
void mergeAndDestroyBatch(AggregateDataPtr * dst_places, AggregateDataPtr * rhs_places, size_t size, size_t offset, Arena * arena) const override
{
for (size_t i = 0; i < size; ++i)
{
static_cast<const Derived *>(this)->merge(dst_places[i] + offset, rhs_places[i] + offset, arena);
static_cast<const Derived *>(this)->destroy(rhs_places[i] + offset);
}
}
void addBatchSinglePlace( /// NOLINT
size_t row_begin,
size_t row_end,

View File

@ -34,6 +34,7 @@ public:
static void parallelizeMergePrepare(const std::vector<UniqExactSet *> & data_vec, ThreadPool & thread_pool)
{
unsigned long single_level_set_num = 0;
unsigned long all_single_hash_size = 0;
for (auto ele : data_vec)
{
@ -41,7 +42,17 @@ public:
single_level_set_num ++;
}
if (single_level_set_num > 0 && single_level_set_num < data_vec.size())
if (single_level_set_num == data_vec.size())
{
for (auto ele : data_vec)
all_single_hash_size += ele->size();
}
/// If all the hashtables are mixed by singleLevel and twoLevel, or all singleLevel (larger than 6000 for average value), they could be converted into
/// twoLevel hashtables in parallel and then merge together. please refer to the following PR for more details.
/// https://github.com/ClickHouse/ClickHouse/pull/50748
/// https://github.com/ClickHouse/ClickHouse/pull/52973
if ((single_level_set_num > 0 && single_level_set_num < data_vec.size()) || ((all_single_hash_size/data_vec.size()) > 6000))
{
try
{

View File

@ -10,6 +10,8 @@
#include <Parsers/ASTWithAlias.h>
#include <boost/functional/hash.hpp>
namespace DB
{

View File

@ -39,6 +39,7 @@ public:
std::optional<UUID> backup_uuid;
bool deduplicate_files = true;
bool allow_s3_native_copy = true;
bool use_same_s3_credentials_for_base_backup = false;
ReadSettings read_settings;
WriteSettings write_settings;
};

View File

@ -50,7 +50,8 @@ namespace
context->getRemoteHostFilter(),
static_cast<unsigned>(context->getGlobalContext()->getSettingsRef().s3_max_redirects),
context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging,
/* for_disk_s3 = */ false, settings.request_settings.get_request_throttler, settings.request_settings.put_request_throttler);
/* for_disk_s3 = */ false, settings.request_settings.get_request_throttler, settings.request_settings.put_request_throttler,
s3_uri.uri.getScheme());
client_configuration.endpointOverride = s3_uri.endpoint;
client_configuration.maxConnections = static_cast<unsigned>(context->getSettingsRef().s3_max_connections);

View File

@ -77,12 +77,14 @@ namespace
BackupImpl::BackupImpl(
const String & backup_name_for_logging_,
const BackupInfo & backup_info_,
const ArchiveParams & archive_params_,
const std::optional<BackupInfo> & base_backup_info_,
std::shared_ptr<IBackupReader> reader_,
const ContextPtr & context_)
: backup_name_for_logging(backup_name_for_logging_)
const ContextPtr & context_,
bool use_same_s3_credentials_for_base_backup_)
: backup_info(backup_info_)
, backup_name_for_logging(backup_info.toStringForLogging())
, use_archive(!archive_params_.archive_name.empty())
, archive_params(archive_params_)
, open_mode(OpenMode::READ)
@ -90,13 +92,14 @@ BackupImpl::BackupImpl(
, is_internal_backup(false)
, version(INITIAL_BACKUP_VERSION)
, base_backup_info(base_backup_info_)
, use_same_s3_credentials_for_base_backup(use_same_s3_credentials_for_base_backup_)
{
open(context_);
}
BackupImpl::BackupImpl(
const String & backup_name_for_logging_,
const BackupInfo & backup_info_,
const ArchiveParams & archive_params_,
const std::optional<BackupInfo> & base_backup_info_,
std::shared_ptr<IBackupWriter> writer_,
@ -104,8 +107,10 @@ BackupImpl::BackupImpl(
bool is_internal_backup_,
const std::shared_ptr<IBackupCoordination> & coordination_,
const std::optional<UUID> & backup_uuid_,
bool deduplicate_files_)
: backup_name_for_logging(backup_name_for_logging_)
bool deduplicate_files_,
bool use_same_s3_credentials_for_base_backup_)
: backup_info(backup_info_)
, backup_name_for_logging(backup_info.toStringForLogging())
, use_archive(!archive_params_.archive_name.empty())
, archive_params(archive_params_)
, open_mode(OpenMode::WRITE)
@ -116,6 +121,7 @@ BackupImpl::BackupImpl(
, version(CURRENT_BACKUP_VERSION)
, base_backup_info(base_backup_info_)
, deduplicate_files(deduplicate_files_)
, use_same_s3_credentials_for_base_backup(use_same_s3_credentials_for_base_backup_)
, log(&Poco::Logger::get("BackupImpl"))
{
open(context_);
@ -162,10 +168,16 @@ void BackupImpl::open(const ContextPtr & context)
if (base_backup_info)
{
if (use_same_s3_credentials_for_base_backup)
backup_info.copyS3CredentialsTo(*base_backup_info);
BackupFactory::CreateParams params;
params.backup_info = *base_backup_info;
params.open_mode = OpenMode::READ;
params.context = context;
/// use_same_s3_credentials_for_base_backup should be inherited for base backups
params.use_same_s3_credentials_for_base_backup = use_same_s3_credentials_for_base_backup;
base_backup = BackupFactory::instance().createBackup(params);
if (open_mode == OpenMode::WRITE)

View File

@ -35,14 +35,15 @@ public:
};
BackupImpl(
const String & backup_name_for_logging_,
const BackupInfo & backup_info_,
const ArchiveParams & archive_params_,
const std::optional<BackupInfo> & base_backup_info_,
std::shared_ptr<IBackupReader> reader_,
const ContextPtr & context_);
const ContextPtr & context_,
bool use_same_s3_credentials_for_base_backup_);
BackupImpl(
const String & backup_name_for_logging_,
const BackupInfo & backup_info_,
const ArchiveParams & archive_params_,
const std::optional<BackupInfo> & base_backup_info_,
std::shared_ptr<IBackupWriter> writer_,
@ -50,7 +51,8 @@ public:
bool is_internal_backup_,
const std::shared_ptr<IBackupCoordination> & coordination_,
const std::optional<UUID> & backup_uuid_,
bool deduplicate_files_);
bool deduplicate_files_,
bool use_same_s3_credentials_for_base_backup_);
~BackupImpl() override;
@ -109,6 +111,7 @@ private:
std::unique_ptr<SeekableReadBuffer> readFileImpl(const SizeAndChecksum & size_and_checksum, bool read_encrypted) const;
BackupInfo backup_info;
const String backup_name_for_logging;
const bool use_archive;
const ArchiveParams archive_params;
@ -145,6 +148,7 @@ private:
bool writing_finalized = false;
bool deduplicate_files = true;
bool use_same_s3_credentials_for_base_backup = false;
const Poco::Logger * log;
};

View File

@ -98,4 +98,23 @@ String BackupInfo::toStringForLogging() const
return toAST()->formatForLogging();
}
void BackupInfo::copyS3CredentialsTo(BackupInfo & dest) const
{
/// named_collection case, no need to update
if (!dest.id_arg.empty() || !id_arg.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "use_same_s3_credentials_for_base_backup is not compatible with named_collections");
if (backup_engine_name != "S3")
throw Exception(ErrorCodes::BAD_ARGUMENTS, "use_same_s3_credentials_for_base_backup supported only for S3, got {}", toStringForLogging());
if (dest.backup_engine_name != "S3")
throw Exception(ErrorCodes::BAD_ARGUMENTS, "use_same_s3_credentials_for_base_backup supported only for S3, got {}", dest.toStringForLogging());
if (args.size() != 3)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "use_same_s3_credentials_for_base_backup requires access_key_id, secret_access_key, got {}", toStringForLogging());
auto & dest_args = dest.args;
dest_args.resize(3);
dest_args[1] = args[1];
dest_args[2] = args[2];
}
}

View File

@ -23,6 +23,8 @@ struct BackupInfo
static BackupInfo fromAST(const IAST & ast);
String toStringForLogging() const;
void copyS3CredentialsTo(BackupInfo & dest) const;
};
}

View File

@ -27,6 +27,7 @@ namespace ErrorCodes
M(Bool, decrypt_files_from_encrypted_disks) \
M(Bool, deduplicate_files) \
M(Bool, allow_s3_native_copy) \
M(Bool, use_same_s3_credentials_for_base_backup) \
M(Bool, read_from_filesystem_cache) \
M(UInt64, shard_num) \
M(UInt64, replica_num) \

View File

@ -44,6 +44,9 @@ struct BackupSettings
/// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs)
bool allow_s3_native_copy = true;
/// Whether base backup to S3 should inherit credentials from the BACKUP query.
bool use_same_s3_credentials_for_base_backup = false;
/// Allow to use the filesystem cache in passive mode - benefit from the existing cache entries,
/// but don't put more entries into the cache.
bool read_from_filesystem_cache = true;

View File

@ -386,6 +386,7 @@ void BackupsWorker::doBackup(
backup_create_params.backup_uuid = backup_settings.backup_uuid;
backup_create_params.deduplicate_files = backup_settings.deduplicate_files;
backup_create_params.allow_s3_native_copy = backup_settings.allow_s3_native_copy;
backup_create_params.use_same_s3_credentials_for_base_backup = backup_settings.use_same_s3_credentials_for_base_backup;
backup_create_params.read_settings = getReadSettingsForBackup(context, backup_settings);
backup_create_params.write_settings = getWriteSettingsForBackup(context);
BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params);
@ -693,6 +694,7 @@ void BackupsWorker::doRestore(
backup_open_params.base_backup_info = restore_settings.base_backup_info;
backup_open_params.password = restore_settings.password;
backup_open_params.allow_s3_native_copy = restore_settings.allow_s3_native_copy;
backup_open_params.use_same_s3_credentials_for_base_backup = restore_settings.use_same_s3_credentials_for_base_backup;
backup_open_params.read_settings = getReadSettingsForRestore(context);
backup_open_params.write_settings = getWriteSettingsForRestore(context);
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);

View File

@ -163,6 +163,7 @@ namespace
M(Bool, allow_unresolved_access_dependencies) \
M(RestoreUDFCreationMode, create_function) \
M(Bool, allow_s3_native_copy) \
M(Bool, use_same_s3_credentials_for_base_backup) \
M(Bool, internal) \
M(String, host_id) \
M(OptionalString, storage_policy) \

View File

@ -110,6 +110,9 @@ struct RestoreSettings
/// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs)
bool allow_s3_native_copy = true;
/// Whether base backup from S3 should inherit credentials from the RESTORE query.
bool use_same_s3_credentials_for_base_backup = false;
/// Internal, should not be specified by user.
bool internal = false;

View File

@ -47,7 +47,6 @@ void registerBackupEngineS3(BackupFactory & factory)
auto creator_fn = []([[maybe_unused]] const BackupFactory::CreateParams & params) -> std::unique_ptr<IBackup>
{
#if USE_AWS_S3
String backup_name_for_logging = params.backup_info.toStringForLogging();
const String & id_arg = params.backup_info.id_arg;
const auto & args = params.backup_info.args;
@ -115,7 +114,13 @@ void registerBackupEngineS3(BackupFactory & factory)
params.write_settings,
params.context);
return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context);
return std::make_unique<BackupImpl>(
params.backup_info,
archive_params,
params.base_backup_info,
reader,
params.context,
params.use_same_s3_credentials_for_base_backup);
}
else
{
@ -129,7 +134,7 @@ void registerBackupEngineS3(BackupFactory & factory)
params.context);
return std::make_unique<BackupImpl>(
backup_name_for_logging,
params.backup_info,
archive_params,
params.base_backup_info,
writer,
@ -137,7 +142,8 @@ void registerBackupEngineS3(BackupFactory & factory)
params.is_internal_backup,
params.backup_coordination,
params.backup_uuid,
params.deduplicate_files);
params.deduplicate_files,
params.use_same_s3_credentials_for_base_backup);
}
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "S3 support is disabled");

View File

@ -103,7 +103,6 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
{
auto creator_fn = [](const BackupFactory::CreateParams & params) -> std::unique_ptr<IBackup>
{
String backup_name_for_logging = params.backup_info.toStringForLogging();
const String & engine_name = params.backup_info.backup_engine_name;
if (!params.backup_info.id_arg.empty())
@ -172,7 +171,13 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
reader = std::make_shared<BackupReaderFile>(path, params.read_settings, params.write_settings);
else
reader = std::make_shared<BackupReaderDisk>(disk, path, params.read_settings, params.write_settings);
return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context);
return std::make_unique<BackupImpl>(
params.backup_info,
archive_params,
params.base_backup_info,
reader,
params.context,
params.use_same_s3_credentials_for_base_backup);
}
else
{
@ -182,7 +187,7 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
else
writer = std::make_shared<BackupWriterDisk>(disk, path, params.read_settings, params.write_settings);
return std::make_unique<BackupImpl>(
backup_name_for_logging,
params.backup_info,
archive_params,
params.base_backup_info,
writer,
@ -190,7 +195,8 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
params.is_internal_backup,
params.backup_coordination,
params.backup_uuid,
params.deduplicate_files);
params.deduplicate_files,
params.use_same_s3_credentials_for_base_backup);
}
};

View File

@ -169,6 +169,10 @@ if (TARGET ch_contrib::jemalloc)
target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc)
endif()
if (TARGET ch_contrib::azure_sdk)
target_link_libraries(clickhouse_storages_system PRIVATE ch_contrib::azure_sdk)
endif()
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash ch_contrib::incbin)
add_subdirectory(Access/Common)

View File

@ -1471,8 +1471,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
sendDataFromPipe(
std::move(pipe),
parsed_query,
have_data_in_stdin
);
have_data_in_stdin);
}
catch (Exception & e)
{

View File

@ -1,5 +1,6 @@
#pragma once
#include <string_view>
#include "Common/NamePrompter.h"
#include <Parsers/ASTCreateQuery.h>
#include <Common/ProgressIndication.h>
@ -24,6 +25,7 @@ namespace po = boost::program_options;
namespace DB
{
static constexpr std::string_view DEFAULT_CLIENT_NAME = "client";
static const NameSet exit_strings
{

View File

@ -12,6 +12,7 @@
#include <IO/TimeoutSetter.h>
#include <Formats/NativeReader.h>
#include <Formats/NativeWriter.h>
#include <Client/ClientBase.h>
#include <Client/Connection.h>
#include <Client/ConnectionParameters.h>
#include <Common/ClickHouseRevision.h>
@ -885,7 +886,7 @@ void Connection::sendExternalTablesData(ExternalTablesData & data)
return sink;
});
executor = pipeline.execute();
executor->execute(/*num_threads = */ 1);
executor->execute(/*num_threads = */ 1, false);
auto read_rows = sink->getNumReadRows();
rows += read_rows;
@ -1204,7 +1205,7 @@ ServerConnectionPtr Connection::createConnection(const ConnectionParameters & pa
parameters.quota_key,
"", /* cluster */
"", /* cluster_secret */
"client",
std::string(DEFAULT_CLIENT_NAME),
parameters.compression,
parameters.security);
}

View File

@ -559,6 +559,7 @@ FieldInfo ColumnObject::Subcolumn::getFieldInfo() const
.have_nulls = base_type->isNullable(),
.need_convert = false,
.num_dimensions = least_common_type.getNumberOfDimensions(),
.need_fold_dimension = false,
};
}

View File

@ -182,6 +182,7 @@ AsyncLoader::AsyncLoader(std::vector<PoolInitializer> pool_initializers, bool lo
init.max_threads,
/* max_free_threads = */ 0,
init.max_threads),
.ready_queue = {},
.max_threads = init.max_threads
});
}

Some files were not shown because too many files have changed in this diff Show More