diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index e662a5b6f98..05654926fd7 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -205,6 +205,12 @@ jobs: with: build_name: binary_amd64_compat checkout_depth: 0 + BuilderBinAmd64Musl: + needs: [DockerHubPush] + uses: ./.github/workflows/reusable_build.yml + with: + build_name: binary_amd64_musl + checkout_depth: 0 BuilderBinAarch64V80Compat: needs: [DockerHubPush] uses: ./.github/workflows/reusable_build.yml diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index f8f052d9226..a6631a93766 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -242,6 +242,11 @@ jobs: uses: ./.github/workflows/reusable_build.yml with: build_name: binary_amd64_compat + BuilderBinAmd64Musl: + needs: [FastTest, StyleCheck] + uses: ./.github/workflows/reusable_build.yml + with: + build_name: binary_amd64_musl BuilderBinAarch64V80Compat: needs: [FastTest, StyleCheck] uses: ./.github/workflows/reusable_build.yml @@ -532,6 +537,11 @@ jobs: run_command: | cd "$REPO_COPY/tests/ci" + mkdir -p "${REPORTS_PATH}/integration" + mkdir -p "${REPORTS_PATH}/stateless" + cp -r ${REPORTS_PATH}/changed_images* ${REPORTS_PATH}/integration + cp -r ${REPORTS_PATH}/changed_images* ${REPORTS_PATH}/stateless + TEMP_PATH="${TEMP_PATH}/integration" \ REPORTS_PATH="${REPORTS_PATH}/integration" \ python3 integration_test_check.py "Integration $CHECK_NAME" \ diff --git a/.gitmodules b/.gitmodules index af90c788012..4b86f0468f1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -354,6 +354,6 @@ [submodule "contrib/aklomp-base64"] path = contrib/aklomp-base64 url = https://github.com/aklomp/base64.git -[submodule "contrib/pocketfft"] - path = contrib/pocketfft - url = https://github.com/mreineck/pocketfft.git +[submodule "contrib/sqids-cpp"] + path = contrib/sqids-cpp + url = https://github.com/sqids/sqids-cpp.git diff --git a/CHANGELOG.md b/CHANGELOG.md index aa40012617c..ca5c7a5eaf1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v23.11, 2023-12-06](#2311)**
**[ClickHouse release v23.10, 2023-11-02](#2310)**
**[ClickHouse release v23.9, 2023-09-28](#239)**
**[ClickHouse release v23.8 LTS, 2023-08-31](#238)**
@@ -13,7 +14,218 @@ # 2023 Changelog -### ClickHouse release 23.10, 2023-11-02 +### ClickHouse release 23.11, 2023-12-06 + +#### Backward Incompatible Change +* The default ClickHouse server configuration file has enabled `access_management` (user manipulation by SQL queries) and `named_collection_control` (manipulation of named collections by SQL queries) for the `default` user by default. This closes [#56482](https://github.com/ClickHouse/ClickHouse/issues/56482). [#56619](https://github.com/ClickHouse/ClickHouse/pull/56619) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Multiple improvements for `RESPECT NULLS`/`IGNORE NULLS` for window functions. If you use them as aggregate functions and store the states of aggregate functions with these modifiers, they might become incompatible. [#57189](https://github.com/ClickHouse/ClickHouse/pull/57189) ([Raúl Marín](https://github.com/Algunenano)). +* Remove optimization `optimize_move_functions_out_of_any`. [#57190](https://github.com/ClickHouse/ClickHouse/pull/57190) ([Raúl Marín](https://github.com/Algunenano)). +* Formatters `%l`/`%k`/`%c` in function `parseDateTime` are now able to parse hours/months without leading zeros, e.g. `select parseDateTime('2023-11-26 8:14', '%F %k:%i')` now works. Set `parsedatetime_parse_without_leading_zeros = 0` to restore the previous behavior, which required two digits. Function `formatDateTime` is now also able to print hours/months without leading zeros. This is controlled by the setting `formatdatetime_format_without_leading_zeros`, which is off by default to not break existing use cases. [#55872](https://github.com/ClickHouse/ClickHouse/pull/55872) ([Azat Khuzhin](https://github.com/azat)). +* You can no longer use the aggregate function `avgWeighted` with arguments of type `Decimal`. Workaround: convert arguments to `Float64`. This closes [#43928](https://github.com/ClickHouse/ClickHouse/issues/43928). This closes [#31768](https://github.com/ClickHouse/ClickHouse/issues/31768). This closes [#56435](https://github.com/ClickHouse/ClickHouse/issues/56435). If you have used this function inside materialized views or projections with `Decimal` arguments, contact support@clickhouse.com. Fixed an error in the aggregate function `sumMap` and made it around 1.5-2 times slower. It does not matter because the function is garbage anyway. This closes [#54955](https://github.com/ClickHouse/ClickHouse/issues/54955). This closes [#53134](https://github.com/ClickHouse/ClickHouse/issues/53134). This closes [#55148](https://github.com/ClickHouse/ClickHouse/issues/55148). Fix a bug in function `groupArraySample` - it used the same random seed when more than one aggregate state was generated in a query. [#56350](https://github.com/ClickHouse/ClickHouse/pull/56350) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Added server setting `async_load_databases` for asynchronous loading of databases and tables. Speeds up the server start time. Applies to databases with the `Ordinary`, `Atomic` and `Replicated` engines. Their tables load metadata asynchronously. A query to a table increases the priority of the load job and waits for it to be done. Added a new table `system.asynchronous_loader` for introspection. [#49351](https://github.com/ClickHouse/ClickHouse/pull/49351) ([Sergei Trifonov](https://github.com/serxa)). +* Add system table `blob_storage_log`. It allows auditing all the data written to S3 and other object storages.
[#52918](https://github.com/ClickHouse/ClickHouse/pull/52918) ([vdimir](https://github.com/vdimir)). +* Use statistics to order prewhere conditions better. [#53240](https://github.com/ClickHouse/ClickHouse/pull/53240) ([Han Fei](https://github.com/hanfei1991)). +* Added support for compression in the Keeper's protocol. It can be enabled on the ClickHouse side by using the `use_compression` flag inside the `zookeeper` section. Keep in mind that only ClickHouse Keeper supports compression, while Apache ZooKeeper does not. Resolves [#49507](https://github.com/ClickHouse/ClickHouse/issues/49507). [#54957](https://github.com/ClickHouse/ClickHouse/pull/54957) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Introduce the feature `storage_metadata_write_full_object_key`. If it is set to `true`, then metadata files are written in the new format. With that format, ClickHouse stores the full remote object key in the metadata file, which allows better flexibility and optimization. [#55566](https://github.com/ClickHouse/ClickHouse/pull/55566) ([Sema Checherinda](https://github.com/CheSema)). +* Add new settings and syntax to protect named collections' fields from being overridden. This is meant to prevent a malicious user from obtaining unauthorized access to secrets. [#55782](https://github.com/ClickHouse/ClickHouse/pull/55782) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Add `hostname` column to all system log tables - it is useful if you make the system tables replicated, shared, or distributed. [#55894](https://github.com/ClickHouse/ClickHouse/pull/55894) ([Bharat Nallan](https://github.com/bharatnc)). +* Add `CHECK ALL TABLES` query. [#56022](https://github.com/ClickHouse/ClickHouse/pull/56022) ([vdimir](https://github.com/vdimir)). +* Added function `fromDaysSinceYearZero`, which is similar to MySQL's `FROM_DAYS`. E.g. `SELECT fromDaysSinceYearZero(739136)` returns `2023-09-08`. [#56088](https://github.com/ClickHouse/ClickHouse/pull/56088) ([Joanna Hulboj](https://github.com/jh0x)). +* Add an external Python tool to view backups and to extract information from them without using ClickHouse. [#56268](https://github.com/ClickHouse/ClickHouse/pull/56268) ([Vitaly Baranov](https://github.com/vitlibar)). +* Implement a new setting called `preferred_optimize_projection_name`. If it is set to a non-empty string, the specified projection will be used if possible instead of choosing from all the candidates. [#56309](https://github.com/ClickHouse/ClickHouse/pull/56309) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add a 4-letter command for yielding/resigning leadership (https://github.com/ClickHouse/ClickHouse/issues/56352). [#56354](https://github.com/ClickHouse/ClickHouse/pull/56354) ([Pradeep Chhetri](https://github.com/chhetripradeep)). [#56620](https://github.com/ClickHouse/ClickHouse/pull/56620) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Added a new SQL function, `arrayRandomSample(arr, k)`, which returns a sample of `k` elements from the input array. Similar functionality could previously be achieved only with less convenient syntax, e.g. `SELECT arrayReduce('groupArraySample(3)', range(10))` - a short sketch follows below. [#56416](https://github.com/ClickHouse/ClickHouse/pull/56416) ([Robert Schulze](https://github.com/rschu1ze)). +* Added support for `Float16` data in `.npy` files. Closes [#56344](https://github.com/ClickHouse/ClickHouse/issues/56344). [#56424](https://github.com/ClickHouse/ClickHouse/pull/56424) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
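+As a quick illustration of the `arrayRandomSample` entry above, a minimal sketch (the sampled elements are random; the outputs in the comments are illustrative only):
+```sql
+-- New function: sample 3 random elements from an array.
+SELECT arrayRandomSample(range(10), 3);               -- e.g. [8, 1, 5]
+-- The older, less convenient equivalent mentioned in the entry:
+SELECT arrayReduce('groupArraySample(3)', range(10)); -- e.g. [2, 9, 4]
+```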
+* Added a system view `information_schema.statistics` for better compatibility with Tableau Online. [#56425](https://github.com/ClickHouse/ClickHouse/pull/56425) ([Serge Klochkov](https://github.com/slvrtrn)). +* Add `system.symbols` table useful for introspection of the binary. [#56548](https://github.com/ClickHouse/ClickHouse/pull/56548) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Configurable dashboards. Queries for charts are now loaded using a query, which by default uses a new `system.dashboards` table. [#56771](https://github.com/ClickHouse/ClickHouse/pull/56771) ([Sergei Trifonov](https://github.com/serxa)). +* Introduce `fileCluster` table function - it is useful if you mount a shared filesystem (NFS and similar) into the `user_files` directory. [#56868](https://github.com/ClickHouse/ClickHouse/pull/56868) ([Andrey Zvonov](https://github.com/zvonand)). +* Add `_size` virtual column with the file size in bytes to the `s3/file/hdfs/url/azureBlobStorage` engines (a short sketch follows below). [#57126](https://github.com/ClickHouse/ClickHouse/pull/57126) ([Kruglov Pavel](https://github.com/Avogar)). +* Expose the number of errors that occurred on a server for each error code since the last restart from the Prometheus endpoint. [#57209](https://github.com/ClickHouse/ClickHouse/pull/57209) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* ClickHouse Keeper reports its running availability zone at the `/keeper/availability-zone` path. This can be configured via `<availability_zone><value>us-west-1a</value></availability_zone>`. [#56715](https://github.com/ClickHouse/ClickHouse/pull/56715) ([Jianfei Hu](https://github.com/incfly)). +* Make `ALTER TABLE ... MODIFY QUERY` for materialized views non-experimental and deprecate the `allow_experimental_alter_materialized_view_structure` setting. Fixes [#15206](https://github.com/ClickHouse/ClickHouse/issues/15206). [#57311](https://github.com/ClickHouse/ClickHouse/pull/57311) ([alesapin](https://github.com/alesapin)). +* Setting `join_algorithm` now respects the specified order of algorithms. [#51745](https://github.com/ClickHouse/ClickHouse/pull/51745) ([vdimir](https://github.com/vdimir)). +* Add support for the [well-known Protobuf types](https://protobuf.dev/reference/protobuf/google.protobuf/) in the Protobuf format. [#56741](https://github.com/ClickHouse/ClickHouse/pull/56741) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). + +#### Performance Improvement +* Adaptive timeouts for interacting with S3. The first attempt is made with low send and receive timeouts. [#56314](https://github.com/ClickHouse/ClickHouse/pull/56314) ([Sema Checherinda](https://github.com/CheSema)). +* Increase the default value of `max_concurrent_queries` from 100 to 1000. This makes sense when there is a large number of connected clients that are slowly sending or receiving data, so the server is not limited by CPU, or when the number of CPU cores is larger than 100. Also, enable the concurrency control by default, and set the desired number of query processing threads in total as twice the number of CPU cores. It improves performance in scenarios with a very large number of concurrent queries. [#46927](https://github.com/ClickHouse/ClickHouse/pull/46927) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support parallel evaluation of window functions. Fixes [#34688](https://github.com/ClickHouse/ClickHouse/issues/34688). [#39631](https://github.com/ClickHouse/ClickHouse/pull/39631) ([Dmitry Novik](https://github.com/novikd)).
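+A minimal sketch for the `_size` virtual column entry above, assuming a hypothetical glob path `data/*.csv`:
+```sql
+-- The new _size virtual column exposes the file size in bytes,
+-- alongside the existing _path virtual column.
+SELECT _path, _size, count() AS rows
+FROM file('data/*.csv', CSV)
+GROUP BY _path, _size;
+```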
+* `Numbers` table engine (of the `system.numbers` table) now analyzes the condition to generate the needed subset of data, like a table's index. [#50909](https://github.com/ClickHouse/ClickHouse/pull/50909) ([JackyWoo](https://github.com/JackyWoo)). +* Improved the performance of filtering by `IN (...)` condition for the `Merge` table engine. [#54905](https://github.com/ClickHouse/ClickHouse/pull/54905) ([Nikita Taranov](https://github.com/nickitat)). +* An improvement which takes place when the filesystem cache is full and there are big reads. [#55158](https://github.com/ClickHouse/ClickHouse/pull/55158) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add the ability to disable checksums for S3 to avoid an excessive pass over the file (this is controlled by the setting `s3_disable_checksum`). [#55559](https://github.com/ClickHouse/ClickHouse/pull/55559) ([Azat Khuzhin](https://github.com/azat)). +* Now we read synchronously from remote tables when the data is in the page cache (like we do for local tables). It is faster, doesn't require synchronisation inside the thread pool, doesn't hesitate to do `seek`-s on the local FS, and reduces CPU wait. [#55841](https://github.com/ClickHouse/ClickHouse/pull/55841) ([Nikita Taranov](https://github.com/nickitat)). +* Optimization for getting values from `map` and `arrayElement`. It brings about a 30% speedup by reducing the reserved memory and the number of `resize` calls. [#55957](https://github.com/ClickHouse/ClickHouse/pull/55957) ([lgbo](https://github.com/lgbo-ustc)). +* Optimization of multi-stage filtering with AVX-512. Performance experiments on the OnTime dataset on an ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring improvements of 7.4%, 5.9%, 4.7%, 3.0%, and 4.6% to the QPS of queries Q2, Q3, Q4, Q5 and Q6 respectively, while having no impact on others. [#56079](https://github.com/ClickHouse/ClickHouse/pull/56079) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Limit the number of threads busy inside the query profiler. If there are more - they will skip profiling. [#56105](https://github.com/ClickHouse/ClickHouse/pull/56105) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Decrease the number of virtual function calls in window functions. [#56120](https://github.com/ClickHouse/ClickHouse/pull/56120) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow recursive Tuple field pruning in the ORC data format to speed up scanning. [#56122](https://github.com/ClickHouse/ClickHouse/pull/56122) ([李扬](https://github.com/taiyang-li)). +* Trivial count optimization for the `Npy` data format: queries like `select count() from 'data.npy'` will work much faster because the results are cached (a short sketch follows below). [#56304](https://github.com/ClickHouse/ClickHouse/pull/56304) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Queries with aggregation and a large number of streams will use less memory during the query plan's construction. [#57074](https://github.com/ClickHouse/ClickHouse/pull/57074) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve performance of executing queries for use cases with many users and highly concurrent queries (>2000 QPS) by optimizing the access to ProcessList. [#57106](https://github.com/ClickHouse/ClickHouse/pull/57106) ([Andrej Hoos](https://github.com/adikus)). +* Trivial improvement for array join: reuse some intermediate results. [#57183](https://github.com/ClickHouse/ClickHouse/pull/57183) ([李扬](https://github.com/taiyang-li)).
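+A minimal sketch of the `Npy` trivial-count entry above (`data.npy` is a hypothetical file name):
+```sql
+-- count() over an .npy file is now served from a cached result
+-- instead of a full scan of the data.
+SELECT count() FROM file('data.npy', Npy);
+```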
+* There are cases when stack unwinding was slow. Not anymore. [#57221](https://github.com/ClickHouse/ClickHouse/pull/57221) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now we use the default read pool for reading from external storage when `max_streams = 1`. It is beneficial when read prefetches are enabled. [#57334](https://github.com/ClickHouse/ClickHouse/pull/57334) ([Nikita Taranov](https://github.com/nickitat)). +* Keeper improvement: improve memory usage during startup by delaying log preprocessing. [#55660](https://github.com/ClickHouse/ClickHouse/pull/55660) ([Antonio Andelic](https://github.com/antonio2368)). +* Improved performance of glob matching for `File` and `HDFS` storages. [#56141](https://github.com/ClickHouse/ClickHouse/pull/56141) ([Andrey Zvonov](https://github.com/zvonand)). +* Posting lists in experimental full text indexes are now compressed, which reduces their size by 10-30%. [#56226](https://github.com/ClickHouse/ClickHouse/pull/56226) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Parallelise `BackupEntriesCollector` in backups. [#56312](https://github.com/ClickHouse/ClickHouse/pull/56312) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Improvement +* Add a new `MergeTree` setting `add_implicit_sign_column_constraint_for_collapsing_engine` (disabled by default). When enabled, it adds an implicit CHECK constraint for `CollapsingMergeTree` tables that restricts the value of the `Sign` column to be only -1 or 1. [#56701](https://github.com/ClickHouse/ClickHouse/issues/56701). [#56986](https://github.com/ClickHouse/ClickHouse/pull/56986) ([Kevin Mingtarja](https://github.com/kevinmingtarja)). +* Enable adding a new disk to the storage configuration without a restart. [#56367](https://github.com/ClickHouse/ClickHouse/pull/56367) ([Duc Canh Le](https://github.com/canhld94)). +* Support creating and materializing an index in the same `ALTER` query; also support "modify TTL" and "materialize TTL" in the same query (a short sketch follows below). Closes [#55651](https://github.com/ClickHouse/ClickHouse/issues/55651). [#56331](https://github.com/ClickHouse/ClickHouse/pull/56331) ([flynn](https://github.com/ucasfl)). +* Add a new table function named `fuzzJSON` with rows containing perturbed versions of the source JSON string with random variations. [#56490](https://github.com/ClickHouse/ClickHouse/pull/56490) ([Julia Kartseva](https://github.com/jkartseva)). +* Engine `Merge` filters the records according to the row policies of the underlying tables, so you don't have to create another row policy on a `Merge` table. [#50209](https://github.com/ClickHouse/ClickHouse/pull/50209) ([Ilya Golshtein](https://github.com/ilejn)). +* Add a setting `max_execution_time_leaf` to limit the execution time on the shards of a distributed query, and `timeout_overflow_mode_leaf` to control the behaviour if a timeout happens. [#51823](https://github.com/ClickHouse/ClickHouse/pull/51823) ([Duc Canh Le](https://github.com/canhld94)). +* Add a ClickHouse setting to disable tunneling for HTTPS requests over an HTTP proxy. [#55033](https://github.com/ClickHouse/ClickHouse/pull/55033) ([Arthur Passos](https://github.com/arthurpassos)). +* Set `background_fetches_pool_size` to 16 and `background_schedule_pool_size` to 512, which is better for production usage with frequent small insertions. [#54327](https://github.com/ClickHouse/ClickHouse/pull/54327) ([Denny Crane](https://github.com/den-crane)).
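+A minimal sketch of the combined `ALTER` entry above (the table and index names are hypothetical):
+```sql
+-- Creating and materializing an index in a single ALTER query.
+ALTER TABLE hits
+    ADD INDEX idx_user user_id TYPE set(100) GRANULARITY 1,
+    MATERIALIZE INDEX idx_user;
+```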
+* When reading data from a CSV file in which a line ends with a bare `\r` (CR) not followed by `\n` (LF), ClickHouse used to throw the exception `Cannot parse CSV format: found \r (CR) not followed by \n (LF). Line must end by \n (LF) or \r\n (CR LF) or \n\r.` In ClickHouse, a CSV line must end with `\n`, `\r\n` or `\n\r`, so a `\r` must be followed by `\n`; however, in some situations the CSV input data is abnormal and `\r` appears at the end of a line. This case is now supported. [#54340](https://github.com/ClickHouse/ClickHouse/pull/54340) ([KevinyhZou](https://github.com/KevinyhZou)). +* Update the Arrow library to release-13.0.0, which supports new encodings. Closes [#44505](https://github.com/ClickHouse/ClickHouse/issues/44505). [#54800](https://github.com/ClickHouse/ClickHouse/pull/54800) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve performance of ON CLUSTER queries by removing heavy system calls to get all network interfaces when looking for the local IP address in the DDL entry hosts list. [#54909](https://github.com/ClickHouse/ClickHouse/pull/54909) ([Duc Canh Le](https://github.com/canhld94)). +* Fixed accounting of memory allocated before attaching a thread to a query or a user. [#56089](https://github.com/ClickHouse/ClickHouse/pull/56089) ([Nikita Taranov](https://github.com/nickitat)). +* Add support for `LARGE_LIST` in Apache Arrow formats. [#56118](https://github.com/ClickHouse/ClickHouse/pull/56118) ([edef](https://github.com/edef1c)). +* Allow manual compaction of `EmbeddedRocksDB` via an `OPTIMIZE` query. [#56225](https://github.com/ClickHouse/ClickHouse/pull/56225) ([Azat Khuzhin](https://github.com/azat)). +* Add the ability to specify BlockBasedTableOptions for `EmbeddedRocksDB` tables. [#56264](https://github.com/ClickHouse/ClickHouse/pull/56264) ([Azat Khuzhin](https://github.com/azat)). +* `SHOW COLUMNS` now displays MySQL's equivalent data type name when the connection was made through the MySQL protocol. Previously, this was the case when setting `use_mysql_types_in_show_columns = 1`. The setting is retained but made obsolete. [#56277](https://github.com/ClickHouse/ClickHouse/pull/56277) ([Robert Schulze](https://github.com/rschu1ze)). +* Fixed a possible `The local set of parts of table doesn't look like the set of parts in ZooKeeper` error if the server was restarted just after `TRUNCATE` or `DROP PARTITION`. [#56282](https://github.com/ClickHouse/ClickHouse/pull/56282) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fixed handling of non-const query strings in functions `formatQuery`/`formatQuerySingleLine`. Also added `OrNull` variants of both functions that return NULL when a query cannot be parsed instead of throwing an exception (a short sketch follows below). [#56327](https://github.com/ClickHouse/ClickHouse/pull/56327) ([Robert Schulze](https://github.com/rschu1ze)). +* Allow backup of a materialized view with a dropped inner table instead of failing the backup. [#56387](https://github.com/ClickHouse/ClickHouse/pull/56387) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Queries to `system.replicas` initiate requests to ZooKeeper when certain columns are queried. When there are thousands of tables, these requests might produce a considerable load on ZooKeeper. If there are multiple simultaneous queries to `system.replicas`, they make the same requests multiple times. The change is to "deduplicate" requests from concurrent queries. [#56420](https://github.com/ClickHouse/ClickHouse/pull/56420) ([Alexander Gololobov](https://github.com/davenger)).
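+A minimal sketch of the `formatQuery`/`OrNull` entry above (the results in the comments are illustrative):
+```sql
+SELECT formatQuery('select a,    b FRom tab');  -- returns the reformatted, multi-line query
+SELECT formatQueryOrNull('this is not SQL');    -- returns NULL instead of throwing
+```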
+* Fix translation to a MySQL-compatible query for querying external databases. [#56456](https://github.com/ClickHouse/ClickHouse/pull/56456) ([flynn](https://github.com/ucasfl)). +* Add support for backing up and restoring tables using the `KeeperMap` engine. [#56460](https://github.com/ClickHouse/ClickHouse/pull/56460) ([Antonio Andelic](https://github.com/antonio2368)). +* A 404 response for CompleteMultipartUpload is now rechecked: the operation could have completed on the server even if the client got a timeout or another network error, in which case the next retry of CompleteMultipartUpload receives a 404 response. If the object key exists, the operation is considered successful. [#56475](https://github.com/ClickHouse/ClickHouse/pull/56475) ([Sema Checherinda](https://github.com/CheSema)). +* Enable the HTTP OPTIONS method by default - it simplifies requesting ClickHouse from a web browser. [#56483](https://github.com/ClickHouse/ClickHouse/pull/56483) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The value for `dns_max_consecutive_failures` was changed by mistake in [#46550](https://github.com/ClickHouse/ClickHouse/issues/46550) - this is reverted and adjusted to a better value. Also, increased the HTTP keep-alive timeout to a reasonable value from production. [#56485](https://github.com/ClickHouse/ClickHouse/pull/56485) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Load base backups lazily (a base backup won't be loaded until it's needed). Also add some log messages and profile events for backups. [#56516](https://github.com/ClickHouse/ClickHouse/pull/56516) ([Vitaly Baranov](https://github.com/vitlibar)). +* Setting `query_cache_store_results_of_queries_with_nondeterministic_functions` (with values `false` or `true`) was marked obsolete. It was replaced by the setting `query_cache_nondeterministic_function_handling`, a three-valued enum that controls how the query cache handles queries with non-deterministic functions: a) throw an exception (default behavior), b) save the non-deterministic query result regardless, or c) ignore, i.e. don't throw an exception and don't cache the result (a short sketch follows below). [#56519](https://github.com/ClickHouse/ClickHouse/pull/56519) ([Robert Schulze](https://github.com/rschu1ze)). +* Rewrite equality with an `is null` check in the JOIN ON section. Applies to the experimental Analyzer only. [#56538](https://github.com/ClickHouse/ClickHouse/pull/56538) ([vdimir](https://github.com/vdimir)). +* Function `concat` now supports arbitrary argument types (instead of only String and FixedString arguments). This makes it behave more similarly to MySQL's `concat` implementation. For example, `SELECT concat('ab', 42)` now returns `ab42`. [#56540](https://github.com/ClickHouse/ClickHouse/pull/56540) ([Serge Klochkov](https://github.com/slvrtrn)). +* Allow getting cache configuration from the 'named_collection' section in the config or from SQL-created named collections. [#56541](https://github.com/ClickHouse/ClickHouse/pull/56541) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update `query_masking_rules` when reloading the config ([#56449](https://github.com/ClickHouse/ClickHouse/issues/56449)). [#56573](https://github.com/ClickHouse/ClickHouse/pull/56573) ([Mikhail Koviazin](https://github.com/mkmkme)). +* PostgreSQL database engine: Make the removal of outdated tables less aggressive when the PostgreSQL connection is unsuccessful. [#56609](https://github.com/ClickHouse/ClickHouse/pull/56609) ([jsc0218](https://github.com/jsc0218)).
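+A minimal sketch of the query-cache entry above, assuming the enum value for option (b) is `'save'`:
+```sql
+-- Cache the result of a query with a non-deterministic function
+-- instead of getting the default exception.
+SELECT now()
+SETTINGS use_query_cache = 1,
+         query_cache_nondeterministic_function_handling = 'save';
+```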
+* Fixed an issue where connecting to PostgreSQL took too much time when the URL was not right, so the relevant query was stuck there and got cancelled. [#56648](https://github.com/ClickHouse/ClickHouse/pull/56648) ([jsc0218](https://github.com/jsc0218)). +* Keeper improvement: disable compressed logs by default in Keeper. [#56763](https://github.com/ClickHouse/ClickHouse/pull/56763) ([Antonio Andelic](https://github.com/antonio2368)). +* Add config setting `wait_dictionaries_load_at_startup`. [#56782](https://github.com/ClickHouse/ClickHouse/pull/56782) ([Vitaly Baranov](https://github.com/vitlibar)). +* There was a potential vulnerability in previous ClickHouse versions: if a user has connected and unsuccessfully tried to authenticate with the "interserver secret" method, the server didn't terminate the connection immediately but continued to receive and ignore the leftover packets from the client. While these packets are ignored, they are still parsed, and if they use a compression method with another known vulnerability, it will lead to exploitation of it without authentication. This issue was found with the [ClickHouse Bug Bounty Program](https://github.com/ClickHouse/ClickHouse/issues/38986) by https://twitter.com/malacupa. [#56794](https://github.com/ClickHouse/ClickHouse/pull/56794) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fetching a part now waits until that part is fully committed on the remote replica. It is better not to send a part in the PreActive state. In the case of zero-copy replication, this is a mandatory restriction. [#56808](https://github.com/ClickHouse/ClickHouse/pull/56808) ([Sema Checherinda](https://github.com/CheSema)). +* Fix a possible PostgreSQL logical replication conversion error when using the experimental `MaterializedPostgreSQL`. [#53721](https://github.com/ClickHouse/ClickHouse/pull/53721) ([takakawa](https://github.com/takakawa)). +* Implement the user-level setting `alter_move_to_space_execute_async`, which allows executing `ALTER TABLE ... MOVE PARTITION|PART TO DISK|VOLUME` queries asynchronously. The size of the pool for background executions is controlled by `background_move_pool_size`. The default behavior is synchronous execution. Fixes [#47643](https://github.com/ClickHouse/ClickHouse/issues/47643). [#56809](https://github.com/ClickHouse/ClickHouse/pull/56809) ([alesapin](https://github.com/alesapin)). +* Allow filtering by engine when scanning `system.tables`, avoiding unnecessary (potentially time-consuming) connections. [#56813](https://github.com/ClickHouse/ClickHouse/pull/56813) ([jsc0218](https://github.com/jsc0218)). +* Show `total_bytes` and `total_rows` in system tables for RocksDB storage. [#56816](https://github.com/ClickHouse/ClickHouse/pull/56816) ([Aleksandr Musorin](https://github.com/AVMusorin)). +* Allow basic commands in ALTER for TEMPORARY tables. [#56892](https://github.com/ClickHouse/ClickHouse/pull/56892) ([Sergey](https://github.com/icuken)). +* LZ4 compression: buffer the compressed block in the rare case when the output buffer's capacity is not enough for writing the compressed block directly to it. [#56938](https://github.com/ClickHouse/ClickHouse/pull/56938) ([Sema Checherinda](https://github.com/CheSema)). +* Add metrics for the number of queued jobs, which is useful for the IO thread pool. [#56958](https://github.com/ClickHouse/ClickHouse/pull/56958) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a setting for the PostgreSQL table engine in the config file, along with a check for the setting and documentation around it.
[#56959](https://github.com/ClickHouse/ClickHouse/pull/56959) ([Peignon Melvyn](https://github.com/melvynator)). +* Function `concat` can now be called with a single argument, e.g., `SELECT concat('abc')`. This makes its behavior more consistent with MySQL's concat implementation. [#57000](https://github.com/ClickHouse/ClickHouse/pull/57000) ([Serge Klochkov](https://github.com/slvrtrn)). +* Sign all `x-amz-*` headers as required by the AWS S3 docs. [#57001](https://github.com/ClickHouse/ClickHouse/pull/57001) ([Arthur Passos](https://github.com/arthurpassos)). +* Function `fromDaysSinceYearZero` (alias: `FROM_DAYS`) can now be used with unsigned and signed integer types (previously, it had to be an unsigned integer). This improves compatibility with 3rd party tools such as Tableau Online. [#57002](https://github.com/ClickHouse/ClickHouse/pull/57002) ([Serge Klochkov](https://github.com/slvrtrn)). +* Add `system.s3queue_log` to the default config. [#57036](https://github.com/ClickHouse/ClickHouse/pull/57036) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Change the default for `wait_dictionaries_load_at_startup` to true, and use this setting only if `dictionaries_lazy_load` is false. [#57133](https://github.com/ClickHouse/ClickHouse/pull/57133) ([Vitaly Baranov](https://github.com/vitlibar)). +* Check the dictionary source type on creation even if `dictionaries_lazy_load` is enabled. [#57134](https://github.com/ClickHouse/ClickHouse/pull/57134) ([Vitaly Baranov](https://github.com/vitlibar)). +* Plan-level optimizations can now be enabled/disabled individually. Previously, it was only possible to disable them all. The setting which previously did that (`query_plan_enable_optimizations`) is retained and can still be used to disable all optimizations. [#57152](https://github.com/ClickHouse/ClickHouse/pull/57152) ([Robert Schulze](https://github.com/rschu1ze)). +* The server's exit code will correspond to the exception code. For example, if the server cannot start due to a memory limit, it will exit with the code 241 = MEMORY_LIMIT_EXCEEDED. In previous versions, the exit code for exceptions was always 70 = Poco::Util::ExitCode::EXIT_SOFTWARE. [#57153](https://github.com/ClickHouse/ClickHouse/pull/57153) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not demangle and symbolize stack frames from the `functional` C++ header. [#57201](https://github.com/ClickHouse/ClickHouse/pull/57201) ([Mike Kot](https://github.com/myrrc)). +* The HTTP server page `/dashboard` now supports charts with multiple lines. [#57236](https://github.com/ClickHouse/ClickHouse/pull/57236) ([Sergei Trifonov](https://github.com/serxa)). +* The `max_memory_usage_in_client` command line option supports a string value with a suffix (K, M, G, etc). Closes [#56879](https://github.com/ClickHouse/ClickHouse/issues/56879). [#57273](https://github.com/ClickHouse/ClickHouse/pull/57273) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Bumped Intel QPL (used by codec `DEFLATE_QPL`) from v1.2.0 to v1.3.1. Also fixed a bug in the case of BOF (Block On Fault) = 0: changed to handle page faults by falling back to the software path. [#57291](https://github.com/ClickHouse/ClickHouse/pull/57291) ([jasperzhu](https://github.com/jinjunzh)). +* Increase the default `replicated_deduplication_window` of MergeTree settings from 100 to 1000. [#57335](https://github.com/ClickHouse/ClickHouse/pull/57335) ([sichenzhao](https://github.com/sichenzhao)). +* Stop using `INCONSISTENT_METADATA_FOR_BACKUP` that much.
If possible, prefer to continue scanning instead of stopping and restarting the backup scan from the beginning. [#57385](https://github.com/ClickHouse/ClickHouse/pull/57385) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Build/Testing/Packaging Improvement +* Add SQLLogic test. [#56078](https://github.com/ClickHouse/ClickHouse/pull/56078) ([Han Fei](https://github.com/hanfei1991)). +* Make `clickhouse-local` and `clickhouse-client` available under short names (`ch`, `chl`, `chc`) for usability. [#56634](https://github.com/ClickHouse/ClickHouse/pull/56634) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Optimized build size further by removing unused code from external libraries. [#56786](https://github.com/ClickHouse/ClickHouse/pull/56786) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add an automatic check that there are no large translation units. [#56559](https://github.com/ClickHouse/ClickHouse/pull/56559) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Lower the size of the single-binary distribution. This closes [#55181](https://github.com/ClickHouse/ClickHouse/issues/55181). [#56617](https://github.com/ClickHouse/ClickHouse/pull/56617) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Information about the sizes of every translation unit and binary file after each build will be sent to the CI database in ClickHouse Cloud. This closes [#56107](https://github.com/ClickHouse/ClickHouse/issues/56107). [#56636](https://github.com/ClickHouse/ClickHouse/pull/56636) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Certain files of the "Apache Arrow" library (which we use only for non-essential things like parsing the Arrow format) were rebuilt all the time regardless of the build cache. This is fixed. [#56657](https://github.com/ClickHouse/ClickHouse/pull/56657) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid recompiling translation units that depend on the autogenerated version source file. [#56660](https://github.com/ClickHouse/ClickHouse/pull/56660) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Tracing data of the linker invocations will be sent to the CI database in ClickHouse Cloud. [#56725](https://github.com/ClickHouse/ClickHouse/pull/56725) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Use DWARF 5 debug symbols for the clickhouse binary (was DWARF 4 previously). [#56770](https://github.com/ClickHouse/ClickHouse/pull/56770) ([Michael Kolupaev](https://github.com/al13n321)). +* Add a new build option `SANITIZE_COVERAGE`. If it is enabled, the code is instrumented to track the coverage. The collected information is available inside ClickHouse with: (1) a new function `coverage` that returns an array of unique addresses in the code found after the previous coverage reset; (2) a `SYSTEM RESET COVERAGE` query that resets the accumulated data. This allows us to compare the coverage of different tests, including differential code coverage. Continuation of [#20539](https://github.com/ClickHouse/ClickHouse/issues/20539). [#56102](https://github.com/ClickHouse/ClickHouse/pull/56102) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Some of the stack frames might not be resolved when collecting stacks. In such cases the raw address might be helpful. [#56267](https://github.com/ClickHouse/ClickHouse/pull/56267) ([Alexander Gololobov](https://github.com/davenger)). +* Add an option to disable `libssh`.
[#56333](https://github.com/ClickHouse/ClickHouse/pull/56333) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable `temporary_data_in_cache` in S3 tests in CI. [#48425](https://github.com/ClickHouse/ClickHouse/pull/48425) ([vdimir](https://github.com/vdimir)). +* Set the max memory usage for clickhouse-client (`1G`) in the CI. [#56873](https://github.com/ClickHouse/ClickHouse/pull/56873) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Fix experimental Analyzer - an insertion from a SELECT with a subquery referencing the insertion table should process only the insertion block. [#50857](https://github.com/ClickHouse/ClickHouse/pull/50857) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix a bug in the `str_to_map` function. [#56423](https://github.com/ClickHouse/ClickHouse/pull/56423) ([Arthur Passos](https://github.com/arthurpassos)). +* Keeper `reconfig`: add a timeout before yielding/taking leadership [#53481](https://github.com/ClickHouse/ClickHouse/pull/53481) ([Mike Kot](https://github.com/myrrc)). +* Fix incorrect header in grace hash join and filter pushdown [#53922](https://github.com/ClickHouse/ClickHouse/pull/53922) ([vdimir](https://github.com/vdimir)). +* Fix selecting from system tables when the table is based on a table function. [#55540](https://github.com/ClickHouse/ClickHouse/pull/55540) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* RFC: Fix "Cannot find column X in source stream" for Distributed queries with LIMIT BY [#55836](https://github.com/ClickHouse/ClickHouse/pull/55836) ([Azat Khuzhin](https://github.com/azat)). +* Fix 'Cannot read from file:' while running the client in the background [#55976](https://github.com/ClickHouse/ClickHouse/pull/55976) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix clickhouse-local exit on a bad send_logs_level setting [#55994](https://github.com/ClickHouse/ClickHouse/pull/55994) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix EXPLAIN AST with a parameterized view [#56004](https://github.com/ClickHouse/ClickHouse/pull/56004) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix a crash during table loading on startup [#56232](https://github.com/ClickHouse/ClickHouse/pull/56232) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix ClickHouse-sourced dictionaries with an explicit query [#56236](https://github.com/ClickHouse/ClickHouse/pull/56236) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix segfault in signal handler for Keeper [#56266](https://github.com/ClickHouse/ClickHouse/pull/56266) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix incomplete query result for UNION in view() function. [#56274](https://github.com/ClickHouse/ClickHouse/pull/56274) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix inconsistency of "cast('0' as DateTime64(3))" and "cast('0' as Nullable(DateTime64(3)))" [#56286](https://github.com/ClickHouse/ClickHouse/pull/56286) ([李扬](https://github.com/taiyang-li)). +* Fix a rare race condition related to memory allocation failure [#56303](https://github.com/ClickHouse/ClickHouse/pull/56303) ([alesapin](https://github.com/alesapin)). +* Fix restore from backup with `flatten_nested` and `data_type_default_nullable` [#56306](https://github.com/ClickHouse/ClickHouse/pull/56306) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix crash in case of adding a column with type Object(JSON) [#56307](https://github.com/ClickHouse/ClickHouse/pull/56307) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix crash in filterPushDown [#56380](https://github.com/ClickHouse/ClickHouse/pull/56380) ([vdimir](https://github.com/vdimir)). +* Fix restore from backup with mat view and dropped source table [#56383](https://github.com/ClickHouse/ClickHouse/pull/56383) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix segfault during Kerberos initialization [#56401](https://github.com/ClickHouse/ClickHouse/pull/56401) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix buffer overflow in T64 [#56434](https://github.com/ClickHouse/ClickHouse/pull/56434) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix nullable primary key in final (2) [#56452](https://github.com/ClickHouse/ClickHouse/pull/56452) ([Amos Bird](https://github.com/amosbird)). +* Fix ON CLUSTER queries without database on initial node [#56484](https://github.com/ClickHouse/ClickHouse/pull/56484) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix startup failure due to TTL dependency [#56489](https://github.com/ClickHouse/ClickHouse/pull/56489) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix ALTER COMMENT queries ON CLUSTER [#56491](https://github.com/ClickHouse/ClickHouse/pull/56491) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix ALTER COLUMN with ALIAS [#56493](https://github.com/ClickHouse/ClickHouse/pull/56493) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix empty NAMED COLLECTIONs [#56494](https://github.com/ClickHouse/ClickHouse/pull/56494) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix two cases of projection analysis. [#56502](https://github.com/ClickHouse/ClickHouse/pull/56502) ([Amos Bird](https://github.com/amosbird)). +* Fix handling of aliases in query cache [#56545](https://github.com/ClickHouse/ClickHouse/pull/56545) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix conversion from `Nullable(Enum)` to `Nullable(String)` [#56644](https://github.com/ClickHouse/ClickHouse/pull/56644) ([Nikolay Degterinsky](https://github.com/evillique)). +* More reliable log handling in Keeper [#56670](https://github.com/ClickHouse/ClickHouse/pull/56670) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix configuration merge for nodes with substitution attributes [#56694](https://github.com/ClickHouse/ClickHouse/pull/56694) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Fix duplicate usage of table function input(). [#56695](https://github.com/ClickHouse/ClickHouse/pull/56695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix: RabbitMQ OpenSSL dynamic loading issue [#56703](https://github.com/ClickHouse/ClickHouse/pull/56703) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix crash in GCD codec in case when zeros present in data [#56704](https://github.com/ClickHouse/ClickHouse/pull/56704) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix 'mutex lock failed: Invalid argument' in clickhouse-local during insert into function [#56710](https://github.com/ClickHouse/ClickHouse/pull/56710) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix Date text parsing in optimistic path [#56765](https://github.com/ClickHouse/ClickHouse/pull/56765) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Fix crash in FPC codec [#56795](https://github.com/ClickHouse/ClickHouse/pull/56795) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* DatabaseReplicated: fix DDL query timeout after recovering a replica [#56796](https://github.com/ClickHouse/ClickHouse/pull/56796) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix incorrect nullable columns reporting in the MySQL binary protocol [#56799](https://github.com/ClickHouse/ClickHouse/pull/56799) ([Serge Klochkov](https://github.com/slvrtrn)). +* Support Iceberg metadata files for metastore tables [#56810](https://github.com/ClickHouse/ClickHouse/pull/56810) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix TSAN report under transform [#56817](https://github.com/ClickHouse/ClickHouse/pull/56817) ([Raúl Marín](https://github.com/Algunenano)). +* Fix SET query and SETTINGS formatting [#56825](https://github.com/ClickHouse/ClickHouse/pull/56825) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix failure to start due to a table dependency in joinGet [#56828](https://github.com/ClickHouse/ClickHouse/pull/56828) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix flattening of existing Nested columns during ADD COLUMN [#56830](https://github.com/ClickHouse/ClickHouse/pull/56830) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix allowing a CR (`\r`) end of line in CSV [#56901](https://github.com/ClickHouse/ClickHouse/pull/56901) ([KevinyhZou](https://github.com/KevinyhZou)). +* Fix `tryBase64Decode` with invalid input [#56913](https://github.com/ClickHouse/ClickHouse/pull/56913) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix generating deeply nested columns in CapnProto/Protobuf schemas [#56941](https://github.com/ClickHouse/ClickHouse/pull/56941) ([Kruglov Pavel](https://github.com/Avogar)). +* Prevent incompatible ALTER of projection columns [#56948](https://github.com/ClickHouse/ClickHouse/pull/56948) ([Amos Bird](https://github.com/amosbird)). +* Fix SQLite file path validation [#56984](https://github.com/ClickHouse/ClickHouse/pull/56984) ([San](https://github.com/santrancisco)). +* S3Queue: fix metadata reference increment [#56990](https://github.com/ClickHouse/ClickHouse/pull/56990) ([Kseniia Sumarokova](https://github.com/kssenii)). +* S3Queue minor fix [#56999](https://github.com/ClickHouse/ClickHouse/pull/56999) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix file path validation for DatabaseFileSystem [#57029](https://github.com/ClickHouse/ClickHouse/pull/57029) ([San](https://github.com/santrancisco)). +* Fix `fuzzBits` with `ARRAY JOIN` [#57033](https://github.com/ClickHouse/ClickHouse/pull/57033) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix Nullptr dereference in partial merge join with joined_subquery_re… [#57048](https://github.com/ClickHouse/ClickHouse/pull/57048) ([vdimir](https://github.com/vdimir)). +* Fix race condition in RemoteSource [#57052](https://github.com/ClickHouse/ClickHouse/pull/57052) ([Raúl Marín](https://github.com/Algunenano)). +* Implement `bitHammingDistance` for big integers (a short sketch follows below) [#57073](https://github.com/ClickHouse/ClickHouse/pull/57073) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* S3-style links bug fix [#57075](https://github.com/ClickHouse/ClickHouse/pull/57075) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix JSON_QUERY function with multiple numeric paths [#57096](https://github.com/ClickHouse/ClickHouse/pull/57096) ([KevinyhZou](https://github.com/KevinyhZou)).
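+A minimal sketch of the `bitHammingDistance` entry above, which now also accepts big-integer types:
+```sql
+-- 0 and 255 differ in the lowest 8 bits, so the distance is 8,
+-- even for 256-bit integers.
+SELECT bitHammingDistance(CAST(0 AS UInt256), CAST(255 AS UInt256));  -- 8
+```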
+* Fix buffer overflow in Gorilla codec [#57107](https://github.com/ClickHouse/ClickHouse/pull/57107) ([Nikolay Degterinsky](https://github.com/evillique)). +* Close interserver connection on any exception before authentication [#57142](https://github.com/ClickHouse/ClickHouse/pull/57142) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix segfault after ALTER UPDATE with Nullable MATERIALIZED column [#57147](https://github.com/ClickHouse/ClickHouse/pull/57147) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix incorrect JOIN plan optimization with partially materialized normal projection [#57196](https://github.com/ClickHouse/ClickHouse/pull/57196) ([Amos Bird](https://github.com/amosbird)). +* Ignore comments when comparing column descriptions [#57259](https://github.com/ClickHouse/ClickHouse/pull/57259) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix `ReadonlyReplica` metric for all cases [#57267](https://github.com/ClickHouse/ClickHouse/pull/57267) ([Antonio Andelic](https://github.com/antonio2368)). +* Background merges correctly use temporary data storage in the cache [#57275](https://github.com/ClickHouse/ClickHouse/pull/57275) ([vdimir](https://github.com/vdimir)). +* Keeper fix for changelog and snapshots [#57299](https://github.com/ClickHouse/ClickHouse/pull/57299) ([Antonio Andelic](https://github.com/antonio2368)). +* Ignore finished ON CLUSTER tasks if hostname changed [#57339](https://github.com/ClickHouse/ClickHouse/pull/57339) ([Alexander Tokmakov](https://github.com/tavplubix)). +* MergeTree mutations reuse source part index granularity [#57352](https://github.com/ClickHouse/ClickHouse/pull/57352) ([Maksim Kita](https://github.com/kitaisreal)). +* FS cache: add a limit for background download [#57424](https://github.com/ClickHouse/ClickHouse/pull/57424) ([Kseniia Sumarokova](https://github.com/kssenii)). + + +### ClickHouse release 23.10, 2023-11-02 #### Backward Incompatible Change * There is no longer an option to automatically remove broken data parts. This closes [#55174](https://github.com/ClickHouse/ClickHouse/issues/55174). [#55184](https://github.com/ClickHouse/ClickHouse/pull/55184) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#55557](https://github.com/ClickHouse/ClickHouse/pull/55557) ([Jihyuk Bok](https://github.com/tomahawk28)). @@ -39,7 +251,7 @@ * Allow to drop cache for Protobuf format with `SYSTEM DROP SCHEMA FORMAT CACHE [FOR Protobuf]`. [#55064](https://github.com/ClickHouse/ClickHouse/pull/55064) ([Aleksandr Musorin](https://github.com/AVMusorin)). * Add external HTTP Basic authenticator. [#55199](https://github.com/ClickHouse/ClickHouse/pull/55199) ([Aleksei Filatov](https://github.com/aalexfvk)). * Added function `byteSwap` which reverses the bytes of unsigned integers. This is particularly useful for reversing values of types which are represented as unsigned integers internally such as IPv4. [#55211](https://github.com/ClickHouse/ClickHouse/pull/55211) ([Priyansh Agrawal](https://github.com/Priyansh121096)). -* Added function `formatQuery()` which returns a formatted version (possibly spanning multiple lines) of a SQL query string. Also added function `formatQuerySingleLine()` which does the same but the returned string will not contain linebreaks. [#55239](https://github.com/ClickHouse/ClickHouse/pull/55239) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Added function `formatQuery` which returns a formatted version (possibly spanning multiple lines) of a SQL query string. 
Also added function `formatQuerySingleLine` which does the same but the returned string will not contain linebreaks. [#55239](https://github.com/ClickHouse/ClickHouse/pull/55239) ([Salvatore Mesoraca](https://github.com/aiven-sal)). * Added `DWARF` input format that reads debug symbols from an ELF executable/library/object file. [#55450](https://github.com/ClickHouse/ClickHouse/pull/55450) ([Michael Kolupaev](https://github.com/al13n321)). * Allow to save unparsed records and errors in RabbitMQ, NATS and FileLog engines. Add virtual columns `_error` and `_raw_message` (for NATS and RabbitMQ), `_raw_record` (for FileLog) that are filled when ClickHouse fails to parse a new record. The behaviour is controlled by the storage settings `nats_handle_error_mode` for NATS, `rabbitmq_handle_error_mode` for RabbitMQ, `handle_error_mode` for FileLog, similar to `kafka_handle_error_mode`. If it's set to `default`, an exception will be thrown when ClickHouse fails to parse a record; if it's set to `stream`, the error and raw record will be saved into the virtual columns. Closes [#36035](https://github.com/ClickHouse/ClickHouse/issues/36035). [#55477](https://github.com/ClickHouse/ClickHouse/pull/55477) ([Kruglov Pavel](https://github.com/Avogar)). * Keeper client improvement: add a `get_all_children_number` command that returns the number of all child nodes under a specific path. [#55485](https://github.com/ClickHouse/ClickHouse/pull/55485) ([guoxiaolong](https://github.com/guoxiaolongzte)). @@ -74,11 +286,11 @@ * Reduced memory consumption during loading of hierarchical dictionaries. [#55838](https://github.com/ClickHouse/ClickHouse/pull/55838) ([Nikita Taranov](https://github.com/nickitat)). * All dictionaries support setting `dictionary_use_async_executor`. [#55839](https://github.com/ClickHouse/ClickHouse/pull/55839) ([vdimir](https://github.com/vdimir)). * Prevent excessive memory usage when deserializing AggregateFunctionTopKGenericData. [#55947](https://github.com/ClickHouse/ClickHouse/pull/55947) ([Raúl Marín](https://github.com/Algunenano)). -* On a Keeper with lots of watches AsyncMetrics threads can consume 100% of CPU for noticable time in `DB::KeeperStorage::getSessionsWithWatchesCount()`. The fix is to avoid traversing heavy `watches` and `list_watches` sets. [#56054](https://github.com/ClickHouse/ClickHouse/pull/56054) ([Alexander Gololobov](https://github.com/davenger)). -* Add setting `optimize_trivial_approximate_count_query` to use `count()` approximation for storage EmbeddedRocksDB. Enable trivial count for StorageJoin. [#55806](https://github.com/ClickHouse/ClickHouse/pull/55806) ([Duc Canh Le](https://github.com/canhld94)). +* On a Keeper with lots of watches, AsyncMetrics threads can consume 100% of CPU for a noticeable time in `DB::KeeperStorage::getSessionsWithWatchesCount`. The fix is to avoid traversing the heavy `watches` and `list_watches` sets. [#56054](https://github.com/ClickHouse/ClickHouse/pull/56054) ([Alexander Gololobov](https://github.com/davenger)). +* Add setting `optimize_trivial_approximate_count_query` to use `count` approximation for storage EmbeddedRocksDB. Enable trivial count for StorageJoin. [#55806](https://github.com/ClickHouse/ClickHouse/pull/55806) ([Duc Canh Le](https://github.com/canhld94)). #### Improvement -* Functions `toDayOfWeek()` (MySQL alias: `DAYOFWEEK()`), `toYearWeek()` (`YEARWEEK()`) and `toWeek()` (`WEEK()`) now supports `String` arguments. This makes its behavior consistent with MySQL's behavior.
[#55589](https://github.com/ClickHouse/ClickHouse/pull/55589) ([Robert Schulze](https://github.com/rschu1ze)). +* Functions `toDayOfWeek` (MySQL alias: `DAYOFWEEK`), `toYearWeek` (`YEARWEEK`) and `toWeek` (`WEEK`) now support `String` arguments. This makes their behavior consistent with MySQL's behavior. [#55589](https://github.com/ClickHouse/ClickHouse/pull/55589) ([Robert Schulze](https://github.com/rschu1ze)). * Introduced setting `date_time_overflow_behavior` with possible values `ignore`, `throw`, `saturate` that controls the overflow behavior when converting from Date, Date32, DateTime64, Integer or Float to Date, Date32, DateTime or DateTime64. [#55696](https://github.com/ClickHouse/ClickHouse/pull/55696) ([Andrey Zvonov](https://github.com/zvonand)). * Implement query parameters support for `ALTER TABLE ... ACTION PARTITION [ID] {parameter_name:ParameterType}`. Merges [#49516](https://github.com/ClickHouse/ClickHouse/issues/49516). Closes [#49449](https://github.com/ClickHouse/ClickHouse/issues/49449). [#55604](https://github.com/ClickHouse/ClickHouse/pull/55604) ([alesapin](https://github.com/alesapin)). * Print processor IDs in a prettier manner in EXPLAIN. [#48852](https://github.com/ClickHouse/ClickHouse/pull/48852) ([Vlad Seliverstov](https://github.com/behebot)). @@ -112,7 +324,7 @@ * Functions `(add|subtract)(Year|Quarter|Month|Week|Day|Hour|Minute|Second|Millisecond|Microsecond|Nanosecond)` now support string-encoded date arguments, e.g. `SELECT addDays('2023-10-22', 1)`. This increases compatibility with MySQL and is needed by Tableau Online. [#55869](https://github.com/ClickHouse/ClickHouse/pull/55869) ([Robert Schulze](https://github.com/rschu1ze)). * The setting `apply_deleted_mask` when disabled allows reading rows that were marked as deleted by lightweight DELETE queries. This is useful for debugging. [#55952](https://github.com/ClickHouse/ClickHouse/pull/55952) ([Alexander Gololobov](https://github.com/davenger)). * Allow skipping `null` values when serializing Tuple to JSON objects, which makes it possible to keep compatibility with Spark's `to_json` function, which is also useful for Gluten. [#55956](https://github.com/ClickHouse/ClickHouse/pull/55956) ([李扬](https://github.com/taiyang-li)). -* Functions `(add|sub)Date()` now support string-encoded date arguments, e.g. `SELECT addDate('2023-10-22 11:12:13', INTERVAL 5 MINUTE)`. The same support for string-encoded date arguments is added to the plus and minus operators, e.g. `SELECT '2023-10-23' + INTERVAL 1 DAY`. This increases compatibility with MySQL and is needed by Tableau Online. [#55960](https://github.com/ClickHouse/ClickHouse/pull/55960) ([Robert Schulze](https://github.com/rschu1ze)). +* Functions `(add|sub)Date` now support string-encoded date arguments, e.g. `SELECT addDate('2023-10-22 11:12:13', INTERVAL 5 MINUTE)`. The same support for string-encoded date arguments is added to the plus and minus operators, e.g. `SELECT '2023-10-23' + INTERVAL 1 DAY`. This increases compatibility with MySQL and is needed by Tableau Online. [#55960](https://github.com/ClickHouse/ClickHouse/pull/55960) ([Robert Schulze](https://github.com/rschu1ze)). * Allow unquoted strings with CR (`\r`) in CSV format. Closes [#39930](https://github.com/ClickHouse/ClickHouse/issues/39930). [#56046](https://github.com/ClickHouse/ClickHouse/pull/56046) ([Kruglov Pavel](https://github.com/Avogar)). * Allow to run `clickhouse-keeper` using an embedded config.
[#56086](https://github.com/ClickHouse/ClickHouse/pull/56086) ([Maksim Kita](https://github.com/kitaisreal)). * Set a limit on the maximum configuration value for `queued.min.messages` to avoid problems when starting to fetch data from Kafka. [#56121](https://github.com/ClickHouse/ClickHouse/pull/56121) ([Stas Morozov](https://github.com/r3b-fish)). @@ -133,7 +345,7 @@ * Fixed a bug where the `match` function (regex) with a pattern containing alternation produced an incorrect key condition. Closes #53222. [#54696](https://github.com/ClickHouse/ClickHouse/pull/54696) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). * Fix 'Cannot find column' in read-in-order optimization with ARRAY JOIN [#51746](https://github.com/ClickHouse/ClickHouse/pull/51746) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Support missing experimental `Object(Nullable(json))` subcolumns in queries. [#54052](https://github.com/ClickHouse/ClickHouse/pull/54052) ([zps](https://github.com/VanDarkholme7)). -* Re-add fix for `accurateCastOrNull()` [#54629](https://github.com/ClickHouse/ClickHouse/pull/54629) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Re-add fix for `accurateCastOrNull` [#54629](https://github.com/ClickHouse/ClickHouse/pull/54629) ([Salvatore Mesoraca](https://github.com/aiven-sal)). * Fix detecting `DEFAULT` for columns of a Distributed table created without AS [#55060](https://github.com/ClickHouse/ClickHouse/pull/55060) ([Vitaly Baranov](https://github.com/vitlibar)). * Proper cleanup in case of exception in ctor of ShellCommandSource [#55103](https://github.com/ClickHouse/ClickHouse/pull/55103) ([Alexander Gololobov](https://github.com/davenger)). * Fix deadlock in LDAP assigned role update [#55119](https://github.com/ClickHouse/ClickHouse/pull/55119) ([Julian Maicher](https://github.com/jmaicher)). @@ -191,7 +403,7 @@ * Add error handler to odbc-bridge [#56185](https://github.com/ClickHouse/ClickHouse/pull/56185) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -### ClickHouse release 23.9, 2023-09-28 +### ClickHouse release 23.9, 2023-09-28 #### Backward Incompatible Change * Remove the `status_info` configuration option and dictionaries status from the default Prometheus handler. [#54090](https://github.com/ClickHouse/ClickHouse/pull/54090) ([Alexey Milovidov](https://github.com/alexey-milovidov)). @@ -213,7 +425,7 @@ * Add function `decodeHTMLComponent`. [#54097](https://github.com/ClickHouse/ClickHouse/pull/54097) ([Bharat Nallan](https://github.com/bharatnc)). * Added `peak_threads_usage` to the query_log table. [#54335](https://github.com/ClickHouse/ClickHouse/pull/54335) ([Alexey Gerasimchuck](https://github.com/Demilivor)). * Add `SHOW FUNCTIONS` support to clickhouse-client. [#54337](https://github.com/ClickHouse/ClickHouse/pull/54337) ([Julia Kartseva](https://github.com/wat-ze-hex)). -* Added function `toDaysSinceYearZero` with alias `TO_DAYS` (for compatibility with MySQL) which returns the number of days passed since `0001-01-01` (in Proleptic Gregorian Calendar). [#54479](https://github.com/ClickHouse/ClickHouse/pull/54479) ([Robert Schulze](https://github.com/rschu1ze)). Function `toDaysSinceYearZero()` now supports arguments of type `DateTime` and `DateTime64`. [#54856](https://github.com/ClickHouse/ClickHouse/pull/54856) ([Serge Klochkov](https://github.com/slvrtrn)). +* Added function `toDaysSinceYearZero` with alias `TO_DAYS` (for compatibility with MySQL) which returns the number of days passed since `0001-01-01` (in Proleptic Gregorian Calendar). 
[#54479](https://github.com/ClickHouse/ClickHouse/pull/54479) ([Robert Schulze](https://github.com/rschu1ze)). Function `toDaysSinceYearZero` now supports arguments of type `DateTime` and `DateTime64` (see the example below). [#54856](https://github.com/ClickHouse/ClickHouse/pull/54856) ([Serge Klochkov](https://github.com/slvrtrn)). * Added functions `YYYYMMDDtoDate`, `YYYYMMDDtoDate32`, `YYYYMMDDhhmmssToDateTime` and `YYYYMMDDhhmmssToDateTime64`. They convert a date or date with time encoded as an integer (e.g. 20230911) into a native date or date with time. As such, they provide the opposite functionality of existing functions `YYYYMMDDToDate`, `YYYYMMDDToDateTime`, `YYYYMMDDhhmmddToDateTime`, `YYYYMMDDhhmmddToDateTime64`. [#54509](https://github.com/ClickHouse/ClickHouse/pull/54509) ([Quanfa Fu](https://github.com/dentiscalprum)) ([Robert Schulze](https://github.com/rschu1ze)). * Add several string distance functions, including `byteHammingDistance`, `editDistance`. [#54935](https://github.com/ClickHouse/ClickHouse/pull/54935) ([flynn](https://github.com/ucasfl)). * Allow specifying the expiration date and, optionally, the time for user credentials with the `VALID UNTIL datetime` clause. [#51261](https://github.com/ClickHouse/ClickHouse/pull/51261) ([Nikolay Degterinsky](https://github.com/evillique)). @@ -229,7 +441,7 @@ * An optimization to rewrite `COUNT(DISTINCT ...)` and various `uniq` variants to `count` if it is selected from a subquery with GROUP BY. [#52082](https://github.com/ClickHouse/ClickHouse/pull/52082) [#52645](https://github.com/ClickHouse/ClickHouse/pull/52645) ([JackyWoo](https://github.com/JackyWoo)). * Remove manual calls to `mmap/mremap/munmap` and delegate all this work to `jemalloc` - and it slightly improves performance. [#52792](https://github.com/ClickHouse/ClickHouse/pull/52792) ([Nikita Taranov](https://github.com/nickitat)). * Fixed high CPU consumption when working with NATS. [#54399](https://github.com/ClickHouse/ClickHouse/pull/54399) ([Vasilev Pyotr](https://github.com/vahpetr)). -* Since we use separate instructions for executing `toString()` with a datetime argument, it is possible to improve performance a bit for non-datetime arguments and make some parts of the code cleaner. Follows up [#53680](https://github.com/ClickHouse/ClickHouse/issues/53680). [#54443](https://github.com/ClickHouse/ClickHouse/pull/54443) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Since we use separate instructions for executing `toString` with a datetime argument, it is possible to improve performance a bit for non-datetime arguments and make some parts of the code cleaner. Follows up [#53680](https://github.com/ClickHouse/ClickHouse/issues/53680). [#54443](https://github.com/ClickHouse/ClickHouse/pull/54443) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). * Instead of serializing JSON elements into a `std::stringstream`, this PR tries to put the serialization result into `ColumnString` directly. [#54613](https://github.com/ClickHouse/ClickHouse/pull/54613) ([lgbo](https://github.com/lgbo-ustc)). * Enable ORDER BY optimization for reading data in the corresponding order from a MergeTree table in case the table is behind a view. [#54628](https://github.com/ClickHouse/ClickHouse/pull/54628) ([Vitaly Baranov](https://github.com/vitlibar)). * Improve JSON SQL functions by reusing `GeneratorJSONPath` and removing several shared pointers. [#54735](https://github.com/ClickHouse/ClickHouse/pull/54735) ([lgbo](https://github.com/lgbo-ustc)). 
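To make the date-handling entries above concrete, here is a small worked example. It is a hedged sketch: the first three queries are taken verbatim from the string-encoded date entries, and the last one only assumes the documented `toDaysSinceYearZero` semantics (days since `0001-01-01` in the proleptic Gregorian calendar); exact result rendering may vary between versions.

```sql
-- String-encoded date arguments, as quoted in the entries above.
SELECT addDays('2023-10-22', 1);                           -- 2023-10-23
SELECT addDate('2023-10-22 11:12:13', INTERVAL 5 MINUTE);  -- 2023-10-22 11:17:13
SELECT '2023-10-23' + INTERVAL 1 DAY;                      -- 2023-10-24

-- toDaysSinceYearZero (alias TO_DAYS) now also accepts DateTime arguments.
SELECT toDaysSinceYearZero(toDateTime('2023-09-08 12:00:00'));
```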
@@ -479,7 +691,7 @@ * The `domainRFC` function now supports IPv6 in square brackets. [#53506](https://github.com/ClickHouse/ClickHouse/pull/53506) ([Chen768959](https://github.com/Chen768959)). * Use a longer timeout for S3 CopyObject requests, which are used in backups. [#53533](https://github.com/ClickHouse/ClickHouse/pull/53533) ([Michael Kolupaev](https://github.com/al13n321)). * Added server setting `aggregate_function_group_array_max_element_size`. This setting is used to limit the array size for the `groupArray` function during serialization. The default value is `16777215`. [#53550](https://github.com/ClickHouse/ClickHouse/pull/53550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* `SCHEMA()` was added as an alias for `DATABASE()` to improve MySQL compatibility. [#53587](https://github.com/ClickHouse/ClickHouse/pull/53587) ([Daniël van Eeden](https://github.com/dveeden)). +* `SCHEMA` was added as an alias for `DATABASE` to improve MySQL compatibility (see the example below). [#53587](https://github.com/ClickHouse/ClickHouse/pull/53587) ([Daniël van Eeden](https://github.com/dveeden)). * Add asynchronous metrics about tables in the system database. For example, `TotalBytesOfMergeTreeTablesSystem`. This closes [#53603](https://github.com/ClickHouse/ClickHouse/issues/53603). [#53604](https://github.com/ClickHouse/ClickHouse/pull/53604) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * The SQL editor in the Play UI and Dashboard will not use Grammarly. [#53614](https://github.com/ClickHouse/ClickHouse/pull/53614) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * As expert-level settings, it is now possible to (1) configure the size_ratio (i.e. the relative size of the protected queue) of the [index] mark/uncompressed caches, (2) configure the cache policy of the index mark and index uncompressed caches. [#53657](https://github.com/ClickHouse/ClickHouse/pull/53657) ([Robert Schulze](https://github.com/rschu1ze)). @@ -741,7 +953,7 @@ * Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). * Fix `apply_snapshot` in Keeper [#52358](https://github.com/ClickHouse/ClickHouse/pull/52358) ([Antonio Andelic](https://github.com/antonio2368)). * Update build-osx.md [#52377](https://github.com/ClickHouse/ClickHouse/pull/52377) ([AlexBykovski](https://github.com/AlexBykovski)). -* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* Fix `countSubstrings` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). * Fix normal projection with merge table [#52432](https://github.com/ClickHouse/ClickHouse/pull/52432) ([Amos Bird](https://github.com/amosbird)). * Fix possible double-free in Aggregator [#52439](https://github.com/ClickHouse/ClickHouse/pull/52439) ([Nikita Taranov](https://github.com/nickitat)). * Fixed inserting into the Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). @@ -1585,7 +1797,7 @@ * A couple of segfaults have been reported around `c-ares`. They were introduced in my previous pull requests. I have fixed them with the help of Alexander Tokmakov. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)). 
* Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)). * Set the compression method and level for backups. Closes [#45690](https://github.com/ClickHouse/ClickHouse/issues/45690). [#45737](https://github.com/ClickHouse/ClickHouse/pull/45737) ([Pradeep Chhetri](https://github.com/chhetripradeep)). -* Should use `select_query_typed.limitByOffset()` instead of `select_query_typed.limitOffset()`. [#45817](https://github.com/ClickHouse/ClickHouse/pull/45817) ([刘陶峰](https://github.com/taofengliu)). +* Should use `select_query_typed.limitByOffset` instead of `select_query_typed.limitOffset`. [#45817](https://github.com/ClickHouse/ClickHouse/pull/45817) ([刘陶峰](https://github.com/taofengliu)). * When using the experimental analyzer, queries like `SELECT number FROM numbers(100) LIMIT 10 OFFSET 10;` got wrong results (an empty result for this SQL). That was caused by an unnecessary offset step added by the planner. [#45822](https://github.com/ClickHouse/ClickHouse/pull/45822) ([刘陶峰](https://github.com/taofengliu)). * Backward compatibility - allow implicit narrowing conversion from UInt64 to IPv4 - required for "INSERT ... VALUES ..." expressions. [#45865](https://github.com/ClickHouse/ClickHouse/pull/45865) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). * Fix the IPv6 parser for mixed IPv4 addresses with a missing first octet (like `::.1.2.3`). [#45871](https://github.com/ClickHouse/ClickHouse/pull/45871) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). diff --git a/README.md b/README.md index 7642cb100ed..bf8ef0b4e98 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ curl https://clickhouse.com/ | sh * [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/296488501/) - Nov 30 * [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/296488779/) - Dec 11 +* [**ClickHouse Meetup in Sydney**](https://www.meetup.com/clickhouse-sydney-user-group/events/297638812/) - Dec 12 * [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/296488840/) - Dec 12 Also, keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler clickhouse com. 
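Returning to the `SCHEMA` entry in the changelog hunk above, here is a minimal sketch of the MySQL-compatibility alias. It assumes only what the entry states (that `SCHEMA` aliases `DATABASE`):

```sql
-- DATABASE() is the existing function; SCHEMA() is the new alias for it.
SELECT currentDatabase() AS db, DATABASE() AS db_alias, SCHEMA() AS schema_alias;
-- All three columns should return the name of the current database.
```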
diff --git a/SECURITY.md b/SECURITY.md index 5477628cee4..7aaf9f3e5b9 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,6 +13,7 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 23.11 | ✔️ | | 23.10 | ✔️ | | 23.9 | ✔️ | | 23.8 | ✔️ | diff --git a/base/glibc-compatibility/glibc-compatibility.c b/base/glibc-compatibility/glibc-compatibility.c index 49bb81a58be..738cda47877 100644 --- a/base/glibc-compatibility/glibc-compatibility.c +++ b/base/glibc-compatibility/glibc-compatibility.c @@ -30,7 +30,6 @@ int __gai_sigqueue(int sig, const union sigval val, pid_t caller_pid) } -#include #include #include diff --git a/base/poco/Foundation/CMakeLists.txt b/base/poco/Foundation/CMakeLists.txt index d0dde8a51a5..dfb41a33fb1 100644 --- a/base/poco/Foundation/CMakeLists.txt +++ b/base/poco/Foundation/CMakeLists.txt @@ -55,7 +55,6 @@ set (SRCS src/DigestStream.cpp src/DirectoryIterator.cpp src/DirectoryIteratorStrategy.cpp - src/DirectoryWatcher.cpp src/Environment.cpp src/Error.cpp src/ErrorHandler.cpp diff --git a/base/poco/Foundation/include/Poco/DirectoryWatcher.h b/base/poco/Foundation/include/Poco/DirectoryWatcher.h deleted file mode 100644 index 00964a5512a..00000000000 --- a/base/poco/Foundation/include/Poco/DirectoryWatcher.h +++ /dev/null @@ -1,228 +0,0 @@ -// -// DirectoryWatcher.h -// -// Library: Foundation -// Package: Filesystem -// Module: DirectoryWatcher -// -// Definition of the DirectoryWatcher class. -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_DirectoryWatcher_INCLUDED -#define Foundation_DirectoryWatcher_INCLUDED - - -#include "Poco/Foundation.h" - - -#ifndef POCO_NO_INOTIFY - - -# include "Poco/AtomicCounter.h" -# include "Poco/BasicEvent.h" -# include "Poco/File.h" -# include "Poco/Runnable.h" -# include "Poco/Thread.h" - - -namespace Poco -{ - - -class DirectoryWatcherStrategy; - - -class Foundation_API DirectoryWatcher : protected Runnable -/// This class is used to get notifications about changes -/// to the filesystem, more specifically, to a specific -/// directory. Changes to a directory are reported via -/// events. -/// -/// A thread will be created that watches the specified -/// directory for changes. Events are reported in the context -/// of this thread. -/// -/// Note that changes to files in subdirectories of the watched -/// directory are not reported. Separate DirectoryWatcher objects -/// must be created for these directories if they should be watched. -/// -/// Changes to file attributes are not reported. -/// -/// On Windows, this class is implemented using FindFirstChangeNotification()/FindNextChangeNotification(). -/// On Linux, this class is implemented using inotify. -/// On FreeBSD and Darwin (Mac OS X, iOS), this class uses kevent/kqueue. -/// On all other platforms, the watched directory is periodically scanned -/// for changes. This can negatively affect performance if done too often. -/// Therefore, the interval in which scans are done can be specified in -/// the constructor. Note that periodic scanning will also be done on FreeBSD -/// and Darwin if events for changes to files (DW_ITEM_MODIFIED) are enabled. -/// -/// DW_ITEM_MOVED_FROM and DW_ITEM_MOVED_TO events will only be reported -/// on Linux. On other platforms, a file rename or move operation -/// will be reported via a DW_ITEM_REMOVED and a DW_ITEM_ADDED event. 
-/// The order of these two events is not defined. -/// -/// An event mask can be specified to enable only certain events. -{ -public: - enum DirectoryEventType - { - DW_ITEM_ADDED = 1, - /// A new item has been created and added to the directory. - - DW_ITEM_REMOVED = 2, - /// An item has been removed from the directory. - - DW_ITEM_MODIFIED = 4, - /// An item has been modified. - - DW_ITEM_MOVED_FROM = 8, - /// An item has been renamed or moved. This event delivers the old name. - - DW_ITEM_MOVED_TO = 16 - /// An item has been renamed or moved. This event delivers the new name. - }; - - enum DirectoryEventMask - { - DW_FILTER_ENABLE_ALL = 31, - /// Enables all event types. - - DW_FILTER_DISABLE_ALL = 0 - /// Disables all event types. - }; - - enum - { - DW_DEFAULT_SCAN_INTERVAL = 5 /// Default scan interval for platforms that don't provide a native notification mechanism. - }; - - struct DirectoryEvent - { - DirectoryEvent(const File & f, DirectoryEventType ev) : item(f), event(ev) { } - - const File & item; /// The directory or file that has been changed. - DirectoryEventType event; /// The kind of event. - }; - - BasicEvent<DirectoryEvent> itemAdded; - /// Fired when a file or directory has been created or added to the directory. - - BasicEvent<DirectoryEvent> itemRemoved; - /// Fired when a file or directory has been removed from the directory. - - BasicEvent<DirectoryEvent> itemModified; - /// Fired when a file or directory has been modified. - - BasicEvent<DirectoryEvent> itemMovedFrom; - /// Fired when a file or directory has been renamed. This event delivers the old name. - - BasicEvent<DirectoryEvent> itemMovedTo; - /// Fired when a file or directory has been moved. This event delivers the new name. - - BasicEvent<const Exception> scanError; - /// Fired when an error occurs while scanning for changes. - - DirectoryWatcher(const std::string & path, int eventMask = DW_FILTER_ENABLE_ALL, int scanInterval = DW_DEFAULT_SCAN_INTERVAL); - /// Creates a DirectoryWatcher for the directory given in path. - /// To enable only specific events, an eventMask can be specified by - /// OR-ing the desired event IDs (e.g., DW_ITEM_ADDED | DW_ITEM_MODIFIED). - /// On platforms where no native filesystem notifications are available, - /// scanInterval specifies the interval in seconds between scans - /// of the directory. - - DirectoryWatcher(const File & directory, int eventMask = DW_FILTER_ENABLE_ALL, int scanInterval = DW_DEFAULT_SCAN_INTERVAL); - /// Creates a DirectoryWatcher for the specified directory - /// To enable only specific events, an eventMask can be specified by - /// OR-ing the desired event IDs (e.g., DW_ITEM_ADDED | DW_ITEM_MODIFIED). - /// On platforms where no native filesystem notifications are available, - /// scanInterval specifies the interval in seconds between scans - /// of the directory. - - ~DirectoryWatcher(); - /// Destroys the DirectoryWatcher. - - void suspendEvents(); - /// Suspends sending of events. Can be called multiple times, but every - /// call to suspendEvent() must be matched by a call to resumeEvents(). - - void resumeEvents(); - /// Resumes events, after they have been suspended with a call to suspendEvents(). - - bool eventsSuspended() const; - /// Returns true iff events are suspended. - - int eventMask() const; - /// Returns the value of the eventMask passed to the constructor. - - int scanInterval() const; - /// Returns the scan interval in seconds. - - const File & directory() const; - /// Returns the directory being watched. 
- - bool supportsMoveEvents() const; - /// Returns true iff the platform supports DW_ITEM_MOVED_FROM/itemMovedFrom and - /// DW_ITEM_MOVED_TO/itemMovedTo events. - -protected: - void init(); - void stop(); - void run(); - -private: - DirectoryWatcher(); - DirectoryWatcher(const DirectoryWatcher &); - DirectoryWatcher & operator=(const DirectoryWatcher &); - - Thread _thread; - File _directory; - int _eventMask; - AtomicCounter _eventsSuspended; - int _scanInterval; - DirectoryWatcherStrategy * _pStrategy; -}; - - -// -// inlines -// - - -inline bool DirectoryWatcher::eventsSuspended() const -{ - return _eventsSuspended.value() > 0; -} - - -inline int DirectoryWatcher::eventMask() const -{ - return _eventMask; -} - - -inline int DirectoryWatcher::scanInterval() const -{ - return _scanInterval; -} - - -inline const File & DirectoryWatcher::directory() const -{ - return _directory; -} - - -} // namespace Poco - - -#endif // POCO_NO_INOTIFY - - -#endif // Foundation_DirectoryWatcher_INCLUDED diff --git a/base/poco/Foundation/src/DirectoryWatcher.cpp b/base/poco/Foundation/src/DirectoryWatcher.cpp deleted file mode 100644 index b559da65e09..00000000000 --- a/base/poco/Foundation/src/DirectoryWatcher.cpp +++ /dev/null @@ -1,602 +0,0 @@ -// -// DirectoryWatcher.cpp -// -// Library: Foundation -// Package: Filesystem -// Module: DirectoryWatcher -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/DirectoryWatcher.h" - - -#ifndef POCO_NO_INOTIFY - - -#include "Poco/Path.h" -#include "Poco/Glob.h" -#include "Poco/DirectoryIterator.h" -#include "Poco/Event.h" -#include "Poco/Exception.h" -#include "Poco/Buffer.h" -#if POCO_OS == POCO_OS_LINUX || POCO_OS == POCO_OS_ANDROID - #include - #include - #include -#elif POCO_OS == POCO_OS_MAC_OS_X || POCO_OS == POCO_OS_FREE_BSD - #include - #include - #include - #include - #include - #if (POCO_OS == POCO_OS_FREE_BSD) && !defined(O_EVTONLY) - #define O_EVTONLY 0x8000 - #endif -#endif -#include -#include -#include - -namespace Poco { - - -class DirectoryWatcherStrategy -{ -public: - DirectoryWatcherStrategy(DirectoryWatcher& owner): - _owner(owner) - { - } - - virtual ~DirectoryWatcherStrategy() - { - } - - DirectoryWatcher& owner() - { - return _owner; - } - - virtual void run() = 0; - virtual void stop() = 0; - virtual bool supportsMoveEvents() const = 0; - -protected: - struct ItemInfo - { - ItemInfo(): - size(0) - { - } - - ItemInfo(const ItemInfo& other): - path(other.path), - size(other.size), - lastModified(other.lastModified) - { - } - - explicit ItemInfo(const File& f): - path(f.path()), - size(f.isFile() ? 
f.getSize() : 0), - lastModified(f.getLastModified()) - { - } - - std::string path; - File::FileSize size; - Timestamp lastModified; - }; - typedef std::map<std::string, ItemInfo> ItemInfoMap; - - void scan(ItemInfoMap& entries) - { - DirectoryIterator it(owner().directory()); - DirectoryIterator end; - while (it != end) - { - entries[it.path().getFileName()] = ItemInfo(*it); - ++it; - } - } - - void compare(ItemInfoMap& oldEntries, ItemInfoMap& newEntries) - { - for (ItemInfoMap::iterator itn = newEntries.begin(); itn != newEntries.end(); ++itn) - { - ItemInfoMap::iterator ito = oldEntries.find(itn->first); - if (ito != oldEntries.end()) - { - if ((owner().eventMask() & DirectoryWatcher::DW_ITEM_MODIFIED) && !owner().eventsSuspended()) - { - if (itn->second.size != ito->second.size || itn->second.lastModified != ito->second.lastModified) - { - Poco::File f(itn->second.path); - DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_MODIFIED); - owner().itemModified(&owner(), ev); - } - } - oldEntries.erase(ito); - } - else if ((owner().eventMask() & DirectoryWatcher::DW_ITEM_ADDED) && !owner().eventsSuspended()) - { - Poco::File f(itn->second.path); - DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_ADDED); - owner().itemAdded(&owner(), ev); - } - } - if ((owner().eventMask() & DirectoryWatcher::DW_ITEM_REMOVED) && !owner().eventsSuspended()) - { - for (ItemInfoMap::iterator it = oldEntries.begin(); it != oldEntries.end(); ++it) - { - Poco::File f(it->second.path); - DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_REMOVED); - owner().itemRemoved(&owner(), ev); - } - } - } - -private: - DirectoryWatcherStrategy(); - DirectoryWatcherStrategy(const DirectoryWatcherStrategy&); - DirectoryWatcherStrategy& operator = (const DirectoryWatcherStrategy&); - - DirectoryWatcher& _owner; -}; - - -#if POCO_OS == POCO_OS_WINDOWS_NT - - -class WindowsDirectoryWatcherStrategy: public DirectoryWatcherStrategy -{ -public: - WindowsDirectoryWatcherStrategy(DirectoryWatcher& owner): - DirectoryWatcherStrategy(owner) - { - _hStopped = CreateEventW(NULL, FALSE, FALSE, NULL); - if (!_hStopped) - throw SystemException("cannot create event"); - } - - ~WindowsDirectoryWatcherStrategy() - { - CloseHandle(_hStopped); - } - - void run() - { - ItemInfoMap entries; - scan(entries); - - DWORD filter = FILE_NOTIFY_CHANGE_FILE_NAME | FILE_NOTIFY_CHANGE_DIR_NAME; - if (owner().eventMask() & DirectoryWatcher::DW_ITEM_MODIFIED) - filter |= FILE_NOTIFY_CHANGE_SIZE | FILE_NOTIFY_CHANGE_LAST_WRITE; - - std::string path(owner().directory().path()); - HANDLE hChange = FindFirstChangeNotificationA(path.c_str(), FALSE, filter); - - if (hChange == INVALID_HANDLE_VALUE) - { - try - { - FileImpl::handleLastErrorImpl(path); - } - catch (Poco::Exception& exc) - { - owner().scanError(&owner(), exc); - } - return; - } - - bool stopped = false; - while (!stopped) - { - try - { - HANDLE h[2]; - h[0] = _hStopped; - h[1] = hChange; - switch (WaitForMultipleObjects(2, h, FALSE, INFINITE)) - { - case WAIT_OBJECT_0: - stopped = true; - break; - case WAIT_OBJECT_0 + 1: - { - ItemInfoMap newEntries; - scan(newEntries); - compare(entries, newEntries); - std::swap(entries, newEntries); - if (FindNextChangeNotification(hChange) == FALSE) - { - FileImpl::handleLastErrorImpl(path); - } - } - break; - default: - throw SystemException("failed to wait for directory changes"); - } - } - catch (Poco::Exception& exc) - { - owner().scanError(&owner(), exc); - } - } - FindCloseChangeNotification(hChange); - } - - void stop() - { - 
SetEvent(_hStopped); - } - - bool supportsMoveEvents() const - { - return false; - } - -private: - HANDLE _hStopped; -}; - - -#elif POCO_OS == POCO_OS_LINUX || POCO_OS == POCO_OS_ANDROID - - -class LinuxDirectoryWatcherStrategy: public DirectoryWatcherStrategy -{ -public: - LinuxDirectoryWatcherStrategy(DirectoryWatcher& owner): - DirectoryWatcherStrategy(owner), - _fd(-1), - _stopped(false) - { - _fd = inotify_init(); - if (_fd == -1) throw Poco::IOException("cannot initialize inotify", errno); - } - - ~LinuxDirectoryWatcherStrategy() - { - close(_fd); - } - - void run() - { - int mask = 0; - if (owner().eventMask() & DirectoryWatcher::DW_ITEM_ADDED) - mask |= IN_CREATE; - if (owner().eventMask() & DirectoryWatcher::DW_ITEM_REMOVED) - mask |= IN_DELETE; - if (owner().eventMask() & DirectoryWatcher::DW_ITEM_MODIFIED) - mask |= IN_MODIFY; - if (owner().eventMask() & DirectoryWatcher::DW_ITEM_MOVED_FROM) - mask |= IN_MOVED_FROM; - if (owner().eventMask() & DirectoryWatcher::DW_ITEM_MOVED_TO) - mask |= IN_MOVED_TO; - int wd = inotify_add_watch(_fd, owner().directory().path().c_str(), mask); - if (wd == -1) - { - try - { - FileImpl::handleLastErrorImpl(owner().directory().path()); - } - catch (Poco::Exception& exc) - { - owner().scanError(&owner(), exc); - } - } - - Poco::Buffer<char> buffer(4096); - while (!_stopped.load(std::memory_order_relaxed)) - { - fd_set fds; - FD_ZERO(&fds); - FD_SET(_fd, &fds); - - struct timeval tv; - tv.tv_sec = 0; - tv.tv_usec = 200000; - - if (select(_fd + 1, &fds, NULL, NULL, &tv) == 1) - { - int n = read(_fd, buffer.begin(), buffer.size()); - int i = 0; - if (n > 0) - { - while (n > 0) - { - struct inotify_event* event = reinterpret_cast<struct inotify_event*>(buffer.begin() + i); - - if (event->len > 0) - { - if (!owner().eventsSuspended()) - { - Poco::Path p(owner().directory().path()); - p.makeDirectory(); - p.setFileName(event->name); - Poco::File f(p.toString()); - - if ((event->mask & IN_CREATE) && (owner().eventMask() & DirectoryWatcher::DW_ITEM_ADDED)) - { - DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_ADDED); - owner().itemAdded(&owner(), ev); - } - if ((event->mask & IN_DELETE) && (owner().eventMask() & DirectoryWatcher::DW_ITEM_REMOVED)) - { - DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_REMOVED); - owner().itemRemoved(&owner(), ev); - } - if ((event->mask & IN_MODIFY) && (owner().eventMask() & DirectoryWatcher::DW_ITEM_MODIFIED)) - { - DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_MODIFIED); - owner().itemModified(&owner(), ev); - } - if ((event->mask & IN_MOVED_FROM) && (owner().eventMask() & DirectoryWatcher::DW_ITEM_MOVED_FROM)) - { - DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_MOVED_FROM); - owner().itemMovedFrom(&owner(), ev); - } - if ((event->mask & IN_MOVED_TO) && (owner().eventMask() & DirectoryWatcher::DW_ITEM_MOVED_TO)) - { - DirectoryWatcher::DirectoryEvent ev(f, DirectoryWatcher::DW_ITEM_MOVED_TO); - owner().itemMovedTo(&owner(), ev); - } - } - } - - i += sizeof(inotify_event) + event->len; - n -= sizeof(inotify_event) + event->len; - } - } - } - } - } - - void stop() - { - _stopped.store(true, std::memory_order_relaxed); - } - - bool supportsMoveEvents() const - { - return true; - } - -private: - int _fd; - std::atomic<bool> _stopped; -}; - - -#elif POCO_OS == POCO_OS_MAC_OS_X || POCO_OS == POCO_OS_FREE_BSD - - -class BSDDirectoryWatcherStrategy: public DirectoryWatcherStrategy -{ -public: - BSDDirectoryWatcherStrategy(DirectoryWatcher& owner): - DirectoryWatcherStrategy(owner), - 
_queueFD(-1), - _dirFD(-1), - _stopped(false) - { - _dirFD = open(owner.directory().path().c_str(), O_EVTONLY); - if (_dirFD < 0) throw Poco::FileNotFoundException(owner.directory().path()); - _queueFD = kqueue(); - if (_queueFD < 0) - { - close(_dirFD); - throw Poco::SystemException("Cannot create kqueue", errno); - } - } - - ~BSDDirectoryWatcherStrategy() - { - close(_dirFD); - close(_queueFD); - } - - void run() - { - Poco::Timestamp lastScan; - ItemInfoMap entries; - scan(entries); - - while (!_stopped.load(std::memory_order_relaxed)) - { - struct timespec timeout; - timeout.tv_sec = 0; - timeout.tv_nsec = 200000000; - unsigned eventFilter = NOTE_WRITE; - struct kevent event; - struct kevent eventData; - EV_SET(&event, _dirFD, EVFILT_VNODE, EV_ADD | EV_CLEAR, eventFilter, 0, 0); - int nEvents = kevent(_queueFD, &event, 1, &eventData, 1, &timeout); - if (nEvents < 0 || eventData.flags == EV_ERROR) - { - try - { - FileImpl::handleLastErrorImpl(owner().directory().path()); - } - catch (Poco::Exception& exc) - { - owner().scanError(&owner(), exc); - } - } - else if (nEvents > 0 || ((owner().eventMask() & DirectoryWatcher::DW_ITEM_MODIFIED) && lastScan.isElapsed(owner().scanInterval()*1000000))) - { - ItemInfoMap newEntries; - scan(newEntries); - compare(entries, newEntries); - std::swap(entries, newEntries); - lastScan.update(); - } - } - } - - void stop() - { - _stopped.store(true, std::memory_order_relaxed); - } - - bool supportsMoveEvents() const - { - return false; - } - -private: - int _queueFD; - int _dirFD; - std::atomic<bool> _stopped; -}; - - -#else - - -class PollingDirectoryWatcherStrategy: public DirectoryWatcherStrategy -{ -public: - PollingDirectoryWatcherStrategy(DirectoryWatcher& owner): - DirectoryWatcherStrategy(owner) - { - } - - ~PollingDirectoryWatcherStrategy() - { - } - - void run() - { - ItemInfoMap entries; - scan(entries); - while (!_stopped.tryWait(1000*owner().scanInterval())) - { - try - { - ItemInfoMap newEntries; - scan(newEntries); - compare(entries, newEntries); - std::swap(entries, newEntries); - } - catch (Poco::Exception& exc) - { - owner().scanError(&owner(), exc); - } - } - } - - void stop() - { - _stopped.set(); - } - - bool supportsMoveEvents() const - { - return false; - } - -private: - Poco::Event _stopped; -}; - - -#endif - - -DirectoryWatcher::DirectoryWatcher(const std::string& path, int eventMask, int scanInterval): - _directory(path), - _eventMask(eventMask), - _scanInterval(scanInterval) -{ - init(); -} - - -DirectoryWatcher::DirectoryWatcher(const Poco::File& directory, int eventMask, int scanInterval): - _directory(directory), - _eventMask(eventMask), - _scanInterval(scanInterval) -{ - init(); -} - - -DirectoryWatcher::~DirectoryWatcher() -{ - try - { - stop(); - delete _pStrategy; - } - catch (...) 
- { - poco_unexpected(); - } -} - - -void DirectoryWatcher::suspendEvents() -{ - _eventsSuspended++; -} - - -void DirectoryWatcher::resumeEvents() -{ - poco_assert (_eventsSuspended > 0); - - _eventsSuspended--; -} - - -void DirectoryWatcher::init() -{ - if (!_directory.exists()) - throw Poco::FileNotFoundException(_directory.path()); - - if (!_directory.isDirectory()) - throw Poco::InvalidArgumentException("not a directory", _directory.path()); - -#if POCO_OS == POCO_OS_WINDOWS_NT - _pStrategy = new WindowsDirectoryWatcherStrategy(*this); -#elif POCO_OS == POCO_OS_LINUX || POCO_OS == POCO_OS_ANDROID - _pStrategy = new LinuxDirectoryWatcherStrategy(*this); -#elif POCO_OS == POCO_OS_MAC_OS_X || POCO_OS == POCO_OS_FREE_BSD - _pStrategy = new BSDDirectoryWatcherStrategy(*this); -#else - _pStrategy = new PollingDirectoryWatcherStrategy(*this); -#endif - _thread.start(*this); -} - - -void DirectoryWatcher::run() -{ - _pStrategy->run(); -} - - -void DirectoryWatcher::stop() -{ - _pStrategy->stop(); - _thread.join(); -} - - -bool DirectoryWatcher::supportsMoveEvents() const -{ - return _pStrategy->supportsMoveEvents(); -} - - -} // namespace Poco - - -#endif // POCO_NO_INOTIFY diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 1e0a82a1403..bc41819b717 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54480) +SET(VERSION_REVISION 54481) SET(VERSION_MAJOR 23) -SET(VERSION_MINOR 11) +SET(VERSION_MINOR 12) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 13adae0e42fd48de600486fc5d4b64d39f80c43e) -SET(VERSION_DESCRIBE v23.11.1.1-testing) -SET(VERSION_STRING 23.11.1.1) +SET(VERSION_GITHASH 05bc8ef1e02b9c7332f08091775b255d191341bf) +SET(VERSION_DESCRIBE v23.12.1.1-testing) +SET(VERSION_STRING 23.12.1.1) # end of autochange diff --git a/cmake/target.cmake b/cmake/target.cmake index 887f79bf24e..0d6993142b3 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -42,10 +42,8 @@ if (CMAKE_CROSSCOMPILING) if (ARCH_AARCH64) # FIXME: broken dependencies set (ENABLE_GRPC OFF CACHE INTERNAL "") - set (ENABLE_SENTRY OFF CACHE INTERNAL "") elseif (ARCH_PPC64LE) set (ENABLE_GRPC OFF CACHE INTERNAL "") - set (ENABLE_SENTRY OFF CACHE INTERNAL "") elseif (ARCH_RISCV64) # RISC-V support is preliminary set (GLIBC_COMPATIBILITY OFF CACHE INTERNAL "") @@ -73,19 +71,5 @@ if (CMAKE_CROSSCOMPILING) message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!") endif () - if (USE_MUSL) - # use of undeclared identifier 'PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP' - set (ENABLE_SENTRY OFF CACHE INTERNAL "") - set (ENABLE_ODBC OFF CACHE INTERNAL "") - set (ENABLE_GRPC OFF CACHE INTERNAL "") - set (ENABLE_HDFS OFF CACHE INTERNAL "") - set (ENABLE_EMBEDDED_COMPILER OFF CACHE INTERNAL "") - # use of drand48_data - set (ENABLE_AZURE_BLOB_STORAGE OFF CACHE INTERNAL "") - endif () - - # Don't know why but CXX_STANDARD doesn't work for cross-compilation - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++20") - - message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILE_TARGET}") endif () diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index a8f0705df88..e91ab38ca00 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -44,7 +44,6 @@ else () endif () add_contrib (miniselect-cmake miniselect) add_contrib (pdqsort-cmake pdqsort) -add_contrib 
(pocketfft-cmake pocketfft) add_contrib (crc32-vpmsum-cmake crc32-vpmsum) add_contrib (sparsehash-c11-cmake sparsehash-c11) add_contrib (abseil-cpp-cmake abseil-cpp) @@ -135,9 +134,9 @@ add_contrib (libuv-cmake libuv) add_contrib (liburing-cmake liburing) add_contrib (amqpcpp-cmake AMQP-CPP) # requires: libuv add_contrib (cassandra-cmake cassandra) # requires: libuv +add_contrib (curl-cmake curl) +add_contrib (azure-cmake azure) # requires: curl if (NOT OS_DARWIN) - add_contrib (curl-cmake curl) - add_contrib (azure-cmake azure) # requires: curl add_contrib (sentry-native-cmake sentry-native) # requires: curl endif() add_contrib (fmtlib-cmake fmtlib) @@ -156,6 +155,7 @@ add_contrib (nuraft-cmake NuRaft) add_contrib (fast_float-cmake fast_float) add_contrib (datasketches-cpp-cmake datasketches-cpp) add_contrib (incbin-cmake incbin) +add_contrib (sqids-cpp-cmake sqids-cpp) option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES}) if (ENABLE_NLP) diff --git a/contrib/azure b/contrib/azure index 096049bf24f..352ff0a61cb 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 096049bf24fffafcaccc132b9367694532716731 +Subproject commit 352ff0a61cb319ac1cc38c4058443ddf70147530 diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt index 7e86352befc..e74629e57b3 100644 --- a/contrib/curl-cmake/CMakeLists.txt +++ b/contrib/curl-cmake/CMakeLists.txt @@ -10,7 +10,7 @@ set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/curl") set (SRCS "${LIBRARY_DIR}/lib/altsvc.c" "${LIBRARY_DIR}/lib/amigaos.c" - "${LIBRARY_DIR}/lib/asyn-thread.c" + "${LIBRARY_DIR}/lib/asyn-ares.c" "${LIBRARY_DIR}/lib/base64.c" "${LIBRARY_DIR}/lib/bufq.c" "${LIBRARY_DIR}/lib/bufref.c" @@ -165,13 +165,14 @@ target_compile_definitions (_curl PRIVATE libcurl_EXPORTS OS="${CMAKE_SYSTEM_NAME}" ) + target_include_directories (_curl SYSTEM PUBLIC "${LIBRARY_DIR}/include" "${LIBRARY_DIR}/lib" . # curl_config.h ) -target_link_libraries (_curl PRIVATE OpenSSL::SSL) +target_link_libraries (_curl PRIVATE OpenSSL::SSL ch_contrib::c-ares) # The library is large - avoid bloat (XXX: is it?) if (OMIT_HEAVY_DEBUG_SYMBOLS) diff --git a/contrib/curl-cmake/curl_config.h b/contrib/curl-cmake/curl_config.h index f56ba3eccd5..a38aa60fe6d 100644 --- a/contrib/curl-cmake/curl_config.h +++ b/contrib/curl-cmake/curl_config.h @@ -50,3 +50,4 @@ #define ENABLE_IPV6 #define USE_OPENSSL #define USE_THREADS_POSIX +#define USE_ARES diff --git a/contrib/libcxxabi-cmake/CMakeLists.txt b/contrib/libcxxabi-cmake/CMakeLists.txt index c7ee34e6e28..ac362d0296e 100644 --- a/contrib/libcxxabi-cmake/CMakeLists.txt +++ b/contrib/libcxxabi-cmake/CMakeLists.txt @@ -33,7 +33,7 @@ target_include_directories(cxxabi SYSTEM BEFORE PRIVATE $ PRIVATE $ ) -target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY) +target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DHAS_THREAD_LOCAL) target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast. 
target_link_libraries(cxxabi PUBLIC unwind) diff --git a/contrib/libhdfs3 b/contrib/libhdfs3 index bdcb91354b1..b9598e60167 160000 --- a/contrib/libhdfs3 +++ b/contrib/libhdfs3 @@ -1 +1 @@ -Subproject commit bdcb91354b1c05b21e73043a112a6f1e3b013497 +Subproject commit b9598e6016720a7c088bfe85ce1fa0410f9d2103 diff --git a/contrib/libhdfs3-cmake/CMakeLists.txt b/contrib/libhdfs3-cmake/CMakeLists.txt index 4278575fd7f..8cd951af746 100644 --- a/contrib/libhdfs3-cmake/CMakeLists.txt +++ b/contrib/libhdfs3-cmake/CMakeLists.txt @@ -26,6 +26,11 @@ ADD_DEFINITIONS(-D__STDC_FORMAT_MACROS) ADD_DEFINITIONS(-D_GNU_SOURCE) ADD_DEFINITIONS(-D_GLIBCXX_USE_NANOSLEEP) ADD_DEFINITIONS(-DHAVE_NANOSLEEP) + +if (USE_MUSL) + ADD_DEFINITIONS(-DSTRERROR_R_RETURN_INT) +endif () + set(HAVE_STEADY_CLOCK 1) set(HAVE_NESTED_EXCEPTION 1) SET(HAVE_BOOST_CHRONO 0) diff --git a/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h b/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h index 9eabfaa50c8..c2faeb47cb1 100644 --- a/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h +++ b/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h @@ -270,7 +270,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); * * Whether iconv support is available */ -#if 1 +#if 0 #define LIBXML_ICONV_ENABLED #endif @@ -499,5 +499,3 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); } #endif /* __cplusplus */ #endif - - diff --git a/contrib/llvm-project b/contrib/llvm-project index e7b8befca85..1834e42289c 160000 --- a/contrib/llvm-project +++ b/contrib/llvm-project @@ -1 +1 @@ -Subproject commit e7b8befca85c8b847614432dba250c22d35fbae0 +Subproject commit 1834e42289c58402c804a87be4d489892b88f3ec diff --git a/contrib/pocketfft b/contrib/pocketfft deleted file mode 160000 index 9efd4da52cf..00000000000 --- a/contrib/pocketfft +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 9efd4da52cf8d28d14531d14e43ad9d913807546 diff --git a/contrib/pocketfft-cmake/CMakeLists.txt b/contrib/pocketfft-cmake/CMakeLists.txt deleted file mode 100644 index 01911ee4496..00000000000 --- a/contrib/pocketfft-cmake/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -option (ENABLE_POCKETFFT "Enable pocketfft" ${ENABLE_LIBRARIES}) - -if (NOT ENABLE_POCKETFFT) - message(STATUS "Not using pocketfft") - return() -endif() - -add_library(_pocketfft INTERFACE) -target_include_directories(_pocketfft INTERFACE ${ClickHouse_SOURCE_DIR}/contrib/pocketfft) -add_library(ch_contrib::pocketfft ALIAS _pocketfft) diff --git a/contrib/qpl b/contrib/qpl index faaf1935045..a61bdd845fd 160000 --- a/contrib/qpl +++ b/contrib/qpl @@ -1 +1 @@ -Subproject commit faaf19350459c076e66bb5df11743c3fade59b73 +Subproject commit a61bdd845fd7ca363b2bcc55454aa520dfcd8298 diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index 7d7666dff87..c4220ba90ac 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -117,7 +117,7 @@ endif() add_definitions(-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX) -if (OS_LINUX OR OS_FREEBSD) +if ((OS_LINUX OR OS_FREEBSD) AND NOT USE_MUSL) add_definitions(-DROCKSDB_PTHREAD_ADAPTIVE_MUTEX) endif() diff --git a/contrib/sentry-native b/contrib/sentry-native index ae10fb8c224..bc359f86cbf 160000 --- a/contrib/sentry-native +++ b/contrib/sentry-native @@ -1 +1 @@ -Subproject commit ae10fb8c224c3f41571446e1ed7fd57b9e5e366b +Subproject commit bc359f86cbf0f73f6fd4b6bfb4ede0c1f8c9400f diff --git a/contrib/sentry-native-cmake/CMakeLists.txt 
b/contrib/sentry-native-cmake/CMakeLists.txt index 377f955f856..6364e75db28 100644 --- a/contrib/sentry-native-cmake/CMakeLists.txt +++ b/contrib/sentry-native-cmake/CMakeLists.txt @@ -13,6 +13,7 @@ set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/sentry-native") set (SRCS ${SRC_DIR}/vendor/mpack.c + ${SRC_DIR}/vendor/stb_sprintf.c ${SRC_DIR}/src/sentry_alloc.c ${SRC_DIR}/src/sentry_backend.c ${SRC_DIR}/src/sentry_core.c @@ -21,6 +22,7 @@ set (SRCS ${SRC_DIR}/src/sentry_json.c ${SRC_DIR}/src/sentry_logger.c ${SRC_DIR}/src/sentry_options.c + ${SRC_DIR}/src/sentry_os.c ${SRC_DIR}/src/sentry_random.c ${SRC_DIR}/src/sentry_ratelimiter.c ${SRC_DIR}/src/sentry_scope.c @@ -29,6 +31,7 @@ set (SRCS ${SRC_DIR}/src/sentry_string.c ${SRC_DIR}/src/sentry_sync.c ${SRC_DIR}/src/sentry_transport.c + ${SRC_DIR}/src/sentry_tracing.c ${SRC_DIR}/src/sentry_utils.c ${SRC_DIR}/src/sentry_uuid.c ${SRC_DIR}/src/sentry_value.c diff --git a/contrib/sqids-cpp b/contrib/sqids-cpp new file mode 160000 index 00000000000..3756e537d4d --- /dev/null +++ b/contrib/sqids-cpp @@ -0,0 +1 @@ +Subproject commit 3756e537d4d48cc0dd4176801fe19f99601439b0 diff --git a/contrib/sqids-cpp-cmake/CMakeLists.txt b/contrib/sqids-cpp-cmake/CMakeLists.txt new file mode 100644 index 00000000000..7eb77c92437 --- /dev/null +++ b/contrib/sqids-cpp-cmake/CMakeLists.txt @@ -0,0 +1,14 @@ +option(ENABLE_SQIDS "Enable sqids support" ${ENABLE_LIBRARIES}) +if ((NOT ENABLE_SQIDS)) + message (STATUS "Not using sqids") + return() +endif() + +set (SQIDS_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/sqids-cpp") +set (SQIDS_INCLUDE_DIR "${SQIDS_SOURCE_DIR}/include") + +add_library(_sqids INTERFACE) +target_include_directories(_sqids SYSTEM INTERFACE ${SQIDS_INCLUDE_DIR}) + +add_library(ch_contrib::sqids ALIAS _sqids) +target_compile_definitions(_sqids INTERFACE ENABLE_SQIDS) diff --git a/contrib/unixodbc-cmake/CMakeLists.txt b/contrib/unixodbc-cmake/CMakeLists.txt index 3317654cd67..6fbe8c14ebb 100644 --- a/contrib/unixodbc-cmake/CMakeLists.txt +++ b/contrib/unixodbc-cmake/CMakeLists.txt @@ -1,7 +1,7 @@ option (ENABLE_ODBC "Enable ODBC library" ${ENABLE_LIBRARIES}) -if (NOT OS_LINUX) +if (NOT OS_LINUX OR USE_MUSL) if (ENABLE_ODBC) - message(STATUS "ODBC is only supported on Linux") + message(STATUS "ODBC is only supported on Linux with dynamic linking") endif() set (ENABLE_ODBC OFF CACHE INTERNAL "") endif () diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index b174dfde675..8fc639af1a7 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.10.5.20" +ARG VERSION="23.11.1.2711" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. 
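The `contrib/sqids-cpp` submodule and its CMake wrapper added earlier in this diff wire the Sqids library into the build. As a hedged illustration of what such a library is typically used for at the SQL level (the function name `sqid` is an assumption for illustration; it does not appear anywhere in this diff):

```sql
-- Hypothetical usage of a sqids-backed function; `sqid` is assumed, not shown in this diff.
SELECT sqid(1, 2, 3);  -- expected to return a short, URL-safe id string
```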
diff --git a/docker/packager/packager b/docker/packager/packager index b5bcbada1da..ade36a55591 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -145,6 +145,7 @@ def parse_env_variables( RISCV_SUFFIX = "-riscv64" S390X_SUFFIX = "-s390x" AMD64_COMPAT_SUFFIX = "-amd64-compat" + AMD64_MUSL_SUFFIX = "-amd64-musl" result = [] result.append("OUTPUT_DIR=/output") @@ -163,6 +164,7 @@ def parse_env_variables( is_cross_s390x = compiler.endswith(S390X_SUFFIX) is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX) is_amd64_compat = compiler.endswith(AMD64_COMPAT_SUFFIX) + is_amd64_musl = compiler.endswith(AMD64_MUSL_SUFFIX) if is_cross_darwin: cc = compiler[: -len(DARWIN_SUFFIX)] @@ -232,6 +234,12 @@ def parse_env_variables( cc = compiler[: -len(AMD64_COMPAT_SUFFIX)] result.append("DEB_ARCH=amd64") cmake_flags.append("-DNO_SSE3_OR_HIGHER=1") + elif is_amd64_musl: + cc = compiler[: -len(AMD64_MUSL_SUFFIX)] + result.append("DEB_ARCH=amd64") + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-x86_64-musl.cmake" + ) else: cc = compiler result.append("DEB_ARCH=amd64") @@ -396,6 +404,7 @@ def parse_args() -> argparse.Namespace: "clang-17-riscv64", "clang-17-s390x", "clang-17-amd64-compat", + "clang-17-amd64-musl", "clang-17-freebsd", ), default="clang-17", diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index d4498abda6a..76b03218eab 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.10.5.20" +ARG VERSION="23.11.1.2711" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 08e95cd535b..c6dfcf9f679 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.10.5.20" +ARG VERSION="23.11.1.2711" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image @@ -83,7 +83,7 @@ RUN if ! clickhouse local -q "SELECT ''" > /dev/null 2>&1; then \ && GNUPGHOME="$GNUPGHOME" gpg --no-default-keyring \ --keyring /usr/share/keyrings/clickhouse-keyring.gpg \ --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 8919F6BD2B48D754 \ - && rm -r "$GNUPGHOME" \ + && rm -rf "$GNUPGHOME" \ && chmod +r /usr/share/keyrings/clickhouse-keyring.gpg \ && echo "${REPOSITORY}" > /etc/apt/sources.list.d/clickhouse.list \ && echo "installing from repository: ${REPOSITORY}" \ diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index d3695ba2613..5af05034415 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -16,7 +16,7 @@ export LLVM_VERSION=${LLVM_VERSION:-17} # it being undefined. Also read it as array so that we can pass an empty list # of additional variable to cmake properly, and it doesn't generate an extra # empty parameter. 
-# Read it as CMAKE_FLAGS to not lose exported FASTTEST_CMAKE_FLAGS on subsequential launch +# Read it as CMAKE_FLAGS to not lose exported FASTTEST_CMAKE_FLAGS on subsequent launch read -ra CMAKE_FLAGS <<< "${FASTTEST_CMAKE_FLAGS:-}" # Run only matching tests. @@ -197,7 +197,7 @@ function run_cmake ( cd "$FASTTEST_BUILD" - cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER="clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="clang-${LLVM_VERSION}" "${CMAKE_LIBS_CONFIG[@]}" "${CMAKE_FLAGS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt" + cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER="clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="clang-${LLVM_VERSION}" -DCMAKE_TOOLCHAIN_FILE="${FASTTEST_SOURCE}/cmake/linux/toolchain-x86_64-musl.cmake" "${CMAKE_LIBS_CONFIG[@]}" "${CMAKE_FLAGS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt" ) } diff --git a/docker/test/integration/helper_container/Dockerfile b/docker/test/integration/helper_container/Dockerfile index 60adaea1796..49a3d3cd84b 100644 --- a/docker/test/integration/helper_container/Dockerfile +++ b/docker/test/integration/helper_container/Dockerfile @@ -1,7 +1,7 @@ # docker build -t clickhouse/integration-helper . # Helper docker container to run iptables without sudo -FROM alpine +FROM alpine:3.18 RUN apk add --no-cache -U iproute2 \ && for bin in iptables iptables-restore iptables-save; \ do ln -sf xtables-nft-multi "/sbin/$bin"; \ diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile index 5cf71e4d3f8..48457a99de3 100644 --- a/docker/test/sqllogic/Dockerfile +++ b/docker/test/sqllogic/Dockerfile @@ -20,7 +20,8 @@ RUN apt-get update --yes \ RUN pip3 install \ numpy \ pyodbc \ - deepdiff + deepdiff \ + sqlglot ARG odbc_repo="https://github.com/ClickHouse/clickhouse-odbc.git" @@ -35,7 +36,7 @@ RUN git clone --recursive ${odbc_repo} \ && odbcinst -i -s -l -f /clickhouse-odbc/packaging/odbc.ini.sample ENV TZ=Europe/Amsterdam -ENV MAX_RUN_TIME=900 +ENV MAX_RUN_TIME=9000 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ARG sqllogic_test_repo="https://github.com/gregrahn/sqllogictest.git" diff --git a/docker/test/sqllogic/run.sh b/docker/test/sqllogic/run.sh index db828741b0d..ccba344035e 100755 --- a/docker/test/sqllogic/run.sh +++ b/docker/test/sqllogic/run.sh @@ -75,6 +75,20 @@ function run_tests() cat /test_output/statements-test/check_status.tsv >> /test_output/check_status.tsv cat /test_output/statements-test/test_results.tsv >> /test_output/test_results.tsv tar -zcvf statements-check.tar.gz statements-test 1>/dev/null + + mkdir -p /test_output/complete-test + /clickhouse-tests/sqllogic/runner.py \ + --log-file /test_output/runner-complete-test.log \ + --log-level info \ + complete-test \ + --input-dir /sqllogictest \ + --out-dir /test_output/complete-test \ + 2>&1 \ + | ts '%Y-%m-%d %H:%M:%S' + + cat /test_output/complete-test/check_status.tsv >> /test_output/check_status.tsv + cat /test_output/complete-test/test_results.tsv >> /test_output/test_results.tsv + tar -zcvf complete-check.tar.gz complete-test 1>/dev/null fi } diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index ad3c3477b37..a0def50bfb5 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -24,6 +24,22 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml +cache_policy="" +if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then + cache_policy="SLRU" +else 
+ cache_policy="LRU" +fi + +echo "Using cache policy: $cache_policy" + +if [ "$cache_policy" = "SLRU" ]; then + sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ + | sed "s|LRU|SLRU|" \ + > /etc/clickhouse-server/config.d/storage_conf.xml.tmp + mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml +fi + function start() { if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 07b40ea3b3d..a2e2a708aaf 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -19,10 +19,14 @@ dpkg -i package_folder/clickhouse-common-static-dbg_*.deb dpkg -i package_folder/clickhouse-server_*.deb dpkg -i package_folder/clickhouse-client_*.deb +echo "$BUGFIX_VALIDATE_CHECK" + # Check that the tools are available under short names -ch --query "SELECT 1" || exit 1 -chl --query "SELECT 1" || exit 1 -chc --version || exit 1 +if [[ -z "$BUGFIX_VALIDATE_CHECK" ]]; then + ch --query "SELECT 1" || exit 1 + chl --query "SELECT 1" || exit 1 + chc --version || exit 1 +fi ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test @@ -46,6 +50,16 @@ fi config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml +if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; then + sudo cat /etc/clickhouse-server/config.d/zookeeper.xml \ + | sed "/1<\/use_compression>/d" \ + > /etc/clickhouse-server/config.d/zookeeper.xml.tmp + sudo mv /etc/clickhouse-server/config.d/zookeeper.xml.tmp /etc/clickhouse-server/config.d/zookeeper.xml + + # it contains some new settings, but we can safely remove it + rm /etc/clickhouse-server/users.d/s3_cache_new.xml +fi + # For flaky check we also enable thread fuzzer if [ "$NUM_TRIES" -gt "1" ]; then export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index b5092fd40df..67056cc1bc1 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -65,9 +65,27 @@ chmod 777 -R /var/lib/clickhouse clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary" clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test" + stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log +# Randomize cache policies. 
+cache_policy="" +if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then + cache_policy="SLRU" +else + cache_policy="LRU" +fi + +echo "Using cache policy: $cache_policy" + +if [ "$cache_policy" = "SLRU" ]; then + sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ + | sed "s|LRU|SLRU|" \ + > /etc/clickhouse-server/config.d/storage_conf.xml.tmp + mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml +fi + start clickhouse-client --query "SHOW TABLES FROM datasets" @@ -191,6 +209,19 @@ sudo cat /etc/clickhouse-server/config.d/logger_trace.xml \ > /etc/clickhouse-server/config.d/logger_trace.xml.tmp mv /etc/clickhouse-server/config.d/logger_trace.xml.tmp /etc/clickhouse-server/config.d/logger_trace.xml +if [ "$cache_policy" = "SLRU" ]; then + sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ + | sed "s|LRU|SLRU|" \ + > /etc/clickhouse-server/config.d/storage_conf.xml.tmp + mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml +fi + +# Randomize async_load_databases +if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then + sudo echo "<clickhouse><async_load_databases>true</async_load_databases></clickhouse>" \ + > /etc/clickhouse-server/config.d/enable_async_load_databases.xml +fi + start stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 57b683a16c3..158ac19229e 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -79,6 +79,7 @@ rm /etc/clickhouse-server/config.d/merge_tree.xml rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml rm /etc/clickhouse-server/users.d/s3_cache_new.xml +rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml start stop @@ -116,6 +117,7 @@ rm /etc/clickhouse-server/config.d/merge_tree.xml rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml rm /etc/clickhouse-server/users.d/s3_cache_new.xml +rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml start diff --git a/docs/changelogs/v23.11.1.2711-stable.md b/docs/changelogs/v23.11.1.2711-stable.md new file mode 100644 index 00000000000..e32dee41dc7 --- /dev/null +++ b/docs/changelogs/v23.11.1.2711-stable.md @@ -0,0 +1,525 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.11.1.2711-stable (05bc8ef1e02) FIXME as compared to v23.10.1.1976-stable (13adae0e42f) + +#### Backward Incompatible Change +* Formatters `%l`/`%k`/`%c` in function `parseDateTime()` are now able to parse hours/months without leading zeros, e.g. `select parseDateTime('2023-11-26 8:14', '%F %k:%i')` now works. Set `parsedatetime_parse_without_leading_zeros = 0` to restore the previous behavior which required two digits. Function `formatDateTime` is now also able to print hours/months without leading zeros. This is controlled by setting `formatdatetime_format_without_leading_zeros` but off by default to not break existing use cases. [#55872](https://github.com/ClickHouse/ClickHouse/pull/55872) ([Azat Khuzhin](https://github.com/azat)). +* You can no longer use the aggregate function `avgWeighted` with arguments of type `Decimal`. Workaround: convert arguments to `Float64`. This closes [#43928](https://github.com/ClickHouse/ClickHouse/issues/43928). This closes [#31768](https://github.com/ClickHouse/ClickHouse/issues/31768). 
+* You can no longer use the aggregate function `avgWeighted` with arguments of type `Decimal`. Workaround: convert arguments to `Float64`. This closes [#43928](https://github.com/ClickHouse/ClickHouse/issues/43928). This closes [#31768](https://github.com/ClickHouse/ClickHouse/issues/31768). This closes [#56435](https://github.com/ClickHouse/ClickHouse/issues/56435). If you have used this function inside materialized views or projections with `Decimal` arguments, contact support@clickhouse.com. Fixed error in aggregate function `sumMap` and made it slower around 1.5..2 times. It does not matter because the function is garbage anyway. This closes [#54955](https://github.com/ClickHouse/ClickHouse/issues/54955). This closes [#53134](https://github.com/ClickHouse/ClickHouse/issues/53134). This closes [#55148](https://github.com/ClickHouse/ClickHouse/issues/55148). Fix a bug in function `groupArraySample` - it used the same random seed in case more than one aggregate state is generated in a query. [#56350](https://github.com/ClickHouse/ClickHouse/pull/56350) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The default ClickHouse server configuration file has enabled `access_management` (user manipulation by SQL queries) and `named_collection_control` (manipulation of named collection by SQL queries) for the `default` user by default. This closes [#56482](https://github.com/ClickHouse/ClickHouse/issues/56482). [#56619](https://github.com/ClickHouse/ClickHouse/pull/56619) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Multiple improvements for RESPECT/IGNORE NULLS. [#57189](https://github.com/ClickHouse/ClickHouse/pull/57189) ([Raúl Marín](https://github.com/Algunenano)).
+* Remove optimization `optimize_move_functions_out_of_any`. [#57190](https://github.com/ClickHouse/ClickHouse/pull/57190) ([Raúl Marín](https://github.com/Algunenano)).
+
+#### New Feature
+* Added server setting `async_load_databases` for asynchronous loading of databases and tables. Speeds up the server start time. Applies to databases with Ordinary, Atomic and Replicated engines. Their tables load metadata asynchronously. A query to a table increases the priority of the load job and waits for it to be done. Added table `system.async_loader`. [#49351](https://github.com/ClickHouse/ClickHouse/pull/49351) ([Sergei Trifonov](https://github.com/serxa)).
+* 1. Add function `extractPlainRanges` to `KeyCondition`. 2. Add some useful functions to `Range`. 3. Add `PlainRanges`, which represents a series of ordered, non-overlapping ranges. 4. Add `NumbersRangedSource`, which can accurately return user-selected numbers. [#50909](https://github.com/ClickHouse/ClickHouse/pull/50909) ([JackyWoo](https://github.com/JackyWoo)).
+* Add system table `blob_storage_log`. [#52918](https://github.com/ClickHouse/ClickHouse/pull/52918) ([vdimir](https://github.com/vdimir)).
+* Use statistics to order PREWHERE conditions better. [#53240](https://github.com/ClickHouse/ClickHouse/pull/53240) ([Han Fei](https://github.com/hanfei1991)).
+* Added a new aggregation function `groupArraySorted(n)(value)` which returns an array with the first n values of a field, sorted by the value itself (see the example query below). [#53562](https://github.com/ClickHouse/ClickHouse/pull/53562) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Added support for compression in the Keeper protocol. It can be enabled in ClickHouse via the `use_compression` flag inside the `zookeeper` configuration section. Resolves [#49507](https://github.com/ClickHouse/ClickHouse/issues/49507). [#54957](https://github.com/ClickHouse/ClickHouse/pull/54957) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Add ClickHouse setting to disable tunneling for HTTPS requests over HTTP proxy. [#55033](https://github.com/ClickHouse/ClickHouse/pull/55033) ([Arthur Passos](https://github.com/arthurpassos)).
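+
+A minimal illustration of the `groupArraySorted` entry above (a sketch, assuming the default ascending order of the values themselves):
+
+```sql
+-- Returns the first 3 values of `number`, sorted by value: [0, 1, 2]
+SELECT groupArraySorted(3)(number) FROM numbers(10);
+```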
+* Introduce the feature `storage_metadata_write_full_object_key`. If it is set to `true`, metadata files are written in the new format VERSION_FULL_OBJECT_KEY. With that format, ClickHouse stores the full remote object key in the metadata file. [#55566](https://github.com/ClickHouse/ClickHouse/pull/55566) ([Sema Checherinda](https://github.com/CheSema)).
+* Add new settings and syntax to protect named collections' fields from being overridden. This is meant to prevent a malicious user from obtaining unauthorized access to secrets. [#55782](https://github.com/ClickHouse/ClickHouse/pull/55782) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
+* Add `hostname` column to all system log tables. [#55894](https://github.com/ClickHouse/ClickHouse/pull/55894) ([Bharat Nallan](https://github.com/bharatnc)).
+* Add `CHECK ALL TABLES` query. [#56022](https://github.com/ClickHouse/ClickHouse/pull/56022) ([vdimir](https://github.com/vdimir)).
+* Added function `fromDaysSinceYearZero()` which is similar to MySQL's `FROM_DAYS`. E.g. `SELECT fromDaysSinceYearZero(739136)` returns `2023-09-08`. [#56088](https://github.com/ClickHouse/ClickHouse/pull/56088) ([Joanna Hulboj](https://github.com/jh0x)).
+* Implemented a series period detection method using FFT from the pocketFFT library. [#56171](https://github.com/ClickHouse/ClickHouse/pull/56171) ([Bhavna Jindal](https://github.com/bhavnajindal)).
+* Add an external Python tool to view backups and to extract information from them without using ClickHouse. [#56268](https://github.com/ClickHouse/ClickHouse/pull/56268) ([Vitaly Baranov](https://github.com/vitlibar)).
+* ... [#56275](https://github.com/ClickHouse/ClickHouse/pull/56275) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* This pull request implements a new setting called `preferred_projection_name`. If it is set to a non-empty string, the specified projection will be used if possible. [#56309](https://github.com/ClickHouse/ClickHouse/pull/56309) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* S3 adaptive timeouts: the first attempt is made with low send and receive timeouts. [#56314](https://github.com/ClickHouse/ClickHouse/pull/56314) ([Sema Checherinda](https://github.com/CheSema)).
+* Add 4-letter command for yielding/resigning leadership (https://github.com/ClickHouse/ClickHouse/issues/56352). [#56354](https://github.com/ClickHouse/ClickHouse/pull/56354) ([Pradeep Chhetri](https://github.com/chhetripradeep)).
+* Added a new SQL function, "arrayRandomSample(arr, k)" which returns a sample of k elements from the input array. Similar functionality could previously be achieved only with less convenient syntax, e.g. "SELECT arrayReduce('groupArraySample(3)', range(10))". [#56416](https://github.com/ClickHouse/ClickHouse/pull/56416) ([Robert Schulze](https://github.com/rschu1ze)).
+* Added support for `float16` type data for use in `.npy` files. Closes [#56344](https://github.com/ClickHouse/ClickHouse/issues/56344). [#56424](https://github.com/ClickHouse/ClickHouse/pull/56424) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Added system view `information_schema.statistics` for better compatibility with Tableau Online. [#56425](https://github.com/ClickHouse/ClickHouse/pull/56425) ([Serge Klochkov](https://github.com/slvrtrn)).
+* Add function `getClientHTTPHeader` for fetching values of headers set in the HTTP request. [#56488](https://github.com/ClickHouse/ClickHouse/pull/56488) ([凌涛](https://github.com/lingtaolf)).
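+
+A sketch of how `getClientHTTPHeader` might be called; the single-`String`-argument signature is an assumption inferred from the entry above, and the function is only meaningful for queries arriving over the HTTP interface:
+
+```sql
+-- Fetch the value of a header set in the incoming HTTP request
+-- (hypothetical usage: the header name is passed as a String).
+SELECT getClientHTTPHeader('User-Agent');
+```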
+* Add a new table function named `fuzzJSON` whose rows contain perturbed versions of the source JSON string, with random variations. [#56490](https://github.com/ClickHouse/ClickHouse/pull/56490) ([Julia Kartseva](https://github.com/jkartseva)).
+* Add `system.symbols` table, useful for introspection of the binary. [#56548](https://github.com/ClickHouse/ClickHouse/pull/56548) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add 4-letter command for yielding/resigning leadership. [#56620](https://github.com/ClickHouse/ClickHouse/pull/56620) ([Pradeep Chhetri](https://github.com/chhetripradeep)).
+* Configurable dashboards. Queries for charts are now loaded using a query, which by default uses a new `system.dashboards` table. [#56771](https://github.com/ClickHouse/ClickHouse/pull/56771) ([Sergei Trifonov](https://github.com/serxa)).
+* Introduce `fileCluster` table function. [#56868](https://github.com/ClickHouse/ClickHouse/pull/56868) ([Andrey Zvonov](https://github.com/zvonand)).
+* Add `_size` virtual column with file size in bytes to `s3/file/hdfs/url/azureBlobStorage` engines. [#57126](https://github.com/ClickHouse/ClickHouse/pull/57126) ([Kruglov Pavel](https://github.com/Avogar)).
+* Expose the number of errors that occurred on a server since the last restart from the Prometheus endpoint. [#57209](https://github.com/ClickHouse/ClickHouse/pull/57209) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Added a new SQL function `sqid` to generate Sqids (https://sqids.org/), example: `SELECT sqid(125, 126)`. [#57442](https://github.com/ClickHouse/ClickHouse/pull/57442) ([awakeljw](https://github.com/awakeljw)).
+
+#### Performance Improvement
+* Support parallel evaluation of window functions. Fixes [#34688](https://github.com/ClickHouse/ClickHouse/issues/34688). [#39631](https://github.com/ClickHouse/ClickHouse/pull/39631) ([Dmitry Novik](https://github.com/novikd)).
+* Increase the default value of `max_concurrent_queries` from 100 to 1000. This makes sense when there is a large number of connecting clients, which are slowly sending or receiving data, so the server is not limited by CPU, or when the number of CPU cores is larger than 100. Also, enable the concurrency control by default, and set the desired number of query processing threads in total as twice the number of CPU cores. It improves performance in scenarios with a very large number of concurrent queries. [#46927](https://github.com/ClickHouse/ClickHouse/pull/46927) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fixed filtering by `IN(...)` condition for `Merge` table engine. [#54905](https://github.com/ClickHouse/ClickHouse/pull/54905) ([Nikita Taranov](https://github.com/nickitat)).
+* An improvement for the case when the cache is full and there are big reads. [#55158](https://github.com/ClickHouse/ClickHouse/pull/55158) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add ability to disable checksums for S3 to avoid excessive input file read (this new behavior could be enabled with `s3_disable_checksum=true`; see the sketch below). [#55559](https://github.com/ClickHouse/ClickHouse/pull/55559) ([Azat Khuzhin](https://github.com/azat)).
+* Now we read synchronously from remote tables when data is in the page cache (like we do for local tables). It is faster, doesn't require synchronization inside the thread pool, doesn't hesitate to do `seek`s on the local fs, and reduces CPU wait. [#55841](https://github.com/ClickHouse/ClickHouse/pull/55841) ([Nikita Taranov](https://github.com/nickitat)).
+* ... This PR follows [#55929](https://github.com/ClickHouse/ClickHouse/issues/55929); it brings about a 30% speedup by reducing the reserved memory and reducing the `resize` calls. [#55957](https://github.com/ClickHouse/ClickHouse/pull/55957) ([lgbo](https://github.com/lgbo-ustc)).
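+
+A sketch of the `s3_disable_checksum` behavior from the entry above; the bucket URL is a placeholder, and the query assumes anonymous access for brevity:
+
+```sql
+-- Skip checksum calculation to avoid reading the input file twice.
+SELECT count()
+FROM s3('https://my-bucket.s3.amazonaws.com/data/*.parquet', 'Parquet')
+SETTINGS s3_disable_checksum = 1;
+```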
+* The performance experiments of **OnTime** on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring the improvements of **7.4%, 5.9%, 4.7%, 3.0%, and 4.6%** to the QPS of the query Q2, Q3, Q4, Q5 and Q6 respectively while having no impact on others. [#56079](https://github.com/ClickHouse/ClickHouse/pull/56079) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
+* Limit the number of threads busy inside the query profiler. If there are more, they will skip profiling. [#56105](https://github.com/ClickHouse/ClickHouse/pull/56105) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Decrease the number of virtual function calls in WindowTransform. [#56120](https://github.com/ClickHouse/ClickHouse/pull/56120) ([Maksim Kita](https://github.com/kitaisreal)).
+* Allow recursive tuple field pruning in ORC to speed up scanning. [#56122](https://github.com/ClickHouse/ClickHouse/pull/56122) ([李扬](https://github.com/taiyang-li)).
+* This pull request provides countRows support for the Npy data format. Now, with the setting `optimize_count_from_files=1`, queries like `select count() from file(data.npy)` will be much faster because of caching the results. [#56304](https://github.com/ClickHouse/ClickHouse/pull/56304) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Queries with aggregation and a large number of streams will use less memory during the plan's construction. [#57074](https://github.com/ClickHouse/ClickHouse/pull/57074) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Improve performance of executing queries for use cases with many users. [#57106](https://github.com/ClickHouse/ClickHouse/pull/57106) ([Andrej Hoos](https://github.com/adikus)).
+* Trivial improvement on array join: reuse some intermediate results. [#57183](https://github.com/ClickHouse/ClickHouse/pull/57183) ([李扬](https://github.com/taiyang-li)).
+* There were cases when stack unwinding was slow. [#57221](https://github.com/ClickHouse/ClickHouse/pull/57221) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Now we use the default read pool for reading from external storage when `max_streams = 1`. It is beneficial when read prefetches are enabled. [#57334](https://github.com/ClickHouse/ClickHouse/pull/57334) ([Nikita Taranov](https://github.com/nickitat)).
+
+#### Improvement
+* Engine `Merge` filters the records according to the row policies of the underlying tables. [#50209](https://github.com/ClickHouse/ClickHouse/pull/50209) ([Ilya Golshtein](https://github.com/ilejn)).
+* Add a setting `max_execution_time_leaf` to limit the execution time on shards for distributed queries, and `timeout_overflow_mode_leaf` to control the behaviour when a timeout happens (see the example below). [#51823](https://github.com/ClickHouse/ClickHouse/pull/51823) ([Duc Canh Le](https://github.com/canhld94)).
+* Fix possible postgresql logical replication conversion_error when using MaterializedPostgreSQL. [#53721](https://github.com/ClickHouse/ClickHouse/pull/53721) ([takakawa](https://github.com/takakawa)).
+* Set `background_fetches_pool_size` to 16 and `background_schedule_pool_size` to 512, which is better for production usage with frequent small insertions. [#54327](https://github.com/ClickHouse/ClickHouse/pull/54327) ([Denny Crane](https://github.com/den-crane)).
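+
+A hedged sketch of the `max_execution_time_leaf` setting from the list above; the table name is illustrative, and the `'break'` value is an assumption mirroring the existing `timeout_overflow_mode` setting:
+
+```sql
+-- Limit execution on the shards (leaf nodes) of a distributed query to 10 seconds,
+-- returning partial results instead of throwing on timeout.
+SELECT count() FROM distributed_table
+SETTINGS max_execution_time_leaf = 10, timeout_overflow_mode_leaf = 'break';
+```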
+* While reading data from a CSV file whose line ends with '\r' not followed by '\n', we would encounter the exception ``` Cannot parse CSV format: found \r (CR) not followed by \n (LF). Line must end by \n (LF) or \r\n (CR LF) or \n\r.: ``` In ClickHouse, a CSV line must end with \n, \r\n or \n\r, so '\r' must be followed by '\n'; however, in some situations the CSV input data is abnormal, as above, and '\r' appears at the end of the line. [#54340](https://github.com/ClickHouse/ClickHouse/pull/54340) ([KevinyhZou](https://github.com/KevinyhZou)).
+* Update the arrow library to release-13.0.0, which supports new encodings. Closes [#44505](https://github.com/ClickHouse/ClickHouse/issues/44505). [#54800](https://github.com/ClickHouse/ClickHouse/pull/54800) ([Kruglov Pavel](https://github.com/Avogar)).
+* Improve performance of ON CLUSTER queries by removing heavy system calls to get all network interfaces when looking for the local IP address in the DDL entry hosts list. [#54909](https://github.com/ClickHouse/ClickHouse/pull/54909) ([Duc Canh Le](https://github.com/canhld94)).
+* Keeper improvement: improve memory usage during startup by delaying log preprocessing. [#55660](https://github.com/ClickHouse/ClickHouse/pull/55660) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fixed accounting of memory allocated before attaching a thread to a query or a user. [#56089](https://github.com/ClickHouse/ClickHouse/pull/56089) ([Nikita Taranov](https://github.com/nickitat)).
+* ClickHouse Keeper reports its running availability zone at the `/keeper/availability-zone` path when running in an AWS environment. [#56104](https://github.com/ClickHouse/ClickHouse/pull/56104) ([Jianfei Hu](https://github.com/incfly)).
+* Add support for LARGE_LIST with Arrow. [#56118](https://github.com/ClickHouse/ClickHouse/pull/56118) ([edef](https://github.com/edef1c)).
+* Improved performance of glob matching for `file` and `hdfs` storages. [#56141](https://github.com/ClickHouse/ClickHouse/pull/56141) ([Andrey Zvonov](https://github.com/zvonand)).
+* Allow manual compaction of `EmbeddedRocksDB` via an `OPTIMIZE` query (see the example below). [#56225](https://github.com/ClickHouse/ClickHouse/pull/56225) ([Azat Khuzhin](https://github.com/azat)).
+* Posting lists in inverted indexes are now compressed, which reduces their size by 10-30%. [#56226](https://github.com/ClickHouse/ClickHouse/pull/56226) ([Harry Lee](https://github.com/HarryLeeIBM)).
+* Add ability to specify BlockBasedTableOptions for EmbeddedRocksDB. [#56264](https://github.com/ClickHouse/ClickHouse/pull/56264) ([Azat Khuzhin](https://github.com/azat)).
+* `SHOW COLUMNS` now displays MySQL's equivalent data type name when the connection was made through the MySQL protocol. Previously, this was the case when setting `use_mysql_types_in_show_columns = 1`. The setting is retained but made obsolete. [#56277](https://github.com/ClickHouse/ClickHouse/pull/56277) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fixed possible `The local set of parts of table doesn't look like the set of parts in ZooKeeper` error if the server was restarted just after `TRUNCATE` or `DROP PARTITION`. [#56282](https://github.com/ClickHouse/ClickHouse/pull/56282) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Parallelise `BackupEntriesCollector`. [#56312](https://github.com/ClickHouse/ClickHouse/pull/56312) ([Kseniia Sumarokova](https://github.com/kssenii)).
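+
+A minimal sketch of the manual `EmbeddedRocksDB` compaction described above; the table name and schema are illustrative:
+
+```sql
+CREATE TABLE rocks (key UInt64, value String)
+ENGINE = EmbeddedRocksDB
+PRIMARY KEY key;
+
+-- Triggers a manual RocksDB compaction of the table.
+OPTIMIZE TABLE rocks;
+```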
+* Fixed handling of non-const query strings in functions `formatQuery()`/ `formatQuerySingleLine()`. Also added `OrNull` variants of both functions that return NULL when a query cannot be parsed instead of throwing an exception. [#56327](https://github.com/ClickHouse/ClickHouse/pull/56327) ([Robert Schulze](https://github.com/rschu1ze)).
+* Support creating and materializing an index in the same ALTER query; also support modifying TTL and materializing TTL in the same query. Closes [#55651](https://github.com/ClickHouse/ClickHouse/issues/55651). [#56331](https://github.com/ClickHouse/ClickHouse/pull/56331) ([flynn](https://github.com/ucasfl)).
+* Enable adding a new disk to the storage configuration without a restart. [#56367](https://github.com/ClickHouse/ClickHouse/pull/56367) ([Duc Canh Le](https://github.com/canhld94)).
+* Allow backup of a materialized view with a dropped inner table instead of failing the backup. [#56387](https://github.com/ClickHouse/ClickHouse/pull/56387) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Queries to `system.replicas` initiate requests to ZooKeeper when certain columns are queried. When there are thousands of tables these requests might produce a considerable load on ZooKeeper. If there are multiple simultaneous queries to `system.replicas` they make the same requests multiple times. The change is to "deduplicate" requests from concurrent queries. [#56420](https://github.com/ClickHouse/ClickHouse/pull/56420) ([Alexander Gololobov](https://github.com/davenger)).
+* Add transition from reading a key to reading a quoted key when double quotes are found. [#56423](https://github.com/ClickHouse/ClickHouse/pull/56423) ([Arthur Passos](https://github.com/arthurpassos)).
+* Fix translating queries into MySQL-compatible queries. [#56456](https://github.com/ClickHouse/ClickHouse/pull/56456) ([flynn](https://github.com/ucasfl)).
+* Add support for backing up and restoring tables using the KeeperMap engine. [#56460](https://github.com/ClickHouse/ClickHouse/pull/56460) ([Antonio Andelic](https://github.com/antonio2368)).
+* A 404 response for CompleteMultipartUpload has to be rechecked: the operation could have been completed on the server even if the client got a timeout or another network error. If the next retry of CompleteMultipartUpload receives a 404 response and the object key exists, the operation is considered successful. [#56475](https://github.com/ClickHouse/ClickHouse/pull/56475) ([Sema Checherinda](https://github.com/CheSema)).
+* Enable the HTTP OPTIONS method by default - it simplifies requesting ClickHouse from a web browser. [#56483](https://github.com/ClickHouse/ClickHouse/pull/56483) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The value for `dns_max_consecutive_failures` was changed by mistake in [#46550](https://github.com/ClickHouse/ClickHouse/issues/46550) - this is reverted and adjusted to a better value. Also, increased the HTTP keep-alive timeout to a reasonable value from production. [#56485](https://github.com/ClickHouse/ClickHouse/pull/56485) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Load base backups lazily (a base backup won't be loaded until it's needed). Also add some log messages and profile events for backups. [#56516](https://github.com/ClickHouse/ClickHouse/pull/56516) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Setting `query_cache_store_results_of_queries_with_nondeterministic_functions` (with values `false` or `true`) was marked obsolete. It was replaced by setting `query_cache_nondeterministic_function_handling`, a three-valued enum that controls how the query cache handles queries with non-deterministic functions: a) throw an exception (default behavior), b) save the non-deterministic query result regardless, or c) ignore, i.e. don't throw an exception and don't cache the result (see the example below). [#56519](https://github.com/ClickHouse/ClickHouse/pull/56519) ([Robert Schulze](https://github.com/rschu1ze)).
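+
+A sketch of the replacement setting described in the entry above; the literal enum values (`'throw'`, `'save'`, `'ignore'`) are an assumption inferred from the three behaviors it lists:
+
+```sql
+-- Cache the result of a query with a non-deterministic function (now())
+-- instead of throwing an exception.
+SELECT now()
+SETTINGS use_query_cache = 1,
+         query_cache_nondeterministic_function_handling = 'save';
+```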
+* Rewrite equality with `is null` check in JOIN ON section. *Analyzer only*. [#56538](https://github.com/ClickHouse/ClickHouse/pull/56538) ([vdimir](https://github.com/vdimir)).
+* Function `concat` now supports arbitrary argument types (instead of only String and FixedString arguments). This makes it behave more similarly to MySQL's `concat` implementation. For example, `SELECT concat('ab', 42)` now returns `ab42` (see the combined example below). [#56540](https://github.com/ClickHouse/ClickHouse/pull/56540) ([Serge Klochkov](https://github.com/slvrtrn)).
+* Allow getting the cache configuration from the 'named_collection' section in the config or from a SQL-created named collection. [#56541](https://github.com/ClickHouse/ClickHouse/pull/56541) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Update `query_masking_rules` when reloading the config ([#56449](https://github.com/ClickHouse/ClickHouse/issues/56449)). [#56573](https://github.com/ClickHouse/ClickHouse/pull/56573) ([Mikhail Koviazin](https://github.com/mkmkme)).
+* Make `removeOutdatedTables()` less aggressive on unsuccessful PostgreSQL connections. [#56609](https://github.com/ClickHouse/ClickHouse/pull/56609) ([jsc0218](https://github.com/jsc0218)).
+* Previously it took too much time to connect to PostgreSQL when the URL was not right, so the relevant query got stuck there until it was cancelled. [#56648](https://github.com/ClickHouse/ClickHouse/pull/56648) ([jsc0218](https://github.com/jsc0218)).
+* ClickHouse Keeper reports its running availability zone at the `/keeper/availability-zone` path. This can be configured via `<availability_zone><value>us-west-1a</value></availability_zone>`. [#56715](https://github.com/ClickHouse/ClickHouse/pull/56715) ([Jianfei Hu](https://github.com/incfly)).
+* Do not allow tables on different replicas to have different aggregate functions in SimpleAggregateFunction columns. [#56724](https://github.com/ClickHouse/ClickHouse/pull/56724) ([Duc Canh Le](https://github.com/canhld94)).
+* Add support for the [well-known Protobuf types](https://protobuf.dev/reference/protobuf/google.protobuf/) in the Protobuf format. [#56741](https://github.com/ClickHouse/ClickHouse/pull/56741) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Keeper improvement: disable compressed logs by default in Keeper. [#56763](https://github.com/ClickHouse/ClickHouse/pull/56763) ([Antonio Andelic](https://github.com/antonio2368)).
+* Add config setting `wait_dictionaries_load_at_startup`. [#56782](https://github.com/ClickHouse/ClickHouse/pull/56782) ([Vitaly Baranov](https://github.com/vitlibar)).
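+
+Illustrating the `concat` change above (arbitrary argument types) together with the single-argument form described further below; the expected outputs follow directly from the two entries:
+
+```sql
+SELECT concat('ab', 42);  -- returns 'ab42': non-String arguments are accepted
+SELECT concat('abc');     -- the single-argument form, returns 'abc'
+```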
+* There was a potential vulnerability in previous ClickHouse versions: if a user has connected and unsuccessfully tried to authenticate with the "interserver secret" method, the server didn't terminate the connection immediately but continued to receive and ignore the leftover packets from the client. While these packets are ignored, they are still parsed, and if they use a compression method with another known vulnerability, it will lead to exploitation of it without authentication. This issue was found with the [ClickHouse Bug Bounty Program](https://github.com/ClickHouse/ClickHouse/issues/38986) by https://twitter.com/malacupa. [#56794](https://github.com/ClickHouse/ClickHouse/pull/56794) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fetching a part now waits until that part is fully committed on the remote replica. It is better not to send a part in PreActive state; in the case of zero-copy replication this is a mandatory restriction. [#56808](https://github.com/ClickHouse/ClickHouse/pull/56808) ([Sema Checherinda](https://github.com/CheSema)).
+* Implement user-level setting `alter_move_to_space_execute_async` which allows executing queries `ALTER TABLE ... MOVE PARTITION|PART TO DISK|VOLUME` asynchronously. The size of the pool for background executions is controlled by `background_move_pool_size`. Default behavior is synchronous execution. Fixes [#47643](https://github.com/ClickHouse/ClickHouse/issues/47643). [#56809](https://github.com/ClickHouse/ClickHouse/pull/56809) ([alesapin](https://github.com/alesapin)).
+* Able to filter by engine when scanning system.tables, avoiding unnecessary (potentially time-consuming) connections. [#56813](https://github.com/ClickHouse/ClickHouse/pull/56813) ([jsc0218](https://github.com/jsc0218)).
+* Show `total_bytes` and `total_rows` in system tables for RocksDB storage. [#56816](https://github.com/ClickHouse/ClickHouse/pull/56816) ([Aleksandr Musorin](https://github.com/AVMusorin)).
+* Allow basic commands in ALTER for TEMPORARY tables. [#56892](https://github.com/ClickHouse/ClickHouse/pull/56892) ([Sergey](https://github.com/icuken)).
+* LZ4 compression: buffer the compressed block in the rare case when the output buffer's capacity is not enough for writing the compressed block directly into it. [#56938](https://github.com/ClickHouse/ClickHouse/pull/56938) ([Sema Checherinda](https://github.com/CheSema)).
+* Add metrics for the number of queued jobs, which is useful for the IO thread pool. [#56958](https://github.com/ClickHouse/ClickHouse/pull/56958) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a setting for the PostgreSQL table engine in the config file, a check for the setting, and documentation around the additional setting. [#56959](https://github.com/ClickHouse/ClickHouse/pull/56959) ([Peignon Melvyn](https://github.com/melvynator)).
+* Run the interpreter with the `only_analyze` flag in the `getSampleBlock` method. [#56972](https://github.com/ClickHouse/ClickHouse/pull/56972) ([Mikhail Artemenko](https://github.com/Michicosun)).
+* Add a new `MergeTree` setting `add_implicit_sign_column_constraint_for_collapsing_engine` (disabled by default). When enabled, it adds an implicit CHECK constraint for `CollapsingMergeTree` tables that restricts the value of the `Sign` column to be only -1 or 1 (see the sketch below). [#56701](https://github.com/ClickHouse/ClickHouse/issues/56701). [#56986](https://github.com/ClickHouse/ClickHouse/pull/56986) ([Kevin Mingtarja](https://github.com/kevinmingtarja)).
+* Function `concat()` can now be called with a single argument, e.g., `SELECT concat('abc')`. This makes its behavior more consistent with MySQL's concat implementation. [#57000](https://github.com/ClickHouse/ClickHouse/pull/57000) ([Serge Klochkov](https://github.com/slvrtrn)).
+* Signs all `x-amz-*` headers as required by AWS S3 docs. [#57001](https://github.com/ClickHouse/ClickHouse/pull/57001) ([Arthur Passos](https://github.com/arthurpassos)).
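+
+A hedged sketch of the new `MergeTree` setting described above; the table definition is illustrative:
+
+```sql
+CREATE TABLE collapsing_example
+(
+    key  UInt64,
+    sign Int8
+)
+ENGINE = CollapsingMergeTree(sign)
+ORDER BY key
+SETTINGS add_implicit_sign_column_constraint_for_collapsing_engine = 1;
+
+-- With the setting enabled, inserting a `sign` value other than -1 or 1
+-- violates the implicit CHECK constraint.
+```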
+* Function `fromDaysSinceYearZero` (alias: `FROM_DAYS`) can now be used with unsigned and signed integer types (previously, it had to be an unsigned integer). This improves compatibility with 3rd party tools such as Tableau Online (see the example query below). [#57002](https://github.com/ClickHouse/ClickHouse/pull/57002) ([Serge Klochkov](https://github.com/slvrtrn)).
+* Add system.s3queue_log to the default config. [#57036](https://github.com/ClickHouse/ClickHouse/pull/57036) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Change the default for `wait_dictionaries_load_at_startup` to true, and use this setting only if `dictionaries_lazy_load` is false. [#57133](https://github.com/ClickHouse/ClickHouse/pull/57133) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Check the dictionary source type on creation even if `dictionaries_lazy_load` is enabled. [#57134](https://github.com/ClickHouse/ClickHouse/pull/57134) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Plan-level optimizations can now be enabled/disabled individually. Previously, it was only possible to disable them all. The setting which previously did that (`query_plan_enable_optimizations`) is retained and can still be used to disable all optimizations. [#57152](https://github.com/ClickHouse/ClickHouse/pull/57152) ([Robert Schulze](https://github.com/rschu1ze)).
+* The server's exit code will correspond to the exception code. For example, if the server cannot start due to memory limit, it will exit with the code 241 = MEMORY_LIMIT_EXCEEDED. In previous versions, the exit code for exceptions was always 70 = Poco::Util::ExitCode::EXIT_SOFTWARE. [#57153](https://github.com/ClickHouse/ClickHouse/pull/57153) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Do not demangle and symbolize stack frames from the `__functional` C++ header. [#57201](https://github.com/ClickHouse/ClickHouse/pull/57201) ([Mike Kot](https://github.com/myrrc)).
+* It is now possible to refer to an ALIAS column in index (non-primary-key) definitions (issue [#55650](https://github.com/ClickHouse/ClickHouse/issues/55650)). Example: `CREATE TABLE tab(col UInt32, col_alias ALIAS col + 1, INDEX idx (col_alias) TYPE minmax) ENGINE = MergeTree ORDER BY col;`. [#57220](https://github.com/ClickHouse/ClickHouse/pull/57220) ([flynn](https://github.com/ucasfl)).
+* HTTP server page `/dashboard` now supports charts with multiple lines. [#57236](https://github.com/ClickHouse/ClickHouse/pull/57236) ([Sergei Trifonov](https://github.com/serxa)).
+* This PR makes it possible to use suffixes (K, M, G, T, E) along with the amount of memory to be used. Closes [#56879](https://github.com/ClickHouse/ClickHouse/issues/56879). [#57273](https://github.com/ClickHouse/ClickHouse/pull/57273) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Bumped Intel QPL (used by codec `DEFLATE_QPL`) from v1.2.0 to v1.3.1. Also fixed a bug for the case of BOF (Block On Fault) = 0: changed to handle page faults by falling back to the SW path. [#57291](https://github.com/ClickHouse/ClickHouse/pull/57291) ([jasperzhu](https://github.com/jinjunzh)).
+* Make ALTER of materialized views non-experimental and deprecate the `allow_experimental_alter_materialized_view_structure` setting. Fixes [#15206](https://github.com/ClickHouse/ClickHouse/issues/15206). [#57311](https://github.com/ClickHouse/ClickHouse/pull/57311) ([alesapin](https://github.com/alesapin)).
+* Increase the default `replicated_deduplication_window` of MergeTree settings from 100 to 1k. [#57335](https://github.com/ClickHouse/ClickHouse/pull/57335) ([sichenzhao](https://github.com/sichenzhao)).
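+
+Illustrating the `fromDaysSinceYearZero` entry at the top of this section: signed integer types are now accepted as well (the expected date comes from the earlier changelog entry introducing the function):
+
+```sql
+SELECT
+    fromDaysSinceYearZero(739136),           -- 2023-09-08
+    fromDaysSinceYearZero(toInt32(739136));  -- signed integers now work too
+```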
+* Stop using `INCONSISTENT_METADATA_FOR_BACKUP` so much. If possible, prefer to continue scanning instead of stopping and restarting the scan for backup from the beginning. [#57385](https://github.com/ClickHouse/ClickHouse/pull/57385) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Introduce a limit for the maximum number of table projections (default 25). [#57491](https://github.com/ClickHouse/ClickHouse/pull/57491) ([Julia Kartseva](https://github.com/jkartseva)).
+* Enable `async_block_ids_cache` by default for `async_inserts` deduplication. [#57513](https://github.com/ClickHouse/ClickHouse/pull/57513) ([alesapin](https://github.com/alesapin)).
+
+#### Build/Testing/Packaging Improvement
+* Enable temporary_data_in_cache in s3 tests in CI. [#48425](https://github.com/ClickHouse/ClickHouse/pull/48425) ([vdimir](https://github.com/vdimir)).
+* Run sqllogic tests. [#56078](https://github.com/ClickHouse/ClickHouse/pull/56078) ([Han Fei](https://github.com/hanfei1991)).
+* Add a new build option `SANITIZE_COVERAGE`. If it is enabled, the code is instrumented to track the coverage. The collected information is available inside ClickHouse with: (1) a new function `coverage` that returns an array of unique addresses in the code found after the previous coverage reset; (2) a `SYSTEM RESET COVERAGE` query that resets the accumulated data (see the sketch below). This allows us to compare the coverage of different tests, including differential code coverage. Continuation of [#20539](https://github.com/ClickHouse/ClickHouse/issues/20539). [#56102](https://github.com/ClickHouse/ClickHouse/pull/56102) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* In [#54043](https://github.com/ClickHouse/ClickHouse/issues/54043) the setup plan started to appear in the logs. It should be only in `runner_get_all_tests.log`. As well, send the failed infrastructure event to the CI db. [#56214](https://github.com/ClickHouse/ClickHouse/pull/56214) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Some of the stack frames might not be resolved when collecting stacks. In such cases the raw address might be helpful. [#56267](https://github.com/ClickHouse/ClickHouse/pull/56267) ([Alexander Gololobov](https://github.com/davenger)).
+* Add an option to disable libssh. [#56333](https://github.com/ClickHouse/ClickHouse/pull/56333) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add automatic check that there are no large translation units. [#56559](https://github.com/ClickHouse/ClickHouse/pull/56559) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Lower the size of the single-binary distribution. This closes [#55181](https://github.com/ClickHouse/ClickHouse/issues/55181). [#56617](https://github.com/ClickHouse/ClickHouse/pull/56617) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Make `clickhouse-local` and `clickhouse-client` available under short names (`ch`, `chl`, `chc`) for usability. [#56634](https://github.com/ClickHouse/ClickHouse/pull/56634) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Information about the sizes of every translation unit and binary file after each build will be sent to the CI database in ClickHouse Cloud. This closes [#56107](https://github.com/ClickHouse/ClickHouse/issues/56107). [#56636](https://github.com/ClickHouse/ClickHouse/pull/56636) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
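+
+A sketch of the coverage introspection described in the `SANITIZE_COVERAGE` entry above, assuming the server binary was built with that option enabled:
+
+```sql
+-- Number of unique code addresses touched since the last reset.
+SELECT length(coverage());
+
+-- Reset the accumulated coverage data.
+SYSTEM RESET COVERAGE;
+```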
+* Certain files of the "Apache Arrow" library (which we use only for non-essential things like parsing the arrow format) were rebuilt all the time regardless of the build cache. This is fixed. [#56657](https://github.com/ClickHouse/ClickHouse/pull/56657) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Avoid recompiling translation units that depend on the autogenerated source file with the version. [#56660](https://github.com/ClickHouse/ClickHouse/pull/56660) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Do not fetch changed submodules in the builder container. [#56689](https://github.com/ClickHouse/ClickHouse/pull/56689) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Tracing data of the linker invocations will be sent to the CI database in ClickHouse Cloud. [#56725](https://github.com/ClickHouse/ClickHouse/pull/56725) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Use DWARF 5 debug symbols for the clickhouse binary (was DWARF 4 previously). [#56770](https://github.com/ClickHouse/ClickHouse/pull/56770) ([Michael Kolupaev](https://github.com/al13n321)).
+* Optimized build size further by removing unused code from external libraries. [#56786](https://github.com/ClickHouse/ClickHouse/pull/56786) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Set a memory usage limit for the client (`1G`) to address problems like this: https://s3.amazonaws.com/clickhouse-test-reports/0/f1bf3f1fc39f520871ec878d815e515e12fd3e7b/fuzzer_astfuzzertsan/report.html. [#56873](https://github.com/ClickHouse/ClickHouse/pull/56873) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* There was an attempt to have the proper listing in [#44311](https://github.com/ClickHouse/ClickHouse/issues/44311), but the fix itself was in the wrong place, so it's still broken. See an [example](https://github.com/ClickHouse/ClickHouse/actions/runs/6897342568/job/18781001022#step:8:25). [#56989](https://github.com/ClickHouse/ClickHouse/pull/56989) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fixed a memory leak in the integration test of the postgres dictionary. The case of a network partition was not handled correctly when the code was pulled into the repo years ago. [#57231](https://github.com/ClickHouse/ClickHouse/pull/57231) ([jsc0218](https://github.com/jsc0218)).
+* Fix a test filename typo. [#57272](https://github.com/ClickHouse/ClickHouse/pull/57272) ([jsc0218](https://github.com/jsc0218)).
+* Fix issue caught in https://github.com/docker-library/official-images/pull/15846. [#57571](https://github.com/ClickHouse/ClickHouse/pull/57571) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Fix analyzer - insertion from select with subquery referencing insertion table should process only insertion block. [#50857](https://github.com/ClickHouse/ClickHouse/pull/50857) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Setting JoinAlgorithm respects the specified order [#51745](https://github.com/ClickHouse/ClickHouse/pull/51745) ([vdimir](https://github.com/vdimir)).
+* Keeper `reconfig`: add timeout before yielding/taking leadership [#53481](https://github.com/ClickHouse/ClickHouse/pull/53481) ([Mike Kot](https://github.com/myrrc)).
+* Fix incorrect header in grace hash join and filter pushdown [#53922](https://github.com/ClickHouse/ClickHouse/pull/53922) ([vdimir](https://github.com/vdimir)).
+* Select from system tables when table based on table function.
[#55540](https://github.com/ClickHouse/ClickHouse/pull/55540) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* RFC: Fix "Cannot find column X in source stream" for Distributed queries with LIMIT BY [#55836](https://github.com/ClickHouse/ClickHouse/pull/55836) ([Azat Khuzhin](https://github.com/azat)). +* Fix 'Cannot read from file:' while running client in a background [#55976](https://github.com/ClickHouse/ClickHouse/pull/55976) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix clickhouse-local exit on bad send_logs_level setting [#55994](https://github.com/ClickHouse/ClickHouse/pull/55994) ([Kruglov Pavel](https://github.com/Avogar)). +* Bug fix explain ast with parameterized view [#56004](https://github.com/ClickHouse/ClickHouse/pull/56004) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix a crash during table loading on startup [#56232](https://github.com/ClickHouse/ClickHouse/pull/56232) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix ClickHouse-sourced dictionaries with an explicit query [#56236](https://github.com/ClickHouse/ClickHouse/pull/56236) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix segfault in signal handler for Keeper [#56266](https://github.com/ClickHouse/ClickHouse/pull/56266) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix incomplete query result for UNION in view() function. [#56274](https://github.com/ClickHouse/ClickHouse/pull/56274) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix inconsistency of "cast('0' as DateTime64(3))" and "cast('0' as Nullable(DateTime64(3)))" [#56286](https://github.com/ClickHouse/ClickHouse/pull/56286) ([李扬](https://github.com/taiyang-li)). +* Fix rare race condition related to Memory allocation failure [#56303](https://github.com/ClickHouse/ClickHouse/pull/56303) ([alesapin](https://github.com/alesapin)). +* Fix restore from backup with `flatten_nested` and `data_type_default_nullable` [#56306](https://github.com/ClickHouse/ClickHouse/pull/56306) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix crash in case of adding a column with type Object(JSON) [#56307](https://github.com/ClickHouse/ClickHouse/pull/56307) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix crash in filterPushDown [#56380](https://github.com/ClickHouse/ClickHouse/pull/56380) ([vdimir](https://github.com/vdimir)). +* Fix restore from backup with mat view and dropped source table [#56383](https://github.com/ClickHouse/ClickHouse/pull/56383) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix segfault during Kerberos initialization [#56401](https://github.com/ClickHouse/ClickHouse/pull/56401) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix buffer overflow in T64 [#56434](https://github.com/ClickHouse/ClickHouse/pull/56434) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix nullable primary key in final (2) [#56452](https://github.com/ClickHouse/ClickHouse/pull/56452) ([Amos Bird](https://github.com/amosbird)). +* Fix ON CLUSTER queries without database on initial node [#56484](https://github.com/ClickHouse/ClickHouse/pull/56484) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix startup failure due to TTL dependency [#56489](https://github.com/ClickHouse/ClickHouse/pull/56489) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix ALTER COMMENT queries ON CLUSTER [#56491](https://github.com/ClickHouse/ClickHouse/pull/56491) ([Nikolay Degterinsky](https://github.com/evillique)). 
+* Fix ALTER COLUMN with ALIAS [#56493](https://github.com/ClickHouse/ClickHouse/pull/56493) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix empty NAMED COLLECTIONs [#56494](https://github.com/ClickHouse/ClickHouse/pull/56494) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix two cases of projection analysis. [#56502](https://github.com/ClickHouse/ClickHouse/pull/56502) ([Amos Bird](https://github.com/amosbird)). +* Fix handling of aliases in query cache [#56545](https://github.com/ClickHouse/ClickHouse/pull/56545) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix conversion from `Nullable(Enum)` to `Nullable(String)` [#56644](https://github.com/ClickHouse/ClickHouse/pull/56644) ([Nikolay Degterinsky](https://github.com/evillique)). +* More reliable log handling in Keeper [#56670](https://github.com/ClickHouse/ClickHouse/pull/56670) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix configuration merge for nodes with substitution attributes [#56694](https://github.com/ClickHouse/ClickHouse/pull/56694) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Fix duplicate usage of table function input(). [#56695](https://github.com/ClickHouse/ClickHouse/pull/56695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix: RabbitMQ OpenSSL dynamic loading issue [#56703](https://github.com/ClickHouse/ClickHouse/pull/56703) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix crash in GCD codec in case when zeros present in data [#56704](https://github.com/ClickHouse/ClickHouse/pull/56704) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix 'mutex lock failed: Invalid argument' in clickhouse-local during insert into function [#56710](https://github.com/ClickHouse/ClickHouse/pull/56710) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix Date text parsing in optimistic path [#56765](https://github.com/ClickHouse/ClickHouse/pull/56765) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash in FPC codec [#56795](https://github.com/ClickHouse/ClickHouse/pull/56795) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* DatabaseReplicated: fix DDL query timeout after recovering a replica [#56796](https://github.com/ClickHouse/ClickHouse/pull/56796) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix incorrect nullable columns reporting in MySQL binary protocol [#56799](https://github.com/ClickHouse/ClickHouse/pull/56799) ([Serge Klochkov](https://github.com/slvrtrn)). +* Support Iceberg metadata files for metastore tables [#56810](https://github.com/ClickHouse/ClickHouse/pull/56810) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix TSAN report under transform [#56817](https://github.com/ClickHouse/ClickHouse/pull/56817) ([Raúl Marín](https://github.com/Algunenano)). +* Fix SET query and SETTINGS formatting [#56825](https://github.com/ClickHouse/ClickHouse/pull/56825) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix failure to start due to table dependency in joinGet [#56828](https://github.com/ClickHouse/ClickHouse/pull/56828) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix flattening existing Nested columns during ADD COLUMN [#56830](https://github.com/ClickHouse/ClickHouse/pull/56830) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix allow cr end of line for csv [#56901](https://github.com/ClickHouse/ClickHouse/pull/56901) ([KevinyhZou](https://github.com/KevinyhZou)). 
+* Fix `tryBase64Decode()` with invalid input [#56913](https://github.com/ClickHouse/ClickHouse/pull/56913) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix generating deep nested columns in CapnProto/Protobuf schemas [#56941](https://github.com/ClickHouse/ClickHouse/pull/56941) ([Kruglov Pavel](https://github.com/Avogar)). +* Prevent incompatible ALTER of projection columns [#56948](https://github.com/ClickHouse/ClickHouse/pull/56948) ([Amos Bird](https://github.com/amosbird)). +* Fix sqlite file path validation [#56984](https://github.com/ClickHouse/ClickHouse/pull/56984) ([San](https://github.com/santrancisco)). +* S3Queue: fix metadata reference increment [#56990](https://github.com/ClickHouse/ClickHouse/pull/56990) ([Kseniia Sumarokova](https://github.com/kssenii)). +* S3Queue minor fix [#56999](https://github.com/ClickHouse/ClickHouse/pull/56999) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix file path validation for DatabaseFileSystem [#57029](https://github.com/ClickHouse/ClickHouse/pull/57029) ([San](https://github.com/santrancisco)). +* Fix `fuzzBits` with `ARRAY JOIN` [#57033](https://github.com/ClickHouse/ClickHouse/pull/57033) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix Nullptr dereference in partial merge join with joined_subquery_re… [#57048](https://github.com/ClickHouse/ClickHouse/pull/57048) ([vdimir](https://github.com/vdimir)). +* Fix race condition in RemoteSource [#57052](https://github.com/ClickHouse/ClickHouse/pull/57052) ([Raúl Marín](https://github.com/Algunenano)). +* Implement `bitHammingDistance` for big integers [#57073](https://github.com/ClickHouse/ClickHouse/pull/57073) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* S3-style links bug fix [#57075](https://github.com/ClickHouse/ClickHouse/pull/57075) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix JSON_QUERY function with multiple numeric paths [#57096](https://github.com/ClickHouse/ClickHouse/pull/57096) ([KevinyhZou](https://github.com/KevinyhZou)). +* Fix buffer overflow in Gorilla codec [#57107](https://github.com/ClickHouse/ClickHouse/pull/57107) ([Nikolay Degterinsky](https://github.com/evillique)). +* Close interserver connection on any exception before authentication [#57142](https://github.com/ClickHouse/ClickHouse/pull/57142) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix segfault after ALTER UPDATE with Nullable MATERIALIZED column [#57147](https://github.com/ClickHouse/ClickHouse/pull/57147) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix incorrect JOIN plan optimization with partially materialized normal projection [#57196](https://github.com/ClickHouse/ClickHouse/pull/57196) ([Amos Bird](https://github.com/amosbird)). +* Ignore comments when comparing column descriptions [#57259](https://github.com/ClickHouse/ClickHouse/pull/57259) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix `ReadonlyReplica` metric for all cases [#57267](https://github.com/ClickHouse/ClickHouse/pull/57267) ([Antonio Andelic](https://github.com/antonio2368)). +* Background merges correctly use temporary data storage in the cache [#57275](https://github.com/ClickHouse/ClickHouse/pull/57275) ([vdimir](https://github.com/vdimir)). +* Keeper fix for changelog and snapshots [#57299](https://github.com/ClickHouse/ClickHouse/pull/57299) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Ignore finished ON CLUSTER tasks if hostname changed [#57339](https://github.com/ClickHouse/ClickHouse/pull/57339) ([Alexander Tokmakov](https://github.com/tavplubix)). +* MergeTree mutations reuse source part index granularity [#57352](https://github.com/ClickHouse/ClickHouse/pull/57352) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix function jsonMergePatch for partially const columns [#57379](https://github.com/ClickHouse/ClickHouse/pull/57379) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix ubsan error in `Arena` [#57407](https://github.com/ClickHouse/ClickHouse/pull/57407) ([Nikita Taranov](https://github.com/nickitat)). +* fs cache: add limit for background download [#57424](https://github.com/ClickHouse/ClickHouse/pull/57424) ([Kseniia Sumarokova](https://github.com/kssenii)). +* bugfix: correctly parse SYSTEM STOP LISTEN TCP SECURE [#57483](https://github.com/ClickHouse/ClickHouse/pull/57483) ([joelynch](https://github.com/joelynch)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Add function `arrayRandomSample()`"'. [#56399](https://github.com/ClickHouse/ClickHouse/pull/56399) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Update README.md'. [#56549](https://github.com/ClickHouse/ClickHouse/pull/56549) ([Tyler Hannan](https://github.com/tylerhannan)). +* NO CL ENTRY: 'Revert "FunctionSleep exception message fix"'. [#56591](https://github.com/ClickHouse/ClickHouse/pull/56591) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Inserting only non-duplicate chunks in MV"'. [#56598](https://github.com/ClickHouse/ClickHouse/pull/56598) ([Maksim Kita](https://github.com/kitaisreal)). +* NO CL ENTRY: 'Add new header for README with updated logo'. [#56607](https://github.com/ClickHouse/ClickHouse/pull/56607) ([Justin de Guzman](https://github.com/justindeguzman)). +* NO CL ENTRY: 'Revert "Add /keeper/availability-zone node to allow server load balancing within AZ."'. [#56610](https://github.com/ClickHouse/ClickHouse/pull/56610) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add 4-letter command for yielding/resigning leadership"'. [#56611](https://github.com/ClickHouse/ClickHouse/pull/56611) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'fix(docs): correct default value for output_format_parquet_compression_method to 'lz4''. [#56614](https://github.com/ClickHouse/ClickHouse/pull/56614) ([james-seymour-cubiko](https://github.com/james-seymour-cubiko)). +* NO CL ENTRY: 'Update except.md'. [#56651](https://github.com/ClickHouse/ClickHouse/pull/56651) ([rondo_1895](https://github.com/yangguang1991)). +* NO CL ENTRY: 'Revert "Add a setting max_execution_time_leaf to limit the execution time on shard for distributed query"'. [#56702](https://github.com/ClickHouse/ClickHouse/pull/56702) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Better except for SSL authentication failure"'. [#56844](https://github.com/ClickHouse/ClickHouse/pull/56844) ([Antonio Andelic](https://github.com/antonio2368)). +* NO CL ENTRY: 'Revert "s3 adaptive timeouts"'. [#56992](https://github.com/ClickHouse/ClickHouse/pull/56992) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Revert "s3 adaptive timeouts""'. [#56994](https://github.com/ClickHouse/ClickHouse/pull/56994) ([Sema Checherinda](https://github.com/CheSema)). 
+* NO CL ENTRY: 'Revert "Resubmit 01600_parts_types_metrics test (possibly without flakiness)"'. [#57163](https://github.com/ClickHouse/ClickHouse/pull/57163) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Mark select() as harmful function"'. [#57195](https://github.com/ClickHouse/ClickHouse/pull/57195) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Update Sentry"'. [#57229](https://github.com/ClickHouse/ClickHouse/pull/57229) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add debugging info for 01600_parts_types_metrics on failures"'. [#57232](https://github.com/ClickHouse/ClickHouse/pull/57232) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Update date-time-functions.md"'. [#57329](https://github.com/ClickHouse/ClickHouse/pull/57329) ([Denny Crane](https://github.com/den-crane)). +* NO CL ENTRY: 'Revert "add function getClientHTTPHeader"'. [#57510](https://github.com/ClickHouse/ClickHouse/pull/57510) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add `sqid()` function"'. [#57511](https://github.com/ClickHouse/ClickHouse/pull/57511) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add new aggregation function groupArraySorted()"'. [#57519](https://github.com/ClickHouse/ClickHouse/pull/57519) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Implemented series period detect method using pocketfft lib"'. [#57536](https://github.com/ClickHouse/ClickHouse/pull/57536) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Support use alias column in indices"'. [#57537](https://github.com/ClickHouse/ClickHouse/pull/57537) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Remove useless "install" from CMake (step 1) [#36589](https://github.com/ClickHouse/ClickHouse/pull/36589) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Analyzer support 'is not distinct from' in join on section [#54068](https://github.com/ClickHouse/ClickHouse/pull/54068) ([vdimir](https://github.com/vdimir)). +* Refactor merge join transform [#55007](https://github.com/ClickHouse/ClickHouse/pull/55007) ([Alex Cheng](https://github.com/Alex-Cheng)). +* Add function jaccardIndex back with better performance [#55126](https://github.com/ClickHouse/ClickHouse/pull/55126) ([vdimir](https://github.com/vdimir)). +* Use more thread pools in BACKUP/RESTORE to avoid its hanging in tests [#55216](https://github.com/ClickHouse/ClickHouse/pull/55216) ([Vitaly Baranov](https://github.com/vitlibar)). +* Parallel replicas: progress bar [#55574](https://github.com/ClickHouse/ClickHouse/pull/55574) ([Igor Nikonov](https://github.com/devcrafter)). +* Analyzer: Fix result type after IfConstantConditionPass [#55951](https://github.com/ClickHouse/ClickHouse/pull/55951) ([Dmitry Novik](https://github.com/novikd)). +* RemoteSource: remove unnecessary flag [#55980](https://github.com/ClickHouse/ClickHouse/pull/55980) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix `REPLICA_ALREADY_EXISTS` for ReplicatedMergeTree [#56000](https://github.com/ClickHouse/ClickHouse/pull/56000) ([Nikolay Degterinsky](https://github.com/evillique)). 
+* Rework [#52159](https://github.com/ClickHouse/ClickHouse/issues/52159) to avoid coredump generation [#56039](https://github.com/ClickHouse/ClickHouse/pull/56039) ([Raúl Marín](https://github.com/Algunenano)). +* Bump gRPC to v1.47.5 [#56059](https://github.com/ClickHouse/ClickHouse/pull/56059) ([Robert Schulze](https://github.com/rschu1ze)). +* See what happens if we use less different docker images in integration tests [#56082](https://github.com/ClickHouse/ClickHouse/pull/56082) ([Raúl Marín](https://github.com/Algunenano)). +* Add missing zookeeper retries in StorageReplicatedMergeTree::backupData [#56131](https://github.com/ClickHouse/ClickHouse/pull/56131) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Better process broken parts on table start for replicated tables [#56142](https://github.com/ClickHouse/ClickHouse/pull/56142) ([alesapin](https://github.com/alesapin)). +* Add more details to "Data after merge is not byte-identical to data on another replicas" [#56164](https://github.com/ClickHouse/ClickHouse/pull/56164) ([Azat Khuzhin](https://github.com/azat)). +* Revert "Revert "Fix output/input of Arrow dictionary column"" [#56167](https://github.com/ClickHouse/ClickHouse/pull/56167) ([Kruglov Pavel](https://github.com/Avogar)). +* Add a log message for DatabaseReplicated [#56215](https://github.com/ClickHouse/ClickHouse/pull/56215) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Correct aggregate function cross tab accessors to be endianness-independent. [#56223](https://github.com/ClickHouse/ClickHouse/pull/56223) ([Austin Kothig](https://github.com/kothiga)). +* Fix client suggestions for user without grants [#56234](https://github.com/ClickHouse/ClickHouse/pull/56234) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix link to failed check report in status commit [#56243](https://github.com/ClickHouse/ClickHouse/pull/56243) ([vdimir](https://github.com/vdimir)). +* Analyzer: fix 01019_alter_materialized_view_consistent [#56246](https://github.com/ClickHouse/ClickHouse/pull/56246) ([vdimir](https://github.com/vdimir)). +* Properly process aliases for aggregation-by-partition optimization. [#56254](https://github.com/ClickHouse/ClickHouse/pull/56254) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* deltalake: Do not raise errors when processing add and remove actions [#56260](https://github.com/ClickHouse/ClickHouse/pull/56260) ([joelynch](https://github.com/joelynch)). +* Fix rare logical error in Replicated database [#56272](https://github.com/ClickHouse/ClickHouse/pull/56272) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update version_date.tsv and changelogs after v23.10.1.1976-stable [#56278](https://github.com/ClickHouse/ClickHouse/pull/56278) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Add assertion that `SizePredictor` is set if `preferred_block_size_bytes` is set [#56302](https://github.com/ClickHouse/ClickHouse/pull/56302) ([Nikita Taranov](https://github.com/nickitat)). +* Implement digest helpers for different objects [#56305](https://github.com/ClickHouse/ClickHouse/pull/56305) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Removed stale events from README [#56311](https://github.com/ClickHouse/ClickHouse/pull/56311) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix more tests with analyzer. [#56315](https://github.com/ClickHouse/ClickHouse/pull/56315) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Change some exception codes [#56316](https://github.com/ClickHouse/ClickHouse/pull/56316) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix using table shared id during backup and improve logs. [#56339](https://github.com/ClickHouse/ClickHouse/pull/56339) ([Vitaly Baranov](https://github.com/vitlibar)). +* Print info while decompressing the binary [#56360](https://github.com/ClickHouse/ClickHouse/pull/56360) ([Antonio Andelic](https://github.com/antonio2368)). +* remove unstable test test_heavy_insert_select_check_memory [#56369](https://github.com/ClickHouse/ClickHouse/pull/56369) ([Sema Checherinda](https://github.com/CheSema)). +* Update test_storage_s3_queue/test.py [#56370](https://github.com/ClickHouse/ClickHouse/pull/56370) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update 02735_system_zookeeper_connection.sql [#56374](https://github.com/ClickHouse/ClickHouse/pull/56374) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Cleanup convenience functions in IDataType [#56375](https://github.com/ClickHouse/ClickHouse/pull/56375) ([Robert Schulze](https://github.com/rschu1ze)). +* Update test_storage_s3_queue [#56376](https://github.com/ClickHouse/ClickHouse/pull/56376) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Minor improvements for S3Queue [#56377](https://github.com/ClickHouse/ClickHouse/pull/56377) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add obsolete setting back [#56382](https://github.com/ClickHouse/ClickHouse/pull/56382) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Rewrite jobs to use callable workflow [#56385](https://github.com/ClickHouse/ClickHouse/pull/56385) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update stress.py [#56388](https://github.com/ClickHouse/ClickHouse/pull/56388) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix rocksdb with analyzer. [#56391](https://github.com/ClickHouse/ClickHouse/pull/56391) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Option to check particular file with utils/check-style/check-doc-aspell [#56394](https://github.com/ClickHouse/ClickHouse/pull/56394) ([vdimir](https://github.com/vdimir)). +* Add a metric for suspicious parts in ZooKeeper [#56395](https://github.com/ClickHouse/ClickHouse/pull/56395) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix 02404_memory_bound_merging with analyzer. [#56419](https://github.com/ClickHouse/ClickHouse/pull/56419) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* move storage_metadata_write_full_object_key setting to the server scope [#56421](https://github.com/ClickHouse/ClickHouse/pull/56421) ([Sema Checherinda](https://github.com/CheSema)). +* Make autoscaling more responsive [#56422](https://github.com/ClickHouse/ClickHouse/pull/56422) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix `test_attach_without_fetching` [#56429](https://github.com/ClickHouse/ClickHouse/pull/56429) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Use `pcg` + `randomSeed()` instead of `std::mt19937`/`std::random_device` [#56430](https://github.com/ClickHouse/ClickHouse/pull/56430) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix test `02725_database_hdfs.sh` [#56457](https://github.com/ClickHouse/ClickHouse/pull/56457) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update the AMI receipt [#56459](https://github.com/ClickHouse/ClickHouse/pull/56459) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Make IMergeTreeDataPart::getState() inlinable [#56461](https://github.com/ClickHouse/ClickHouse/pull/56461) ([Alexander Gololobov](https://github.com/davenger)). +* Update version_date.tsv and changelogs after v23.10.2.13-stable [#56467](https://github.com/ClickHouse/ClickHouse/pull/56467) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.9.4.11-stable [#56468](https://github.com/ClickHouse/ClickHouse/pull/56468) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.8.6.16-lts [#56469](https://github.com/ClickHouse/ClickHouse/pull/56469) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.3.16.7-lts [#56470](https://github.com/ClickHouse/ClickHouse/pull/56470) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Disable randomization of allow_experimental_block_number_column flag [#56474](https://github.com/ClickHouse/ClickHouse/pull/56474) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Parallel clone sparse/shallow submodules [#56479](https://github.com/ClickHouse/ClickHouse/pull/56479) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix default port for Replicated database cluster [#56486](https://github.com/ClickHouse/ClickHouse/pull/56486) ([Nikolay Degterinsky](https://github.com/evillique)). +* Updated compression to LZ4 [#56497](https://github.com/ClickHouse/ClickHouse/pull/56497) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Analyzer remove unused projection columns [#56499](https://github.com/ClickHouse/ClickHouse/pull/56499) ([Maksim Kita](https://github.com/kitaisreal)). +* FunctionSleep exception message fix [#56500](https://github.com/ClickHouse/ClickHouse/pull/56500) ([Maksim Kita](https://github.com/kitaisreal)). +* Continue rewriting workflows to reusable tests [#56501](https://github.com/ClickHouse/ClickHouse/pull/56501) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Analyzer special functions projection names fix [#56514](https://github.com/ClickHouse/ClickHouse/pull/56514) ([Maksim Kita](https://github.com/kitaisreal)). +* CTE invalid query analysis add test [#56517](https://github.com/ClickHouse/ClickHouse/pull/56517) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix compilation of BackupsWorker.cpp [#56518](https://github.com/ClickHouse/ClickHouse/pull/56518) ([Vitaly Baranov](https://github.com/vitlibar)). +* Analyzer MoveFunctionsOutOfAnyPass refactoring [#56520](https://github.com/ClickHouse/ClickHouse/pull/56520) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer support EXPLAIN ESTIMATE [#56522](https://github.com/ClickHouse/ClickHouse/pull/56522) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer log used row policies [#56531](https://github.com/ClickHouse/ClickHouse/pull/56531) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer ORDER BY read in order query plan add test [#56532](https://github.com/ClickHouse/ClickHouse/pull/56532) ([Maksim Kita](https://github.com/kitaisreal)). +* ReplicatedMergeTree: check shutdown flags in retry loops [#56533](https://github.com/ClickHouse/ClickHouse/pull/56533) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix race between REPLACE_RANGE and GET_PART (set actual part name when fetching) [#56536](https://github.com/ClickHouse/ClickHouse/pull/56536) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Bump gRPC to v1.54.3 [#56543](https://github.com/ClickHouse/ClickHouse/pull/56543) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix flaky LDAP integration tests [#56544](https://github.com/ClickHouse/ClickHouse/pull/56544) ([Julian Maicher](https://github.com/jmaicher)). +* Remove useless using [#56546](https://github.com/ClickHouse/ClickHouse/pull/56546) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better warning message [#56547](https://github.com/ClickHouse/ClickHouse/pull/56547) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow `chassert` to guide the static analyzer [#56552](https://github.com/ClickHouse/ClickHouse/pull/56552) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove C++ templates [#56556](https://github.com/ClickHouse/ClickHouse/pull/56556) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `test_keeper_four_word_command/test.py::test_cmd_crst` [#56570](https://github.com/ClickHouse/ClickHouse/pull/56570) ([Antonio Andelic](https://github.com/antonio2368)). +* Delete unnecessary file from tests [#56572](https://github.com/ClickHouse/ClickHouse/pull/56572) ([vdimir](https://github.com/vdimir)). +* Analyzer: fix logical error with set in array join [#56587](https://github.com/ClickHouse/ClickHouse/pull/56587) ([vdimir](https://github.com/vdimir)). +* hide VERSION_INLINE_DATA under feature flag [#56594](https://github.com/ClickHouse/ClickHouse/pull/56594) ([Sema Checherinda](https://github.com/CheSema)). +* Fix 02554_fix_grouping_sets_predicate_push_down with analyzer. [#56595](https://github.com/ClickHouse/ClickHouse/pull/56595) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add "FunctionSleep exception message fix" again [#56597](https://github.com/ClickHouse/ClickHouse/pull/56597) ([Raúl Marín](https://github.com/Algunenano)). +* Update version_date.tsv and changelogs after v23.10.3.5-stable [#56606](https://github.com/ClickHouse/ClickHouse/pull/56606) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Remove bad test [#56612](https://github.com/ClickHouse/ClickHouse/pull/56612) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Symbolize `trace_log` for exporting [#56613](https://github.com/ClickHouse/ClickHouse/pull/56613) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add indices to exported system logs [#56615](https://github.com/ClickHouse/ClickHouse/pull/56615) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove dependencies [#56616](https://github.com/ClickHouse/ClickHouse/pull/56616) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* WIP: Add test describing MV deduplication issues [#56621](https://github.com/ClickHouse/ClickHouse/pull/56621) ([Jordi Villar](https://github.com/jrdi)). +* Add test for ROW POLICY ON CLUSTER [#56623](https://github.com/ClickHouse/ClickHouse/pull/56623) ([Nikolay Degterinsky](https://github.com/evillique)). +* Enable --secure flag for clickhouse-client for hostnames pointing to clickhouse cloud [#56638](https://github.com/ClickHouse/ClickHouse/pull/56638) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Continue with work from [#56621](https://github.com/ClickHouse/ClickHouse/issues/56621) [#56641](https://github.com/ClickHouse/ClickHouse/pull/56641) ([Jordi Villar](https://github.com/jrdi)). 
+* Switch to SSL port for clickhouse-client for hostnames pointing to clickhouse cloud [#56649](https://github.com/ClickHouse/ClickHouse/pull/56649) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Remove garbage from libssh [#56654](https://github.com/ClickHouse/ClickHouse/pull/56654) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Delete a file [#56655](https://github.com/ClickHouse/ClickHouse/pull/56655) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Delete a file (2) [#56656](https://github.com/ClickHouse/ClickHouse/pull/56656) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove some entries from `analyzer_tech_debt.txt` [#56658](https://github.com/ClickHouse/ClickHouse/pull/56658) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Miscellaneous [#56662](https://github.com/ClickHouse/ClickHouse/pull/56662) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Bump gRPC to v1.55.4 and protobuf to v22.5 [#56664](https://github.com/ClickHouse/ClickHouse/pull/56664) ([Robert Schulze](https://github.com/rschu1ze)). +* Small refactoring of AST hash calculation (follow-up to [#56545](https://github.com/ClickHouse/ClickHouse/issues/56545)) [#56665](https://github.com/ClickHouse/ClickHouse/pull/56665) ([Robert Schulze](https://github.com/rschu1ze)). +* Analyzer: filtering by virtual columns for StorageS3 [#56668](https://github.com/ClickHouse/ClickHouse/pull/56668) ([vdimir](https://github.com/vdimir)). +* Add back flaky tests to analyzer_tech_debt.txt [#56669](https://github.com/ClickHouse/ClickHouse/pull/56669) ([Raúl Marín](https://github.com/Algunenano)). +* gRPC: remove build dependency on systemd [#56671](https://github.com/ClickHouse/ClickHouse/pull/56671) ([Raúl Marín](https://github.com/Algunenano)). +* Remove unused code [#56677](https://github.com/ClickHouse/ClickHouse/pull/56677) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix missing argument for style_check.py in master workflow [#56691](https://github.com/ClickHouse/ClickHouse/pull/56691) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix unexpected parts handling [#56693](https://github.com/ClickHouse/ClickHouse/pull/56693) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Revert "Revert "Add a setting max_execution_time_leaf to limit the execution time on shard for distributed query"" [#56707](https://github.com/ClickHouse/ClickHouse/pull/56707) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix use_structure_from_insertion_table_in_table_functions with new Analyzer [#56708](https://github.com/ClickHouse/ClickHouse/pull/56708) ([Kruglov Pavel](https://github.com/Avogar)). +* Disable settings randomisation for `02896_memory_accounting_for_user.sh` [#56709](https://github.com/ClickHouse/ClickHouse/pull/56709) ([Nikita Taranov](https://github.com/nickitat)). +* Light autogenerated file [#56720](https://github.com/ClickHouse/ClickHouse/pull/56720) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Less CMake checks [#56721](https://github.com/ClickHouse/ClickHouse/pull/56721) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove orphan header files [#56722](https://github.com/ClickHouse/ClickHouse/pull/56722) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Try to fix hang in 01104_distributed_numbers_test [#56764](https://github.com/ClickHouse/ClickHouse/pull/56764) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Test RabbitMQ with secure connection [#56767](https://github.com/ClickHouse/ClickHouse/pull/56767) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix flaky test_replicated_merge_tree_encryption_codec. [#56768](https://github.com/ClickHouse/ClickHouse/pull/56768) ([Vitaly Baranov](https://github.com/vitlibar)). +* fix typo in ClickHouseDictionarySource [#56776](https://github.com/ClickHouse/ClickHouse/pull/56776) ([Mikhail Koviazin](https://github.com/mkmkme)). +* Fix pygithub [#56778](https://github.com/ClickHouse/ClickHouse/pull/56778) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add test for avoided recursion [#56785](https://github.com/ClickHouse/ClickHouse/pull/56785) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix randomization of Keeper configs in stress tests [#56788](https://github.com/ClickHouse/ClickHouse/pull/56788) ([Antonio Andelic](https://github.com/antonio2368)). +* Try fix `No user in current context, it's a bug` [#56789](https://github.com/ClickHouse/ClickHouse/pull/56789) ([Antonio Andelic](https://github.com/antonio2368)). +* Update avg_weighted.xml [#56797](https://github.com/ClickHouse/ClickHouse/pull/56797) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Better except for SSL authentication failure [#56811](https://github.com/ClickHouse/ClickHouse/pull/56811) ([Nikolay Degterinsky](https://github.com/evillique)). +* More stable `test_keeper_reconfig_replace_leader` [#56835](https://github.com/ClickHouse/ClickHouse/pull/56835) ([Antonio Andelic](https://github.com/antonio2368)). +* Add cancellation hook for moving background operation [#56846](https://github.com/ClickHouse/ClickHouse/pull/56846) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Updated comment in universal.sh [#56852](https://github.com/ClickHouse/ClickHouse/pull/56852) ([Robert Schulze](https://github.com/rschu1ze)). +* Bump gRPC to v1.59 and protobuf to v24.4 [#56853](https://github.com/ClickHouse/ClickHouse/pull/56853) ([Robert Schulze](https://github.com/rschu1ze)). +* Better exception messages [#56854](https://github.com/ClickHouse/ClickHouse/pull/56854) ([Antonio Andelic](https://github.com/antonio2368)). +* Sparse checkout: Use `--remote` for `git submodule update` [#56857](https://github.com/ClickHouse/ClickHouse/pull/56857) ([Aleksandr Musorin](https://github.com/AVMusorin)). +* Fix `test_keeper_broken_logs` [#56858](https://github.com/ClickHouse/ClickHouse/pull/56858) ([Antonio Andelic](https://github.com/antonio2368)). +* CMake: Small cleanup in cpu_features.cmake [#56861](https://github.com/ClickHouse/ClickHouse/pull/56861) ([Robert Schulze](https://github.com/rschu1ze)). +* Planner support transactions [#56867](https://github.com/ClickHouse/ClickHouse/pull/56867) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve diagnostics in test 02908_many_requests_to_system_replicas [#56869](https://github.com/ClickHouse/ClickHouse/pull/56869) ([Alexander Gololobov](https://github.com/davenger)). +* Update 01052_window_view_proc_tumble_to_now.sh [#56870](https://github.com/ClickHouse/ClickHouse/pull/56870) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Call cache check a bit more often [#56872](https://github.com/ClickHouse/ClickHouse/pull/56872) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update test_storage_s3_queue/test.py [#56874](https://github.com/ClickHouse/ClickHouse/pull/56874) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Fix perf tests report when there are no tests [#56881](https://github.com/ClickHouse/ClickHouse/pull/56881) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove ctest [#56894](https://github.com/ClickHouse/ClickHouse/pull/56894) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Simpler CMake [#56898](https://github.com/ClickHouse/ClickHouse/pull/56898) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* test for [#56790](https://github.com/ClickHouse/ClickHouse/issues/56790) [#56899](https://github.com/ClickHouse/ClickHouse/pull/56899) ([Denny Crane](https://github.com/den-crane)). +* Allow delegate disk to handle retries for createDirectories [#56905](https://github.com/ClickHouse/ClickHouse/pull/56905) ([Alexander Gololobov](https://github.com/davenger)). +* Update version_date.tsv and changelogs after v23.10.4.25-stable [#56906](https://github.com/ClickHouse/ClickHouse/pull/56906) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.3.17.13-lts [#56907](https://github.com/ClickHouse/ClickHouse/pull/56907) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.8.7.24-lts [#56908](https://github.com/ClickHouse/ClickHouse/pull/56908) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.9.5.29-stable [#56909](https://github.com/ClickHouse/ClickHouse/pull/56909) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Remove outdated instructions [#56911](https://github.com/ClickHouse/ClickHouse/pull/56911) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid dependencies with no fixed versions [#56914](https://github.com/ClickHouse/ClickHouse/pull/56914) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix race on zk_log initialization [#56915](https://github.com/ClickHouse/ClickHouse/pull/56915) ([Alexander Gololobov](https://github.com/davenger)). +* Check what will happen if I remove some lines [#56916](https://github.com/ClickHouse/ClickHouse/pull/56916) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update fasttest [#56919](https://github.com/ClickHouse/ClickHouse/pull/56919) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make some tests independent of macro settings [#56927](https://github.com/ClickHouse/ClickHouse/pull/56927) ([Raúl Marín](https://github.com/Algunenano)). +* Fix flaky 02494_query_cache_events [#56935](https://github.com/ClickHouse/ClickHouse/pull/56935) ([Robert Schulze](https://github.com/rschu1ze)). +* Add CachedReadBufferReadFromCache{Hits,Misses} profile events [#56936](https://github.com/ClickHouse/ClickHouse/pull/56936) ([Jordi Villar](https://github.com/jrdi)). +* Send fatal logs by default in clickhouse-local [#56956](https://github.com/ClickHouse/ClickHouse/pull/56956) ([Nikolay Degterinsky](https://github.com/evillique)). +* Resubmit: Better except for SSL authentication [#56957](https://github.com/ClickHouse/ClickHouse/pull/56957) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix `test_keeper_auth` [#56960](https://github.com/ClickHouse/ClickHouse/pull/56960) ([Antonio Andelic](https://github.com/antonio2368)). +* Fewer concurrent requests in 02908_many_requests_to_system_replicas [#56968](https://github.com/ClickHouse/ClickHouse/pull/56968) ([Alexander Gololobov](https://github.com/davenger)). 
+* Own CMake for GRPC [#56971](https://github.com/ClickHouse/ClickHouse/pull/56971) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix build in Backups/BackupIO_S3.cpp [#56974](https://github.com/ClickHouse/ClickHouse/pull/56974) ([Robert Schulze](https://github.com/rschu1ze)). +* Add exclude for tryBase64Decode to backward compat test (follow-up to [#56913](https://github.com/ClickHouse/ClickHouse/issues/56913)) [#56975](https://github.com/ClickHouse/ClickHouse/pull/56975) ([Robert Schulze](https://github.com/rschu1ze)). +* Prefer sccache to ccache by default [#56980](https://github.com/ClickHouse/ClickHouse/pull/56980) ([Igor Nikonov](https://github.com/devcrafter)). +* update 02003_memory_limit_in_client.sh [#56981](https://github.com/ClickHouse/ClickHouse/pull/56981) ([Bharat Nallan](https://github.com/bharatnc)). +* Make check for the limited cmake dependencies the part of sparse checkout [#56991](https://github.com/ClickHouse/ClickHouse/pull/56991) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix flaky and slow tests. [#56993](https://github.com/ClickHouse/ClickHouse/pull/56993) ([Amos Bird](https://github.com/amosbird)). +* Fix dropping tables in test_create_or_drop_tables_during_backup [#57007](https://github.com/ClickHouse/ClickHouse/pull/57007) ([Vitaly Baranov](https://github.com/vitlibar)). +* Enable Analyzer in Stress and Fuzz tests [#57008](https://github.com/ClickHouse/ClickHouse/pull/57008) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Run CI for PRs with missing documentation [#57018](https://github.com/ClickHouse/ClickHouse/pull/57018) ([Michael Kolupaev](https://github.com/al13n321)). +* test_s3_engine_heavy_write_check_mem: turn test off [#57025](https://github.com/ClickHouse/ClickHouse/pull/57025) ([Sema Checherinda](https://github.com/CheSema)). +* NamedCollections: make exception message more informative. [#57031](https://github.com/ClickHouse/ClickHouse/pull/57031) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Avoid returning biggest resolution when fpr > 0.283 [#57034](https://github.com/ClickHouse/ClickHouse/pull/57034) ([Jordi Villar](https://github.com/jrdi)). +* Fix: suppress TSAN in RabbitMQ test [#57040](https://github.com/ClickHouse/ClickHouse/pull/57040) ([Igor Nikonov](https://github.com/devcrafter)). +* Small Keeper fixes [#57047](https://github.com/ClickHouse/ClickHouse/pull/57047) ([Antonio Andelic](https://github.com/antonio2368)). +* Parallel replicas: cleanup, narrow dependency [#57054](https://github.com/ClickHouse/ClickHouse/pull/57054) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix gRPC build on macOS [#57061](https://github.com/ClickHouse/ClickHouse/pull/57061) ([Robert Schulze](https://github.com/rschu1ze)). +* Better comment for ITransformingStep::transformPipeline [#57062](https://github.com/ClickHouse/ClickHouse/pull/57062) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix `Duplicate set` for StorageSet with analyzer. [#57063](https://github.com/ClickHouse/ClickHouse/pull/57063) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Better metadata path [#57083](https://github.com/ClickHouse/ClickHouse/pull/57083) ([Nikolay Degterinsky](https://github.com/evillique)). +* Analyzer fuzzer 3 (aggregate_functions_null_for_empty for projections) [#57099](https://github.com/ClickHouse/ClickHouse/pull/57099) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Update numbers.md [#57100](https://github.com/ClickHouse/ClickHouse/pull/57100) ([konruvikt](https://github.com/konruvikt)). +* Fix FunctionNode::toASTImpl [#57102](https://github.com/ClickHouse/ClickHouse/pull/57102) ([vdimir](https://github.com/vdimir)). +* Analyzer fuzzer 5 [#57103](https://github.com/ClickHouse/ClickHouse/pull/57103) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Cancel PipelineExecutor properly in case of exception in spawnThreads [#57104](https://github.com/ClickHouse/ClickHouse/pull/57104) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow HashedDictionary/FunctionsConversion as large TU [#57108](https://github.com/ClickHouse/ClickHouse/pull/57108) ([Azat Khuzhin](https://github.com/azat)). +* Disable checksums for builds with fuzzer [#57122](https://github.com/ClickHouse/ClickHouse/pull/57122) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Analyzer: Fix logical error in LogicalExpressionOptimizerVisitor [#57123](https://github.com/ClickHouse/ClickHouse/pull/57123) ([vdimir](https://github.com/vdimir)). +* Split HashedDictionary CU [#57124](https://github.com/ClickHouse/ClickHouse/pull/57124) ([Azat Khuzhin](https://github.com/azat)). +* Cancel executor in ~CreatingSetsTransform [#57125](https://github.com/ClickHouse/ClickHouse/pull/57125) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix system.*_log in artifacts on CI [#57128](https://github.com/ClickHouse/ClickHouse/pull/57128) ([Azat Khuzhin](https://github.com/azat)). +* Fix something in ReplicatedMergeTree [#57129](https://github.com/ClickHouse/ClickHouse/pull/57129) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not symbolize traces for debug/sanitizer builds for sending to cloud [#57130](https://github.com/ClickHouse/ClickHouse/pull/57130) ([Azat Khuzhin](https://github.com/azat)). +* Resubmit 01600_parts_types_metrics test (possibly without flakiness) [#57131](https://github.com/ClickHouse/ClickHouse/pull/57131) ([Azat Khuzhin](https://github.com/azat)). +* Follow up to [#56541](https://github.com/ClickHouse/ClickHouse/issues/56541) [#57141](https://github.com/ClickHouse/ClickHouse/pull/57141) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to disable reorder-functions-after-sorting optimization [#57144](https://github.com/ClickHouse/ClickHouse/pull/57144) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix bad test `00002_log_and_exception_messages_formatting` [#57145](https://github.com/ClickHouse/ClickHouse/pull/57145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test test_replicated_merge_tree_encryption_codec/test.py::test_different_keys [#57146](https://github.com/ClickHouse/ClickHouse/pull/57146) ([Vitaly Baranov](https://github.com/vitlibar)). +* Remove partial results from build matrix for stress tests [#57150](https://github.com/ClickHouse/ClickHouse/pull/57150) ([Azat Khuzhin](https://github.com/azat)). +* Minor changes in test_check_table [#57154](https://github.com/ClickHouse/ClickHouse/pull/57154) ([vdimir](https://github.com/vdimir)). +* Fix 02903_rmt_retriable_merge_exception flakiness for replicated database [#57155](https://github.com/ClickHouse/ClickHouse/pull/57155) ([Azat Khuzhin](https://github.com/azat)). +* Mark select() as harmful function [#57156](https://github.com/ClickHouse/ClickHouse/pull/57156) ([Igor Nikonov](https://github.com/devcrafter)). +* Improve the cherry-pick PR description [#57167](https://github.com/ClickHouse/ClickHouse/pull/57167) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). +* Add debugging info for 01600_parts_types_metrics on failures [#57170](https://github.com/ClickHouse/ClickHouse/pull/57170) ([Azat Khuzhin](https://github.com/azat)). +* Tiny improvement security [#57171](https://github.com/ClickHouse/ClickHouse/pull/57171) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update blob_storage_log.md [#57187](https://github.com/ClickHouse/ClickHouse/pull/57187) ([vdimir](https://github.com/vdimir)). +* [RFC] Set log_comment to the file name while processing files in client [#57191](https://github.com/ClickHouse/ClickHouse/pull/57191) ([Azat Khuzhin](https://github.com/azat)). +* Add test for [#5323](https://github.com/ClickHouse/ClickHouse/issues/5323) [#57192](https://github.com/ClickHouse/ClickHouse/pull/57192) ([Raúl Marín](https://github.com/Algunenano)). +* Analyzer fuzzer 6 (arrayJoin) [#57198](https://github.com/ClickHouse/ClickHouse/pull/57198) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add test for [#47366](https://github.com/ClickHouse/ClickHouse/issues/47366) [#57200](https://github.com/ClickHouse/ClickHouse/pull/57200) ([Raúl Marín](https://github.com/Algunenano)). +* Add test for [#51321](https://github.com/ClickHouse/ClickHouse/issues/51321) [#57202](https://github.com/ClickHouse/ClickHouse/pull/57202) ([Raúl Marín](https://github.com/Algunenano)). +* Fix possible crash (in Rust) of fuzzy finder in client [#57204](https://github.com/ClickHouse/ClickHouse/pull/57204) ([Azat Khuzhin](https://github.com/azat)). +* fix zero-copy locks leaking [#57205](https://github.com/ClickHouse/ClickHouse/pull/57205) ([Sema Checherinda](https://github.com/CheSema)). +* Fix test_distributed_storage_configuration flakiness [#57206](https://github.com/ClickHouse/ClickHouse/pull/57206) ([Azat Khuzhin](https://github.com/azat)). +* Update Sentry [#57222](https://github.com/ClickHouse/ClickHouse/pull/57222) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v23.10.5.20-stable [#57223](https://github.com/ClickHouse/ClickHouse/pull/57223) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.9.6.20-stable [#57224](https://github.com/ClickHouse/ClickHouse/pull/57224) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.3.18.15-lts [#57225](https://github.com/ClickHouse/ClickHouse/pull/57225) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.8.8.20-lts [#57226](https://github.com/ClickHouse/ClickHouse/pull/57226) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Change cursor style for overwrite mode (INS) to blinking in client [#57227](https://github.com/ClickHouse/ClickHouse/pull/57227) ([Azat Khuzhin](https://github.com/azat)). +* Remove test `01280_ttl_where_group_by` [#57230](https://github.com/ClickHouse/ClickHouse/pull/57230) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix docs [#57234](https://github.com/ClickHouse/ClickHouse/pull/57234) ([Nikolay Degterinsky](https://github.com/evillique)). +* Remove addBatchSinglePlaceFromInterval [#57258](https://github.com/ClickHouse/ClickHouse/pull/57258) ([Raúl Marín](https://github.com/Algunenano)). +* Add some additional groups to CI [#57260](https://github.com/ClickHouse/ClickHouse/pull/57260) ([alesapin](https://github.com/alesapin)). 
+* Analyzer: fix result type of aggregate function with NULL [#57265](https://github.com/ClickHouse/ClickHouse/pull/57265) ([vdimir](https://github.com/vdimir)). +* Ignore memory exception in Keeper asio workers [#57268](https://github.com/ClickHouse/ClickHouse/pull/57268) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix code reports [#57301](https://github.com/ClickHouse/ClickHouse/pull/57301) ([Raúl Marín](https://github.com/Algunenano)). +* Follow up recommendations from [#57167](https://github.com/ClickHouse/ClickHouse/issues/57167) [#57302](https://github.com/ClickHouse/ClickHouse/pull/57302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add back flaky tests to analyzer_tech_debt.txt [#57307](https://github.com/ClickHouse/ClickHouse/pull/57307) ([Raúl Marín](https://github.com/Algunenano)). +* Lower level for annoying S3 log [#57312](https://github.com/ClickHouse/ClickHouse/pull/57312) ([Antonio Andelic](https://github.com/antonio2368)). +* Add regression test for skim (Rust) crash on pasting certain input [#57313](https://github.com/ClickHouse/ClickHouse/pull/57313) ([Azat Khuzhin](https://github.com/azat)). +* Remove unused Strings from MergeTreeData [#57318](https://github.com/ClickHouse/ClickHouse/pull/57318) ([Mikhail Koviazin](https://github.com/mkmkme)). +* Address 02668_ulid_decoding flakiness [#57320](https://github.com/ClickHouse/ClickHouse/pull/57320) ([Raúl Marín](https://github.com/Algunenano)). +* DiskWeb fix [#57322](https://github.com/ClickHouse/ClickHouse/pull/57322) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update README.md [#57325](https://github.com/ClickHouse/ClickHouse/pull/57325) ([Tyler Hannan](https://github.com/tylerhannan)). +* Add information about new _size virtual column in file/s3/url/hdfs/azure table functions [#57328](https://github.com/ClickHouse/ClickHouse/pull/57328) ([Kruglov Pavel](https://github.com/Avogar)). +* Follow-up to [#56490](https://github.com/ClickHouse/ClickHouse/issues/56490): Fix build with `cmake -DENABLE_LIBRARIES=0` [#57330](https://github.com/ClickHouse/ClickHouse/pull/57330) ([Robert Schulze](https://github.com/rschu1ze)). +* Mark a setting obsolete [#57336](https://github.com/ClickHouse/ClickHouse/pull/57336) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Always renew ZK client in `WithRetries` [#57357](https://github.com/ClickHouse/ClickHouse/pull/57357) ([Antonio Andelic](https://github.com/antonio2368)). +* Shutdown disks after tables [#57358](https://github.com/ClickHouse/ClickHouse/pull/57358) ([Alexander Gololobov](https://github.com/davenger)). +* Update DDLTask.cpp [#57369](https://github.com/ClickHouse/ClickHouse/pull/57369) ([Alexander Tokmakov](https://github.com/tavplubix)). +* verbose exception messages for StorageFuzzJSON [#57372](https://github.com/ClickHouse/ClickHouse/pull/57372) ([Julia Kartseva](https://github.com/jkartseva)). +* Initialize only required disks in clickhouse-disks [#57387](https://github.com/ClickHouse/ClickHouse/pull/57387) ([Nikolay Degterinsky](https://github.com/evillique)). +* Allow wildcards in directories for partitioned write with File storage [#57391](https://github.com/ClickHouse/ClickHouse/pull/57391) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add tests for 43202 [#57404](https://github.com/ClickHouse/ClickHouse/pull/57404) ([Raúl Marín](https://github.com/Algunenano)). 
+* Consider whole check failure in bugfix validate check as an error [#57413](https://github.com/ClickHouse/ClickHouse/pull/57413) ([vdimir](https://github.com/vdimir)). +* Change type of s3_cache in test_encrypted_disk [#57416](https://github.com/ClickHouse/ClickHouse/pull/57416) ([vdimir](https://github.com/vdimir)). +* Add extra debug information on replication consistency errors [#57419](https://github.com/ClickHouse/ClickHouse/pull/57419) ([Raúl Marín](https://github.com/Algunenano)). +* Don't print server revision in client on connect [#57435](https://github.com/ClickHouse/ClickHouse/pull/57435) ([Nikita Taranov](https://github.com/nickitat)). +* Adding Sydney Meetup [#57457](https://github.com/ClickHouse/ClickHouse/pull/57457) ([Tyler Hannan](https://github.com/tylerhannan)). +* Fix adjusting log_comment in case of multiple files passed [#57464](https://github.com/ClickHouse/ClickHouse/pull/57464) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky test 02697_stop_reading_on_first_cancel.sh [#57481](https://github.com/ClickHouse/ClickHouse/pull/57481) ([Raúl Marín](https://github.com/Algunenano)). +* Tiny refactoring around cache [#57482](https://github.com/ClickHouse/ClickHouse/pull/57482) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Decrease default value for `filesystem_prefetch_min_bytes_for_single_read_task` [#57489](https://github.com/ClickHouse/ClickHouse/pull/57489) ([Nikita Taranov](https://github.com/nickitat)). +* Remove bad test [#57494](https://github.com/ClickHouse/ClickHouse/pull/57494) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add changelog for 23.11 [#57517](https://github.com/ClickHouse/ClickHouse/pull/57517) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Simple cleanup in distributed (while dealing with parallel replicas) [#57518](https://github.com/ClickHouse/ClickHouse/pull/57518) ([Igor Nikonov](https://github.com/devcrafter)). +* Remove a feature. [#57521](https://github.com/ClickHouse/ClickHouse/pull/57521) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `S3Queue` is production ready [#57548](https://github.com/ClickHouse/ClickHouse/pull/57548) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Revert "Merge pull request [#56724](https://github.com/ClickHouse/ClickHouse/issues/56724) from canhld94/ch_replicated_column_mismatch" [#57576](https://github.com/ClickHouse/ClickHouse/pull/57576) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/en/development/build-cross-osx.md b/docs/en/development/build-cross-osx.md index a04d676e92d..eddf24448c1 100644 --- a/docs/en/development/build-cross-osx.md +++ b/docs/en/development/build-cross-osx.md @@ -28,18 +28,20 @@ sudo apt-get install clang-17 Let’s remember the path where we install `cctools` as ${CCTOOLS} ``` bash +mkdir ~/cctools export CCTOOLS=$(cd ~/cctools && pwd) -mkdir ${CCTOOLS} cd ${CCTOOLS} -git clone --depth=1 https://github.com/tpoechtrager/apple-libtapi.git +git clone https://github.com/tpoechtrager/apple-libtapi.git cd apple-libtapi +git checkout 15dfc2a8c9a2a89d06ff227560a69f5265b692f9 INSTALLPREFIX=${CCTOOLS} ./build.sh ./install.sh cd .. 
-git clone --depth=1 https://github.com/tpoechtrager/cctools-port.git
+git clone https://github.com/tpoechtrager/cctools-port.git
 cd cctools-port/cctools
+git checkout 2a3e1c2a6ff54a30f898b70cfb9ba1692a55fad7
 ./configure --prefix=$(readlink -f ${CCTOOLS}) --with-libtapi=$(readlink -f ${CCTOOLS}) --target=x86_64-apple-darwin
 make install
 ```
diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md
index e65de4a37e0..39ccc9a78c3 100644
--- a/docs/en/development/build-osx.md
+++ b/docs/en/development/build-osx.md
@@ -3,7 +3,7 @@ slug: /en/development/build-osx
 sidebar_position: 65
 sidebar_label: Build on macOS
 title: How to Build ClickHouse on macOS
-description: How to build ClickHouse on macOS
+description: How to build ClickHouse on macOS for macOS
 ---
 
 :::info You don't have to build ClickHouse yourself!
diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md
index 9d6a80de904..31346c77949 100644
--- a/docs/en/development/developer-instruction.md
+++ b/docs/en/development/developer-instruction.md
@@ -7,42 +7,39 @@ description: Prerequisites and an overview of how to build ClickHouse
 
 # Getting Started Guide for Building ClickHouse
 
-The building of ClickHouse is supported on Linux, FreeBSD and macOS.
+ClickHouse can be built on Linux, FreeBSD and macOS. If you use Windows, you can still build ClickHouse in a virtual machine running Linux, e.g. [VirtualBox](https://www.virtualbox.org/) with Ubuntu.
 
-If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
-
-ClickHouse cannot work or build on a 32-bit system. You should acquire access to a 64-bit system and you can continue reading.
+ClickHouse requires a 64-bit system to compile and run; 32-bit systems do not work.
 
 ## Creating a Repository on GitHub {#creating-a-repository-on-github}
 
-To start working with ClickHouse repository you will need a GitHub account.
+To start developing for ClickHouse you will need a [GitHub](https://github.com/) account. Please also generate an SSH key locally (if you don't have one already) and upload the public key to GitHub as this is a prerequisite for contributing patches.
 
-You probably already have one, but if you do not, please register at https://github.com. In case you do not have SSH keys, you should generate them and then upload them on GitHub. It is required for sending over your patches. It is also possible to use the same SSH keys that you use with any other SSH servers - probably you already have those.
+Next, create a fork of the [ClickHouse repository](https://github.com/ClickHouse/ClickHouse/) in your personal account by clicking the "fork" button in the upper right corner.
 
-Create a fork of ClickHouse repository. To do that please click on the “fork” button in the upper right corner at https://github.com/ClickHouse/ClickHouse. It will fork your own copy of ClickHouse/ClickHouse to your account.
+To contribute, e.g. a fix for an issue or a feature, please commit your changes to a branch in your fork, then create a "pull request" with the changes to the main repository.
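For illustration, here is a minimal shell sketch of that fork-based workflow (the branch name and commit message are placeholders; replace `your_github_username` with your own):

```sh
# Clone your fork together with its submodules (replace the placeholder user name)
git clone --recurse-submodules git@github.com:your_github_username/ClickHouse.git
cd ClickHouse

# Work on a dedicated branch rather than on master
git checkout -b my-fix

# ...edit the sources...

# Record the changes and publish the branch to your fork
git add -A
git commit -m "Fix: describe the change here"
git push -u origin my-fix
# Finally, open a pull request from my-fix to ClickHouse/ClickHouse on GitHub
```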
-The development process consists of first committing the intended changes into your fork of ClickHouse and then creating a “pull request” for these changes to be accepted into the main repository (ClickHouse/ClickHouse).
+For working with Git repositories, please install `git`. In Ubuntu run these commands in a terminal:
 
-To work with Git repositories, please install `git`. To do that in Ubuntu you would run in the command line terminal:
+```sh
+sudo apt update
+sudo apt install git
+```
 
-    sudo apt update
-    sudo apt install git
-
-A brief manual on using Git can be found [here](https://education.github.com/git-cheat-sheet-education.pdf).
-For a detailed manual on Git see [here](https://git-scm.com/book/en/v2).
+A cheatsheet for using Git can be found [here](https://education.github.com/git-cheat-sheet-education.pdf). The detailed manual for Git is [here](https://git-scm.com/book/en/v2).
 
 ## Cloning a Repository to Your Development Machine {#cloning-a-repository-to-your-development-machine}
 
-Next, you need to download the source files onto your working machine. This is called “to clone a repository” because it creates a local copy of the repository on your working machine.
+First, download the source files to your working machine, i.e. clone the repository:
 
-Run in your terminal:
+```sh
+git clone git@github.com:your_github_username/ClickHouse.git # replace placeholder with your GitHub user name
+cd ClickHouse
+```
 
-    git clone git@github.com:your_github_username/ClickHouse.git # replace placeholder with your GitHub user name
-    cd ClickHouse
+This command creates a directory `ClickHouse/` containing the source code of ClickHouse. If you specify a custom checkout directory after the URL, it is important that this path does not contain whitespace, as it may lead to problems with the build later on.
 
-This command will create a directory `ClickHouse/` containing the source code of ClickHouse. If you specify a custom checkout directory (after the URL), it is important that this path does not contain whitespaces as it may lead to problems with the build system.
-
-To make library dependencies available for the build, the ClickHouse repository uses Git submodules, i.e. references to external repositories. These are not checked out by default. To do so, you can either
+The ClickHouse repository uses Git submodules, i.e. references to external repositories (usually 3rd party libraries used by ClickHouse). These are not checked out by default. To do so, you can either
 
@@ -52,7 +49,7 @@ To make library dependencies available for the build, the ClickHouse repository
 
 - run `git clone` with option `--recurse-submodules`,
 
 You can check the Git status with the command: `git submodule status`.
 
-If you get the following error message:
+If you get the following error message
 
     Permission denied (publickey).
     fatal: Could not read from remote repository.
 
@@ -60,7 +57,7 @@ If you get the following error message:
 
 Please make sure you have the correct access rights and the repository exists.
 
-It generally means that the SSH keys for connecting to GitHub are missing. These keys are normally located in `~/.ssh`. For SSH keys to be accepted you need to upload them in the settings section of GitHub UI.
+it generally means that the SSH keys for connecting to GitHub are missing. These keys are normally located in `~/.ssh`. For SSH keys to be accepted you need to upload them in GitHub's settings.
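If the keys are missing entirely, a minimal sketch for creating and verifying them (the key type and the email comment are just one common choice):

```sh
# Generate a new SSH key pair; accept the default location under ~/.ssh
ssh-keygen -t ed25519 -C "your_email@example.com"

# Print the public key, then add it in GitHub under Settings -> SSH and GPG keys
cat ~/.ssh/id_ed25519.pub

# Check that GitHub accepts the key (prints a greeting on success)
ssh -T git@github.com
```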
You can also clone the repository via https protocol:
 
@@ -74,12 +71,17 @@ You can also add original ClickHouse repo address to your local repository to pu
 
 After successfully running this command you will be able to pull updates from the main ClickHouse repo by running `git pull upstream master`.
 
+:::note
+Instructions below assume you are building on Linux. If you are cross-compiling or building on macOS, please also check for operating system and architecture specific guides, such as building [on macOS for macOS](build-osx.md), [on Linux for macOS](build-cross-osx.md), [on Linux for Linux/RISC-V](build-cross-riscv.md) and so on.
+:::
+
 ## Build System {#build-system}
 
 ClickHouse uses CMake and Ninja for building.
 
-CMake - a meta-build system that can generate Ninja files (build tasks).
-Ninja - a smaller build system with a focus on the speed used to execute those cmake generated tasks.
+- CMake - a meta-build system that can generate Ninja files (build tasks).
+
+- Ninja - a smaller build system focused on speed, used to execute the CMake-generated build tasks.
 
 To install on Ubuntu, Debian or Mint run `sudo apt install cmake ninja-build`.
 
diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md
index 2a9b25e0c1a..8b7f86cce5c 100644
--- a/docs/en/engines/table-engines/integrations/s3queue.md
+++ b/docs/en/engines/table-engines/integrations/s3queue.md
@@ -4,16 +4,9 @@ sidebar_position: 181
 sidebar_label: S3Queue
 ---
 
-# [experimental] S3Queue Table Engine
+# S3Queue Table Engine
 
 This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ecosystem and allows streaming import. This engine is similar to the [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) engines, but provides S3-specific features.
 
-:::note
-This table engine is experimental. To use it, set `allow_experimental_s3queue` to 1 by using the `SET` command:
-```sql
-SET allow_experimental_s3queue=1
-```
-:::
-
 ## Create Table {#creating-a-table}
 
 ``` sql
diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index 89b002da192..55adf4208f8 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -865,10 +865,10 @@ Tags:
 
 - `disk` — a disk within a volume.
 - `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volume’s disks. If the size of a merged part is estimated to be bigger than `max_data_part_size_bytes`, then this part will be written to the next volume. Basically, this feature allows keeping new/small parts on a hot (SSD) volume and moving them to a cold (HDD) volume when they reach a large size. Do not use this setting if your policy has only one volume.
- `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move to the next volume, if any (by default, 0.1). ClickHouse sorts existing parts by size from largest to smallest (in descending order) and selects parts with the total size that is sufficient to meet the `move_factor` condition. If the total size of all parts is insufficient, all parts will be moved.
-- `prefer_not_to_merge` — Disables merging of data parts on this volume. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks.
- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default (if enabled), if we insert a data part that has already expired by the TTL move rule, it immediately goes to the volume/disk declared in the move rule. This can significantly slow down inserts if the destination volume/disk is slow (e.g. S3). If disabled, the already expired data part is written to the default volume and then moved to the TTL volume right after.
- `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`.
- `least_used_ttl_ms` - Configure timeout (in milliseconds) for updating the available space on all disks (`0` - update always, `-1` - never update, default is `60000`). Note, if the disk can be used by ClickHouse only and is not subject to an online filesystem resize/shrink you can use `-1`; in all other cases it is not recommended, since eventually it will lead to incorrect space distribution.
+- `prefer_not_to_merge` — Do not use this setting. It disables merging of data parts on this volume, which is harmful and leads to performance degradation. It nominally allows controlling how ClickHouse works with slow disks, but ClickHouse handles this better on its own, so enabling the setting is a mistake.
 
 Configuration examples:
 
@@ -905,7 +905,6 @@ Configuration examples:
         <external>
             <disk>external</disk>
-            <prefer_not_to_merge>true</prefer_not_to_merge>
         </external>
 
diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md
index 14431c4c43b..6224c450ea2 100644
--- a/docs/en/engines/table-engines/special/distributed.md
+++ b/docs/en/engines/table-engines/special/distributed.md
@@ -1,13 +1,16 @@
 ---
-slug: /en/engines/table-engines/special/distributed
+sidebar_label: "Distributed"
 sidebar_position: 10
-sidebar_label: Distributed
+slug: /en/engines/table-engines/special/distributed
 ---
 
 # Distributed Table Engine
 
-Tables with Distributed engine do not store any data of their own, but allow distributed query processing on multiple servers.
-Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any.
+:::warning
+To create a distributed table engine in the cloud, you can use the [remote and remoteSecure](../../../sql-reference/table-functions/remote) table functions. The `Distributed(...)` syntax cannot be used in ClickHouse Cloud.
+:::
+
+Tables with Distributed engine do not store any data of their own, but allow distributed query processing on multiple servers. Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any.
 
 ## Creating a Table {#distributed-creating-a-table}
 
@@ -22,6 +25,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 ```
 
 ### From a Table {#distributed-from-a-table}
+
 When the `Distributed` table is pointing to a table on the current server you can adopt that table's schema:
 
 ``` sql
@@ -48,7 +52,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2
 
 Specifying the `sharding_key` is necessary for the following:
 
-- For `INSERTs` into a distributed table (as the table engine needs the `sharding_key` to determine how to split the data). However, if `insert_distributed_one_random_shard` setting is enabled, then `INSERTs` do not need the sharding key
+- For `INSERTs` into a distributed table (as the table engine needs the `sharding_key` to determine how to split the data).
However, if `insert_distributed_one_random_shard` setting is enabled, then `INSERTs` do not need the sharding key. - For use with `optimize_skip_unused_shards` as the `sharding_key` is necessary to determine what shards should be queried #### policy_name @@ -122,9 +126,7 @@ SETTINGS fsync_directories=0; ``` -Data will be read from all servers in the `logs` cluster, from the `default.hits` table located on every server in the cluster. -Data is not only read but is partially processed on the remote servers (to the extent that this is possible). -For example, for a query with `GROUP BY`, data will be aggregated on remote servers, and the intermediate states of aggregate functions will be sent to the requestor server. Then data will be further aggregated. +Data will be read from all servers in the `logs` cluster, from the `default.hits` table located on every server in the cluster. Data is not only read but is partially processed on the remote servers (to the extent that this is possible). For example, for a query with `GROUP BY`, data will be aggregated on remote servers, and the intermediate states of aggregate functions will be sent to the requestor server. Then data will be further aggregated. Instead of the database name, you can use a constant expression that returns a string. For example: `currentDatabase()`. @@ -183,9 +185,7 @@ Clusters are configured in the [server configuration file](../../../operations/c ``` -Here a cluster is defined with the name `logs` that consists of two shards, each of which contains two replicas. -Shards refer to the servers that contain different parts of the data (in order to read all the data, you must access all the shards). -Replicas are duplicating servers (in order to read all the data, you can access the data on any one of the replicas). +Here a cluster is defined with the name `logs` that consists of two shards, each of which contains two replicas. Shards refer to the servers that contain different parts of the data (in order to read all the data, you must access all the shards). Replicas are duplicating servers (in order to read all the data, you can access the data on any one of the replicas). Cluster names must not contain dots. @@ -198,9 +198,7 @@ The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `com - `secure` - Whether to use a secure SSL/TLS connection. Usually also requires specifying the port (the default secure port is `9440`). The server should listen on `9440` and be configured with correct certificates. - `compression` - Use data compression. Default value: `true`. -When specifying replicas, one of the available replicas will be selected for each of the shards when reading. You can configure the algorithm for load balancing (the preference for which replica to access) – see the [load_balancing](../../../operations/settings/settings.md#settings-load_balancing) setting. -If the connection with the server is not established, there will be an attempt to connect with a short timeout. If the connection failed, the next replica will be selected, and so on for all the replicas. If the connection attempt failed for all the replicas, the attempt will be repeated the same way, several times. -This works in favour of resiliency, but does not provide complete fault tolerance: a remote server might accept the connection, but might not work, or work poorly. +When specifying replicas, one of the available replicas will be selected for each of the shards when reading. 
You can configure the algorithm for load balancing (the preference for which replica to access) – see the [load_balancing](../../../operations/settings/settings.md#settings-load_balancing) setting. If the connection with the server is not established, there will be an attempt to connect with a short timeout. If the connection failed, the next replica will be selected, and so on for all the replicas. If the connection attempt failed for all the replicas, the attempt will be repeated the same way, several times. This works in favour of resiliency, but does not provide complete fault tolerance: a remote server might accept the connection, but might not work, or work poorly.
 
 You can specify just one of the shards (in this case, query processing should be called remote, rather than distributed) or up to any number of shards. In each shard, you can specify from one to any number of replicas. You can specify a different number of replicas for each shard.
 
diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md
index 52fa1689d9d..e8662ec16fa 100644
--- a/docs/en/getting-started/install.md
+++ b/docs/en/getting-started/install.md
@@ -56,7 +56,7 @@ On Linux, macOS and FreeBSD:
     ./clickhouse client
     ClickHouse client version 23.2.1.1501 (official build).
     Connecting to localhost:9000 as user default.
-    Connected to ClickHouse server version 23.2.1 revision 54461.
+    Connected to ClickHouse server version 23.2.1.
 
     local-host :)
 
@@ -81,7 +81,7 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun
 sudo apt-get install -y apt-transport-https ca-certificates dirmngr
 GNUPGHOME=$(mktemp -d)
 sudo GNUPGHOME="$GNUPGHOME" gpg --no-default-keyring --keyring /usr/share/keyrings/clickhouse-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 8919F6BD2B48D754
-sudo rm -r "$GNUPGHOME"
+sudo rm -rf "$GNUPGHOME"
 sudo chmod +r /usr/share/keyrings/clickhouse-keyring.gpg
 
 echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \
diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md
index f018f3a248e..a53844e792f 100644
--- a/docs/en/interfaces/cli.md
+++ b/docs/en/interfaces/cli.md
@@ -16,7 +16,7 @@ ClickHouse provides a native command-line client: `clickhouse-client`. The clien
 $ clickhouse-client
 ClickHouse client version 20.13.1.5273 (official build).
 Connecting to localhost:9000 as user default.
-Connected to ClickHouse server version 20.13.1 revision 54442.
+Connected to ClickHouse server version 20.13.1.
 
 :)
 ```
 
diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md
index 57de0555bf6..836b1f2f637 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -478,6 +478,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
 - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
 - [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - allow variable number of columns in CSV format, ignore extra columns and use default values on missing columns. Default value - `false`.
- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`.
+- [input_format_csv_try_infer_numbers_from_strings](/docs/en/operations/settings/settings-formats.md/#input_format_csv_try_infer_numbers_from_strings) - Try to infer numbers from string fields during schema inference. Default value - `false`.

## CSVWithNames {#csvwithnames}
diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md
index 0aadb09730a..ef858796936 100644
--- a/docs/en/interfaces/schema-inference.md
+++ b/docs/en/interfaces/schema-inference.md
@@ -834,6 +834,27 @@ $$)
└──────────────┴───────────────┘
```

+#### CSV settings {#csv-settings}
+
+##### input_format_csv_try_infer_numbers_from_strings
+
+Enabling this setting allows inferring numbers from string values.
+
+This setting is disabled by default.
+
+**Example:**
+
+```sql
+SET input_format_csv_try_infer_numbers_from_strings = 1;
+DESC format(CSV, '"42","42.42"');
+```
+```response
+┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ c1   │ Nullable(Int64)   │              │                    │         │                  │                │
+│ c2   │ Nullable(Float64) │              │                    │         │                  │                │
+└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
+
### TSV/TSKV {#tsv-tskv}

In TSV/TSKV formats ClickHouse extracts column value from the row according to tabular delimiters and then parses extracted value using
@@ -1846,3 +1867,102 @@ DESC format(JSONAsString, '{"x" : 42, "y" : "Hello, World!"}') SETTINGS allow_ex
│ json │ Object('json') │              │                    │         │                  │                │
└──────┴────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
+
+## Schema inference modes {#schema-inference-modes}
+
+Schema inference from a set of data files can work in two different modes: `default` and `union`.
+The mode is controlled by the setting `schema_inference_mode`.
+
+### Default mode {#default-schema-inference-mode}
+
+In default mode, ClickHouse assumes that all files have the same schema and tries to infer the schema by reading files one by one until it succeeds.
+
+Example:
+
+Let's say we have 3 files `data1.jsonl`, `data2.jsonl` and `data3.jsonl` with the following content:
+
+`data1.jsonl`:
+```json
+{"field1" : 1, "field2" : null}
+{"field1" : 2, "field2" : null}
+{"field1" : 3, "field2" : null}
+```
+
+`data2.jsonl`:
+```json
+{"field1" : 4, "field2" : "Data4"}
+{"field1" : 5, "field2" : "Data5"}
+{"field1" : 6, "field2" : "Data5"}
+```
+
+`data3.jsonl`:
+```json
+{"field1" : 7, "field2" : "Data7", "field3" : [1, 2, 3]}
+{"field1" : 8, "field2" : "Data8", "field3" : [4, 5, 6]}
+{"field1" : 9, "field2" : "Data9", "field3" : [7, 8, 9]}
+```
+
+Let's try to use schema inference on these 3 files:
+```sql
+:) DESCRIBE file('data{1,2,3}.jsonl') SETTINGS schema_inference_mode='default'
+```
+
+Result:
+```text
+┌─name───┬─type─────────────┐
+│ field1 │ Nullable(Int64)  │
+│ field2 │ Nullable(String) │
+└────────┴──────────────────┘
+```
+
+As we can see, we don't have `field3` from file `data3.jsonl`.
+This happens because ClickHouse first tried to infer the schema from file `data1.jsonl` and failed because `field2` contains only nulls,
+and then tried to infer the schema from `data2.jsonl` and succeeded, so data from file `data3.jsonl` wasn't read.
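+
+If you then read all three files with this inferred schema, `field3` from `data3.jsonl` is simply skipped, since JSONEachRow ignores unknown fields by default (controlled by the `input_format_skip_unknown_fields` setting). A sketch, reusing the files from above:
+
+```sql
+SELECT * FROM file('data{1,2,3}.jsonl') SETTINGS schema_inference_mode='default';
+```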
+
+### Union mode {#union-schema-inference-mode}
+
+In union mode, ClickHouse assumes that files can have different schemas, so it infers the schemas of all files and then unions them into a common schema.
+
+Let's say we have 3 files `data1.jsonl`, `data2.jsonl` and `data3.jsonl` with the following content:
+
+`data1.jsonl`:
+```json
+{"field1" : 1}
+{"field1" : 2}
+{"field1" : 3}
+```
+
+`data2.jsonl`:
+```json
+{"field2" : "Data4"}
+{"field2" : "Data5"}
+{"field2" : "Data5"}
+```
+
+`data3.jsonl`:
+```json
+{"field3" : [1, 2, 3]}
+{"field3" : [4, 5, 6]}
+{"field3" : [7, 8, 9]}
+```
+
+Let's try to use schema inference on these 3 files:
+```sql
+:) DESCRIBE file('data{1,2,3}.jsonl') SETTINGS schema_inference_mode='union'
+```
+
+Result:
+```text
+┌─name───┬─type───────────────────┐
+│ field1 │ Nullable(Int64)        │
+│ field2 │ Nullable(String)       │
+│ field3 │ Array(Nullable(Int64)) │
+└────────┴────────────────────────┘
+```
+
+As we can see, we have all fields from all files.
+
+Note:
+- As some of the files may not contain some columns from the resulting schema, union mode is supported only for formats that support reading a subset of columns (like JSONEachRow, Parquet, TSVWithNames, etc.) and won't work for other formats (like CSV, TSV, JSONCompactEachRow, etc.).
+- If ClickHouse cannot infer the schema from one of the files, an exception will be thrown.
+- If you have a lot of files, reading the schema from all of them can take a lot of time.
diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md
index ebf981690a9..adc384e21ae 100644
--- a/docs/en/operations/monitoring.md
+++ b/docs/en/operations/monitoring.md
@@ -15,6 +15,27 @@ You can monitor:
- Utilization of hardware resources.
- ClickHouse server metrics.

+## Built-in observability dashboard {#built-in-observability-dashboard}
+
+ClickHouse comes with a built-in observability dashboard that can be accessed at `$HOST:$PORT/dashboard` (requires a user name and password) and shows the following metrics:
+- Queries/second
+- CPU usage (cores)
+- Queries running
+- Merges running
+- Selected bytes/second
+- IO wait
+- CPU wait
+- OS CPU Usage (userspace)
+- OS CPU Usage (kernel)
+- Read from disk
+- Read from filesystem
+- Memory (tracked)
+- Inserted rows/second
+- Total MergeTree parts
+- Max parts for partition
+
## Resource Utilization {#resource-utilization}

ClickHouse also monitors the state of hardware resources by itself such as:
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index ec59cfeee73..cc2692b8e02 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -1646,6 +1646,45 @@ Default value: `0.5`.

+## async_load_databases {#async_load_databases}
+
+Asynchronous loading of databases and tables.
+
+If `true`, all non-system databases with `Ordinary`, `Atomic` and `Replicated` engines will be loaded asynchronously after ClickHouse server startup. See the `system.async_loader` table and the `tables_loader_background_pool_size` and `tables_loader_foreground_pool_size` server settings. Any query that tries to access a table that is not yet loaded will wait for exactly that table to be started up. If a load job fails, the query will rethrow the error (instead of shutting down the whole server, as happens when `async_load_databases = false`). A table that is waited for by at least one query will be loaded with a higher priority. DDL queries on a database will wait for exactly that database to be started up.
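+
+For example, to check which load jobs are still pending during startup, you can query the introspection table referenced above (a sketch; the columns used are those documented for `system.async_loader` later in this changeset):
+
+``` sql
+SELECT job, status, pool, elapsed
+FROM system.async_loader
+WHERE status = 'PENDING'
+LIMIT 10
+```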
+
+If `false`, all databases are loaded when the server starts.
+
+The default is `false`.
+
+**Example**
+
+``` xml
+<async_load_databases>true</async_load_databases>
+```
+
+## tables_loader_foreground_pool_size {#tables_loader_foreground_pool_size}
+
+Sets the number of threads performing load jobs in the foreground pool. The foreground pool is used for loading tables synchronously before the server starts listening on a port and for loading tables that are waited for. The foreground pool has a higher priority than the background pool: no job starts in the background pool while there are jobs running in the foreground pool.
+
+Possible values:
+
+- Any positive integer.
+- Zero. Use all available CPUs.
+
+Default value: 0.
+
+
+## tables_loader_background_pool_size {#tables_loader_background_pool_size}
+
+Sets the number of threads performing asynchronous load jobs in the background pool. The background pool is used for loading tables asynchronously after the server starts if there are no queries waiting for the table. It can be beneficial to keep a low number of threads in the background pool if there are a lot of tables, as this reserves CPU resources for concurrent query execution.
+
+Possible values:
+
+- Any positive integer.
+- Zero. Use all available CPUs.
+
+Default value: 0.
+
## merge_tree {#merge_tree}
diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md
index a0428bd0977..c7e461d15ae 100644
--- a/docs/en/operations/settings/merge-tree-settings.md
+++ b/docs/en/operations/settings/merge-tree-settings.md
@@ -149,7 +149,7 @@ Possible values:
- Any positive integer.
- 0 (disable deduplication)

-Default value: 100.
+Default value: 1000.

The `Insert` command creates one or more blocks (parts). For [insert deduplication](../../engines/table-engines/mergetree-family/replication.md), when writing into replicated tables, ClickHouse writes the hash sums of the created parts into ClickHouse Keeper. Hash sums are stored only for the most recent `replicated_deduplication_window` blocks. The oldest hash sums are removed from ClickHouse Keeper. A large number of `replicated_deduplication_window` slows down `Inserts` because it needs to compare more entries.
diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md
index 344e6dda680..3d76bd9df73 100644
--- a/docs/en/operations/settings/settings-formats.md
+++ b/docs/en/operations/settings/settings-formats.md
@@ -1130,6 +1130,13 @@ Result

a 0 1971-01-01
```

+## input_format_csv_try_infer_numbers_from_strings {#input_format_csv_try_infer_numbers_from_strings}
+
+If enabled, during schema inference ClickHouse will try to infer numbers from string fields.
+It can be useful if CSV data contains quoted UInt64 numbers.
+
+Disabled by default.
+
## Values format settings {#values-format-settings}

### input_format_values_interpret_expressions {#input_format_values_interpret_expressions}
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index c8d54d76704..5c509058cbb 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -2647,7 +2647,7 @@ Default value: 0.

## input_format_parallel_parsing {#input-format-parallel-parsing}

-Enables or disables order-preserving parallel parsing of data formats. Supported only for [TSV](../../interfaces/formats.md/#tabseparated), [TKSV](../../interfaces/formats.md/#tskv), [CSV](../../interfaces/formats.md/#csv) and [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) formats.
+Enables or disables order-preserving parallel parsing of data formats. Supported only for [TSV](../../interfaces/formats.md/#tabseparated), [TSKV](../../interfaces/formats.md/#tskv), [CSV](../../interfaces/formats.md/#csv) and [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) formats.

Possible values:

@@ -2658,7 +2658,7 @@ Default value: `1`.

## output_format_parallel_formatting {#output-format-parallel-formatting}

-Enables or disables parallel formatting of data formats. Supported only for [TSV](../../interfaces/formats.md/#tabseparated), [TKSV](../../interfaces/formats.md/#tskv), [CSV](../../interfaces/formats.md/#csv) and [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) formats.
+Enables or disables parallel formatting of data formats. Supported only for [TSV](../../interfaces/formats.md/#tabseparated), [TSKV](../../interfaces/formats.md/#tskv), [CSV](../../interfaces/formats.md/#csv) and [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) formats.

Possible values:

@@ -4349,6 +4349,8 @@ Default value: `1GiB`.

## Schema Inference settings

+See the [schema inference](../../interfaces/schema-inference.md#schema-inference-modes) documentation for more details.
+
### schema_inference_use_cache_for_file {schema_inference_use_cache_for_file}

Enable schemas cache for schema inference in `file` table function.
@@ -4390,6 +4392,13 @@ Possible values:

Default value: 2.

+### schema_inference_mode {#schema_inference_mode}
+
+The mode of schema inference. Possible values: `default` and `union`.
+See the [schema inference modes](../../interfaces/schema-inference.md#schema-inference-modes) section for more details.
+
+Default value: `default`.
+
## compatibility {#compatibility}

The `compatibility` setting causes ClickHouse to use the default settings of a previous version of ClickHouse, where the previous version is provided as the setting.
diff --git a/docs/en/operations/system-tables/async_loader.md b/docs/en/operations/system-tables/async_loader.md
new file mode 100644
index 00000000000..4e8651a6d3e
--- /dev/null
+++ b/docs/en/operations/system-tables/async_loader.md
@@ -0,0 +1,54 @@
+---
+slug: /en/operations/system-tables/async_loader
+---
+# async_loader
+
+Contains information and status for recent asynchronous jobs (e.g., for table loading). The table contains a row for every job. There is a tool, `utils/async_loader_graph`, for visualizing information from this table.
+
+Example:
+
+``` sql
+SELECT *
+FROM system.async_loader
+LIMIT 1
+FORMAT Vertical
+```
+
+``` text
+```
+
+Columns:
+
+- `job` (`String`) - Job name (may not be unique).
+- `job_id` (`UInt64`) - Unique ID of the job.
+- `dependencies` (`Array(UInt64)`) - List of IDs of jobs that should be done before this job.
+- `dependencies_left` (`UInt64`) - Current number of dependencies left to be done.
+- `status` (`Enum`) - Current load status of a job:
+    - `PENDING`: Load job is not started yet.
+    - `OK`: Load job executed and was successful.
+    - `FAILED`: Load job executed and failed.
+    - `CANCELED`: Load job is not going to be executed due to removal or dependency failure.
+
+A pending job might be in one of the following states:
+- `is_executing` (`UInt8`) - The job is currently being executed by a worker.
+- `is_blocked` (`UInt8`) - The job waits for its dependencies to be done.
+- `is_ready` (`UInt8`) - The job is ready to be executed and waits for a worker.
+- `elapsed` (`Float64`) - Seconds elapsed since the start of execution. Zero if the job is not started. Total execution time if the job finished.
+
+Every job has a pool associated with it and is started in this pool. Each pool has a constant priority and a mutable maximum number of workers. Higher priority (lower `priority` value) jobs are run first. No job with a lower priority is started while there is at least one higher priority job ready or executing. Job priority can be elevated (but cannot be lowered) by prioritizing it. For example, jobs for table loading and startup will be prioritized if an incoming query requires this table. It is possible to prioritize a job during its execution, but the job is not moved from its `execution_pool` to the newly assigned `pool`. The job uses `pool` for creating new jobs to avoid priority inversion. Already started jobs are not preempted by higher priority jobs and always run to completion after start.
+- `pool_id` (`UInt64`) - ID of a pool currently assigned to the job.
+- `pool` (`String`) - Name of `pool_id` pool.
+- `priority` (`Int64`) - Priority of `pool_id` pool.
+- `execution_pool_id` (`UInt64`) - ID of a pool the job is executed in. Equals the initially assigned pool before execution starts.
+- `execution_pool` (`String`) - Name of `execution_pool_id` pool.
+- `execution_priority` (`Int64`) - Priority of `execution_pool_id` pool.
+
+- `ready_seqno` (`Nullable(UInt64)`) - Not null for ready jobs. A worker pulls the next job to be executed from the ready queue of its pool. If there are multiple ready jobs, the job with the lowest value of `ready_seqno` is picked.
+- `waiters` (`UInt64`) - The number of threads waiting on this job.
+- `exception` (`Nullable(String)`) - Not null for failed and canceled jobs. Holds the error message raised during query execution, or the error that led to cancelling this job, along with the dependency failure chain of job names.
+
+Time instants during job lifetime:
+- `schedule_time` (`DateTime64`) - Time when the job was created and scheduled to be executed (usually with all its dependencies).
+- `enqueue_time` (`Nullable(DateTime64)`) - Time when the job became ready and was enqueued into the ready queue of its pool. Null if the job is not ready yet.
+- `start_time` (`Nullable(DateTime64)`) - Time when a worker dequeued the job from the ready queue and started its execution. Null if the job is not started yet.
+- `finish_time` (`Nullable(DateTime64)`) - Time when the job execution finished. Null if the job is not finished yet.
diff --git a/docs/en/operations/system-tables/asynchronous_insert_log.md b/docs/en/operations/system-tables/asynchronous_insert_log.md
index c3aaa8e6c41..d5f6ab07b10 100644
--- a/docs/en/operations/system-tables/asynchronous_insert_log.md
+++ b/docs/en/operations/system-tables/asynchronous_insert_log.md
@@ -13,6 +13,7 @@ ClickHouse does not delete data from the table automatically. See [Introduction]

Columns:

+- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the async insert happened.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the async insert finished execution.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the async insert finished execution with microseconds precision.
@@ -42,6 +43,7 @@ SELECT * FROM system.asynchronous_insert_log LIMIT 1 \G; Result: ``` text +hostname: clickhouse.eu-central1.internal event_date: 2023-06-08 event_time: 2023-06-08 10:08:53 event_time_microseconds: 2023-06-08 10:08:53.199516 diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md index efe57a202d8..65b2e349707 100644 --- a/docs/en/operations/system-tables/asynchronous_metric_log.md +++ b/docs/en/operations/system-tables/asynchronous_metric_log.md @@ -7,6 +7,7 @@ Contains the historical values for `system.asynchronous_metrics`, which are save Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. - `name` ([String](../../sql-reference/data-types/string.md)) — Metric name. @@ -15,22 +16,33 @@ Columns: **Example** ``` sql -SELECT * FROM system.asynchronous_metric_log LIMIT 10 +SELECT * FROM system.asynchronous_metric_log LIMIT 3 \G ``` ``` text -┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬─────value─┐ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ CPUFrequencyMHz_0 │ 2120.9 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pmuzzy │ 743 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pdirty │ 26288 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.run_intervals │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.num_runs │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.retained │ 60694528 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.mapped │ 303161344 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.resident │ 260931584 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.metadata │ 12079488 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.allocated │ 133756128 │ -└────────────┴─────────────────────┴──────────────────────────────────────────┴───────────┘ +Row 1: +────── +hostname: clickhouse.eu-central1.internal +event_date: 2023-11-14 +event_time: 2023-11-14 14:39:07 +metric: AsynchronousHeavyMetricsCalculationTimeSpent +value: 0.001 + +Row 2: +────── +hostname: clickhouse.eu-central1.internal +event_date: 2023-11-14 +event_time: 2023-11-14 14:39:08 +metric: AsynchronousHeavyMetricsCalculationTimeSpent +value: 0 + +Row 3: +────── +hostname: clickhouse.eu-central1.internal +event_date: 2023-11-14 +event_time: 2023-11-14 14:39:09 +metric: AsynchronousHeavyMetricsCalculationTimeSpent +value: 0 ``` **See Also** diff --git a/docs/en/operations/system-tables/backup_log.md b/docs/en/operations/system-tables/backup_log.md index 7e088fcad94..c73fd26683e 100644 --- a/docs/en/operations/system-tables/backup_log.md +++ b/docs/en/operations/system-tables/backup_log.md @@ -7,6 +7,7 @@ Contains logging entries with the information about `BACKUP` and `RESTORE` opera Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of the entry. - `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Time of the entry with microseconds precision. - `id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the backup or restore operation. 
@@ -45,6 +46,7 @@ SELECT * FROM system.backup_log WHERE id = 'e5b74ecb-f6f1-426a-80be-872f90043885 ```response Row 1: ────── +hostname: clickhouse.eu-central1.internal event_date: 2023-08-19 event_time_microseconds: 2023-08-19 11:05:21.998566 id: e5b74ecb-f6f1-426a-80be-872f90043885 @@ -63,6 +65,7 @@ bytes_read: 0 Row 2: ────── +hostname: clickhouse.eu-central1.internal event_date: 2023-08-19 event_time_microseconds: 2023-08-19 11:08:56.916192 id: e5b74ecb-f6f1-426a-80be-872f90043885 @@ -93,6 +96,7 @@ SELECT * FROM system.backup_log WHERE id = 'cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 ```response Row 1: ────── +hostname: clickhouse.eu-central1.internal event_date: 2023-08-19 event_time_microseconds: 2023-08-19 11:09:19.718077 id: cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 @@ -111,6 +115,7 @@ bytes_read: 0 Row 2: ────── +hostname: clickhouse.eu-central1.internal event_date: 2023-08-19 event_time_microseconds: 2023-08-19 11:09:29.334234 id: cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 diff --git a/docs/en/operations/system-tables/crash-log.md b/docs/en/operations/system-tables/crash-log.md index 4d015a513a2..e83da3624b2 100644 --- a/docs/en/operations/system-tables/crash-log.md +++ b/docs/en/operations/system-tables/crash-log.md @@ -7,6 +7,7 @@ Contains information about stack traces for fatal errors. The table does not exi Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date of the event. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the event. - `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the event with nanoseconds. @@ -32,6 +33,7 @@ Result (not full): ``` text Row 1: ────── +hostname: clickhouse.eu-central1.internal event_date: 2020-10-14 event_time: 2020-10-14 15:47:40 timestamp_ns: 1602679660271312710 diff --git a/docs/en/operations/system-tables/metric_log.md b/docs/en/operations/system-tables/metric_log.md index 9ea0dde3f80..f0b717a3bbf 100644 --- a/docs/en/operations/system-tables/metric_log.md +++ b/docs/en/operations/system-tables/metric_log.md @@ -6,6 +6,7 @@ slug: /en/operations/system-tables/metric_log Contains history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk. Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. - `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution. @@ -19,6 +20,7 @@ SELECT * FROM system.metric_log LIMIT 1 FORMAT Vertical; ``` text Row 1: ────── +hostname: clickhouse.eu-central1.internal event_date: 2020-09-05 event_time: 2020-09-05 16:22:33 event_time_microseconds: 2020-09-05 16:22:33.196807 diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index ae0e7620d35..3dec6345eb6 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -45,6 +45,22 @@ Number of threads in the Aggregator thread pool. Number of threads in the Aggregator thread pool running a task. +### TablesLoaderForegroundThreads + +Number of threads in the async loader foreground thread pool. 
+
+### TablesLoaderForegroundThreadsActive
+
+Number of threads in the async loader foreground thread pool running a task.
+
+### TablesLoaderBackgroundThreads
+
+Number of threads in the async loader background thread pool.
+
+### TablesLoaderBackgroundThreadsActive
+
+Number of threads in the async loader background thread pool running a task.
+
### AsyncInsertCacheSize

Number of async insert hash id in cache
@@ -197,14 +213,6 @@ Number of threads in the DatabaseOnDisk thread pool.

Number of threads in the DatabaseOnDisk thread pool running a task.

-### DatabaseOrdinaryThreads
-
-Number of threads in the Ordinary database thread pool.
-
-### DatabaseOrdinaryThreadsActive
-
-Number of threads in the Ordinary database thread pool running a task.
-
### DelayedInserts

Number of INSERT queries that are throttled due to high number of active data parts for partition in a MergeTree table.
@@ -625,14 +633,6 @@ Number of connections that are sending data for external tables to remote server

Number of connections that are sending data for scalars to remote servers.

-### StartupSystemTablesThreads
-
-Number of threads in the StartupSystemTables thread pool.
-
-### StartupSystemTablesThreadsActive
-
-Number of threads in the StartupSystemTables thread pool running a task.
-
### StorageBufferBytes

Number of bytes in buffers of Buffer tables
@@ -677,14 +677,6 @@ Number of threads in the system.replicas thread pool running a task.

Number of connections to TCP server (clients with native interface), also included server-server distributed query connections

-### TablesLoaderThreads
-
-Number of threads in the tables loader thread pool.
-
-### TablesLoaderThreadsActive
-
-Number of threads in the tables loader thread pool running a task.
-
### TablesToDropQueueSize

Number of dropped tables, that are waiting for background data removal.
diff --git a/docs/en/operations/system-tables/numbers.md b/docs/en/operations/system-tables/numbers.md
index 68efeb2ee38..002bf8a75df 100644
--- a/docs/en/operations/system-tables/numbers.md
+++ b/docs/en/operations/system-tables/numbers.md
@@ -31,3 +31,26 @@ SELECT * FROM system.numbers LIMIT 10;

10 rows in set. Elapsed: 0.001 sec.
```
+
+You can also limit the output by using predicates.
+
+```sql
+SELECT * FROM system.numbers WHERE number < 10;
+```
+
+```response
+┌─number─┐
+│      0 │
+│      1 │
+│      2 │
+│      3 │
+│      4 │
+│      5 │
+│      6 │
+│      7 │
+│      8 │
+│      9 │
+└────────┘
+
+10 rows in set. Elapsed: 0.001 sec.
+```
diff --git a/docs/en/operations/system-tables/opentelemetry_span_log.md b/docs/en/operations/system-tables/opentelemetry_span_log.md
index a605a46c14c..5f03c2f8ada 100644
--- a/docs/en/operations/system-tables/opentelemetry_span_log.md
+++ b/docs/en/operations/system-tables/opentelemetry_span_log.md
@@ -8,28 +8,19 @@ Contains information about [trace spans](https://opentracing.io/docs/overview/sp

Columns:

- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md)) — ID of the trace for executed query.
-
- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the `trace span`.
-
- `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the parent `trace span`.
-
- `operation_name` ([String](../../sql-reference/data-types/string.md)) — The name of the operation.
-
- `kind` ([Enum8](../../sql-reference/data-types/enum.md)) — The [SpanKind](https://opentelemetry.io/docs/reference/specification/trace/api/#spankind) of the span.
    - `INTERNAL` — Indicates that the span represents an internal operation within an application.
- `SERVER` — Indicates that the span covers server-side handling of a synchronous RPC or other remote request. - `CLIENT` — Indicates that the span describes a request to some remote service. - `PRODUCER` — Indicates that the span describes the initiators of an asynchronous request. This parent span will often end before the corresponding child CONSUMER span, possibly even before the child span starts. - `CONSUMER` - Indicates that the span describes a child of an asynchronous PRODUCER request. - - `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The start time of the `trace span` (in microseconds). - - `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The finish time of the `trace span` (in microseconds). - - `finish_date` ([Date](../../sql-reference/data-types/date.md)) — The finish date of the `trace span`. - - `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — [Attribute](https://opentelemetry.io/docs/go/instrumentation/#attributes) names depending on the `trace span`. They are filled in according to the recommendations in the [OpenTelemetry](https://opentelemetry.io/) standard. - - `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Attribute values depending on the `trace span`. They are filled in according to the recommendations in the `OpenTelemetry` standard. **Example** diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md index b9185434e01..af582646653 100644 --- a/docs/en/operations/system-tables/part_log.md +++ b/docs/en/operations/system-tables/part_log.md @@ -9,6 +9,7 @@ This table contains information about events that occurred with [data parts](../ The `system.part_log` table contains the following columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the `INSERT` query that created this data part. - `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event that occurred with the data part. Can have one of the following values: - `NewPart` — Inserting of a new data part. @@ -56,13 +57,14 @@ SELECT * FROM system.part_log LIMIT 1 FORMAT Vertical; ``` text Row 1: ────── +hostname: clickhouse.eu-central1.internal query_id: 983ad9c7-28d5-4ae1-844e-603116b7de31 event_type: NewPart merge_reason: NotAMerge merge_algorithm: Undecided event_date: 2021-02-02 event_time: 2021-02-02 11:14:28 -event_time_microseconds: 2021-02-02 11:14:28.861919 +event_time_microseconds: 2021-02-02 11:14:28.861919 duration_ms: 35 database: default table: log_mt_2 diff --git a/docs/en/operations/system-tables/processors_profile_log.md b/docs/en/operations/system-tables/processors_profile_log.md index 5eedb5a5dae..3c2a090efe3 100644 --- a/docs/en/operations/system-tables/processors_profile_log.md +++ b/docs/en/operations/system-tables/processors_profile_log.md @@ -4,6 +4,7 @@ This table contains profiling on processors level (that you can find in [`EXPLAI Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened. 
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the event happened. - `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time with microseconds precision when the event happened. diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index ced97166702..4f5e214f1ce 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -34,6 +34,7 @@ You can use the [log_formatted_queries](../../operations/settings/settings.md#se Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of an event that occurred when executing the query. Values: - `'QueryStart' = 1` — Successful start of query execution. - `'QueryFinish' = 2` — Successful end of query execution. @@ -127,6 +128,7 @@ SELECT * FROM system.query_log WHERE type = 'QueryFinish' ORDER BY query_start_t ``` text Row 1: ────── +hostname: clickhouse.eu-central1.internal type: QueryFinish event_date: 2021-11-03 event_time: 2021-11-03 16:13:54 @@ -167,7 +169,7 @@ initial_query_start_time: 2021-11-03 16:13:54 initial_query_start_time_microseconds: 2021-11-03 16:13:54.952325 interface: 1 os_user: sevirov -client_hostname: clickhouse.ru-central1.internal +client_hostname: clickhouse.eu-central1.internal client_name: ClickHouse client_revision: 54449 client_version_major: 21 diff --git a/docs/en/operations/system-tables/query_thread_log.md b/docs/en/operations/system-tables/query_thread_log.md index a6d5632ade9..a198d7c304f 100644 --- a/docs/en/operations/system-tables/query_thread_log.md +++ b/docs/en/operations/system-tables/query_thread_log.md @@ -18,6 +18,7 @@ You can use the [log_queries_probability](../../operations/settings/settings.md# Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query. - `event_time_microsecinds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query with microseconds precision. @@ -74,6 +75,7 @@ Columns: ``` text Row 1: ────── +hostname: clickhouse.eu-central1.internal event_date: 2020-09-11 event_time: 2020-09-11 10:08:17 event_time_microseconds: 2020-09-11 10:08:17.134042 diff --git a/docs/en/operations/system-tables/query_views_log.md b/docs/en/operations/system-tables/query_views_log.md index e107e4f926c..4dd8dd7420d 100644 --- a/docs/en/operations/system-tables/query_views_log.md +++ b/docs/en/operations/system-tables/query_views_log.md @@ -18,6 +18,7 @@ You can use the [log_queries_probability](../../operations/settings/settings.md# Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the last event of the view happened. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the view finished execution. 
- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the view finished execution with microseconds precision.
@@ -59,6 +60,7 @@ Result:

``` text
Row 1:
──────
+hostname: clickhouse.eu-central1.internal
event_date: 2021-06-22
event_time: 2021-06-22 13:23:07
event_time_microseconds: 2021-06-22 13:23:07.738221
diff --git a/docs/en/operations/system-tables/session_log.md b/docs/en/operations/system-tables/session_log.md
index 5b1a2b2a489..5c6096b3adf 100644
--- a/docs/en/operations/system-tables/session_log.md
+++ b/docs/en/operations/system-tables/session_log.md
@@ -7,6 +7,7 @@ Contains information about all successful and failed login and logout events.

Columns:

+- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `type` ([Enum8](../../sql-reference/data-types/enum.md)) — Login/logout result. Possible values:
    - `LoginFailure` — Login error.
    - `LoginSuccess` — Successful login.
@@ -57,6 +58,7 @@ Result:

``` text
Row 1:
──────
+hostname: clickhouse.eu-central1.internal
type: LoginSuccess
auth_id: 45e6bd83-b4aa-4a23-85e6-bd83b4aa1a23
session_id:
diff --git a/docs/en/operations/system-tables/storage_policies.md b/docs/en/operations/system-tables/storage_policies.md
index 5c7184b2b22..24271a943a4 100644
--- a/docs/en/operations/system-tables/storage_policies.md
+++ b/docs/en/operations/system-tables/storage_policies.md
@@ -17,7 +17,7 @@ Columns:
    - `UNKNOWN`
- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit).
- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of configuration parameter, ClickHouse start to move data to the next volume in order.
-- `prefer_not_to_merge` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `prefer_not_to_merge` setting. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks.
+- `prefer_not_to_merge` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `prefer_not_to_merge` setting. It should always be false. Enabling this setting is a mistake.
- `perform_ttl_move_on_insert` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `perform_ttl_move_on_insert` setting. — Disables TTL move on data part INSERT. By default if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. This can significantly slowdown insert in case if destination volume/disk is slow (e.g. S3).
- `load_balancing` ([Enum8](../../sql-reference/data-types/enum.md)) — Policy for disk balancing. Can have one of the following values:
    - `ROUND_ROBIN`
diff --git a/docs/en/operations/system-tables/text_log.md b/docs/en/operations/system-tables/text_log.md
index 897cefab0be..6ac1ddbf667 100644
--- a/docs/en/operations/system-tables/text_log.md
+++ b/docs/en/operations/system-tables/text_log.md
@@ -7,6 +7,7 @@ Contains logging entries. The logging level which goes to this table can be limi

Columns:

+- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `event_date` (Date) — Date of the entry.
- `event_time` (DateTime) — Time of the entry.
- `event_time_microseconds` (DateTime) — Time of the entry with microseconds precision.
@@ -39,6 +40,7 @@ SELECT * FROM system.text_log LIMIT 1 \G
```

``` text
Row 1:
──────
+hostname: clickhouse.eu-central1.internal
event_date: 2020-09-10
event_time: 2020-09-10 11:23:07
event_time_microseconds: 2020-09-10 11:23:07.871397
diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md
index 1396244a12a..5adc33de37f 100644
--- a/docs/en/operations/system-tables/trace_log.md
+++ b/docs/en/operations/system-tables/trace_log.md
@@ -12,37 +12,27 @@ To analyze logs, use the `addressToLine`, `addressToLineWithInlines`, `addressTo

Columns:

+- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of sampling moment.
-
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment.
-
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Timestamp of the sampling moment with microseconds precision.
-
- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds.
-
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision.
-    When connecting to the server by `clickhouse-client`, you see the string similar to `Connected to ClickHouse server version 19.18.1 revision 54429.`. This field contains the `revision`, but not the `version` of a server.
+    When connecting to the server by `clickhouse-client`, you see a string similar to `Connected to ClickHouse server version 19.18.1.`. This field contains the build `revision` of the server, not its `version`.
- `trace_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Trace type:
-
    - `Real` represents collecting stack traces by wall-clock time.
    - `CPU` represents collecting stack traces by CPU time.
    - `Memory` represents collecting allocations and deallocations when memory allocation exceeds the subsequent watermark.
    - `MemorySample` represents collecting random allocations and deallocations.
    - `MemoryPeak` represents collecting updates of peak memory usage.
    - `ProfileEvent` represents collecting of increments of profile events.
-
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier.
-
- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query_log](#system_tables-query_log) system table.
-
- `trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process.
-
- `size` ([Int64](../../sql-reference/data-types/int-uint.md)) - For trace types `Memory`, `MemorySample` or `MemoryPeak` is the amount of memory allocated, for other trace types is 0.
-
- `event` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) - For trace type `ProfileEvent` is the name of updated profile event, for other trace types is an empty string.
-
- `increment` ([UInt64](../../sql-reference/data-types/int-uint.md)) - For trace type `ProfileEvent` is the amount of increment of profile event, for other trace types is 0.
**Example** @@ -54,6 +44,7 @@ SELECT * FROM system.trace_log LIMIT 1 \G ``` text Row 1: ────── +hostname: clickhouse.eu-central1.internal event_date: 2020-09-10 event_time: 2020-09-10 11:23:09 event_time_microseconds: 2020-09-10 11:23:09.872924 diff --git a/docs/en/operations/system-tables/zookeeper_log.md b/docs/en/operations/system-tables/zookeeper_log.md index dce5be29f62..dd2df2ba5fc 100644 --- a/docs/en/operations/system-tables/zookeeper_log.md +++ b/docs/en/operations/system-tables/zookeeper_log.md @@ -9,6 +9,7 @@ For requests, only columns with request parameters are filled in, and the remain Columns with request parameters: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `type` ([Enum](../../sql-reference/data-types/enum.md)) — Event type in the ZooKeeper client. Can have one of the following values: - `Request` — The request has been sent. - `Response` — The response was received. @@ -63,6 +64,7 @@ Result: ``` text Row 1: ────── +hostname: clickhouse.eu-central1.internal type: Request event_date: 2021-08-09 event_time: 2021-08-09 21:38:30.291792 diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index 8f6cf6ad147..757afff599c 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -93,7 +93,7 @@ While ClickHouse can work over NFS, it is not the best idea. ## Linux Kernel {#linux-kernel} -Don’t use an outdated Linux kernel. +Don't use an outdated Linux kernel. ## Network {#network} diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index c863282efc1..1dac2d25ea5 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -216,7 +216,6 @@ Arguments: - `--logger.level` — Log level. - `--ignore-error` — do not stop processing if a query failed. - `-c`, `--config-file` — path to configuration file in same format as for ClickHouse server, by default the configuration empty. -- `--no-system-tables` — do not attach system tables. - `--help` — arguments references for `clickhouse-local`. - `-V`, `--version` — print version information and exit. diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 5fbbf8f723c..3654cd157e9 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -487,24 +487,23 @@ Where: ## uniqUpTo(N)(x) -Calculates the number of different argument values ​​if it is less than or equal to N. If the number of different argument values is greater than N, it returns N + 1. +Calculates the number of different values of the argument up to a specified limit, `N`. If the number of different argument values is greater than `N`, this function returns `N` + 1, otherwise it calculates the exact value. -Recommended for use with small Ns, up to 10. The maximum value of N is 100. +Recommended for use with small `N`s, up to 10. The maximum value of `N` is 100. -For the state of an aggregate function, it uses the amount of memory equal to 1 + N \* the size of one value of bytes. -For strings, it stores a non-cryptographic hash of 8 bytes. That is, the calculation is approximated for strings. +For the state of an aggregate function, this function uses the amount of memory equal to 1 + `N` \* the size of one value of bytes. 
+When dealing with strings, this function stores a non-cryptographic hash of 8 bytes; the calculation is therefore approximate for strings.
-The function also works for several arguments.
+For example, consider a table that logs every search query made by users on your website. Each row in the table represents a single search query, with columns for the user ID, the search query, and the timestamp of the query. You can use `uniqUpTo` to generate a report that shows only the keywords that produced at least 5 unique users:
-It works as fast as possible, except for cases when a large N value is used and the number of unique values is slightly less than N.
-
-Usage example:
-
-``` text
-Problem: Generate a report that shows only keywords that produced at least 5 unique users.
-Solution: Write in the GROUP BY query SearchPhrase HAVING uniqUpTo(4)(UserID) >= 5
+```sql
+SELECT SearchPhrase
+FROM SearchLog
+GROUP BY SearchPhrase
+HAVING uniqUpTo(4)(UserID) >= 5
```
+`uniqUpTo(4)(UserID)` calculates the number of unique `UserID` values for each `SearchPhrase`, but it only counts up to 4 unique values. If there are more than 4 unique `UserID` values for a `SearchPhrase`, the function returns 5 (4 + 1). The `HAVING` clause then filters out the `SearchPhrase` values for which the number of unique `UserID` values is less than 5. This will give you a list of search keywords that were used by at least 5 unique users.

## sumMapFiltered(keys_to_keep)(keys, values)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md
index f79fe66c05d..a45eb1b409f 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/any.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/any.md
@@ -5,7 +5,12 @@ sidebar_position: 6

# any

-Selects the first encountered (non-NULL) value, unless all rows have NULL values in that column.
+Selects the first encountered value of a column.
+
+By default, it ignores NULL values and returns the first NOT NULL value found in the column. Like [`first_value`](../../../sql-reference/aggregate-functions/reference/first_value.md), it supports `RESPECT NULLS`, in which case it will select the first value passed, regardless of whether it is NULL or not.
+
+The return type of the function is the same as the input, except for LowCardinality, which is discarded. This means that given no rows as input it will return the default value of that type (0 for integers, or Null for a Nullable() column). You can use the `-OrNull` [combinator](../../../sql-reference/aggregate-functions/combinators.md) to modify this behaviour.
+
The query can be executed in any order and even in a different order each time, so the result of this function is indeterminate.
To get a determinate result, you can use the ‘min’ or ‘max’ function instead of ‘any’.

@@ -13,4 +18,4 @@ In some cases, you can rely on the order of execution. This applies to cases whe

When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function.

-- Alias: `any_value`
+- Alias: `any_value`, `first_value`.
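+
+For illustration, a minimal sketch of the difference, using the `values` table function (with an unordered source the row picked is not guaranteed, as noted above):
+
+```sql
+SELECT any(x), any(x) RESPECT NULLS FROM values('x Nullable(Int64)', NULL, 3, 5);
+```
+
+Here `any(x)` skips the NULL and would typically return `3`, while `any(x) RESPECT NULLS` keeps the first value passed and would typically return `NULL`.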
diff --git a/docs/en/sql-reference/aggregate-functions/reference/first_value.md b/docs/en/sql-reference/aggregate-functions/reference/first_value.md index c1965b23fe3..0c26b66c64a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/first_value.md +++ b/docs/en/sql-reference/aggregate-functions/reference/first_value.md @@ -5,9 +5,12 @@ sidebar_position: 7 # first_value -Selects the first encountered value, similar to `any`, but could accept NULL. -Mostly it should be used with [Window Functions](../../window-functions/index.md). -Without Window Functions the result will be random if the source stream is not ordered. +It is an alias for [`any`](../../../sql-reference/aggregate-functions/reference/any.md) but it was introduced for compatibility with [Window Functions](../../window-functions/index.md), where sometimes it's necessary to process `NULL` values (by default all ClickHouse aggregate functions ignore NULL values). + +It supports declaring a modifier to respect nulls (`RESPECT NULLS`), both under [Window Functions](../../window-functions/index.md) and in normal aggregations. + +As with `any`, without Window Functions the result will be random if the source stream is not ordered and the return type +matches the input type (Null is only returned if the input is Nullable or -OrNull combinator is added). ## examples @@ -23,15 +26,15 @@ INSERT INTO test_data (a, b) Values (1,null), (2,3), (4, 5), (6,null); ``` ### example1 -The NULL value is ignored at default. +By default, the NULL value is ignored. ```sql select first_value(b) from test_data; ``` ```text -┌─first_value_ignore_nulls(b)─┐ -│ 3 │ -└─────────────────────────────┘ +┌─any(b)─┐ +│ 3 │ +└────────┘ ``` ### example2 @@ -41,9 +44,9 @@ select first_value(b) ignore nulls from test_data ``` ```text -┌─first_value_ignore_nulls(b)─┐ -│ 3 │ -└─────────────────────────────┘ +┌─any(b) IGNORE NULLS ─┐ +│ 3 │ +└──────────────────────┘ ``` ### example3 @@ -53,9 +56,9 @@ select first_value(b) respect nulls from test_data ``` ```text -┌─first_value_respect_nulls(b)─┐ -│ ᴺᵁᴸᴸ │ -└──────────────────────────────┘ +┌─any(b) RESPECT NULLS ─┐ +│ ᴺᵁᴸᴸ │ +└───────────────────────┘ ``` ### example4 @@ -73,8 +76,8 @@ FROM ``` ```text -┌─first_value_respect_nulls(b)─┬─first_value(b)─┐ -│ ᴺᵁᴸᴸ │ 3 │ -└──────────────────────────────┴────────────────┘ +┌─any_respect_nulls(b)─┬─any(b)─┐ +│ ᴺᵁᴸᴸ │ 3 │ +└──────────────────────┴────────┘ ``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md deleted file mode 100644 index cc601c097fe..00000000000 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md +++ /dev/null @@ -1,48 +0,0 @@ - --- - toc_priority: 112 - --- - - # groupArraySorted {#groupArraySorted} - - Returns an array with the first N items in ascending order. - - ``` sql - groupArraySorted(N)(column) - ``` - - **Arguments** - - - `N` – The number of elements to return. - - If the parameter is omitted, default value is the size of input. - - - `column` – The value (Integer, String, Float and other Generic types). 
- - **Example** - - Gets the first 10 numbers: - - ``` sql - SELECT groupArraySorted(10)(number) FROM numbers(100) - ``` - - ``` text - ┌─groupArraySorted(10)(number)─┐ - │ [0,1,2,3,4,5,6,7,8,9] │ - └──────────────────────────────┘ - ``` - - - Gets all the String implementations of all numbers in column: - - ``` sql -SELECT groupArraySorted(str) FROM (SELECT toString(number) as str FROM numbers(5)); - - ``` - - ``` text - ┌─groupArraySorted(str)────────┐ - │ ['0','1','2','3','4'] │ - └──────────────────────────────┘ - ``` - \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index 3bf0e070cae..b1f2c5bacbb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -54,7 +54,6 @@ ClickHouse-specific aggregate functions: - [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md) - [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md) - [groupArraySample](./grouparraysample.md) -- [groupArraySorted](/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md) - [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md) - [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md) - [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 75fcbab6401..00efa63c960 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1083,7 +1083,7 @@ Result: **See also** -- [arrayFold](#arrayFold) +- [arrayFold](#arrayfold) ## arrayReduceInRanges @@ -1175,7 +1175,7 @@ FROM numbers(1,10); **See also** -- [arrayReduce](#arrayReduce) +- [arrayReduce](#arrayreduce) ## arrayReverse(arr) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 7276437ec82..a23849c13aa 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -319,9 +319,9 @@ This is a relatively fast non-cryptographic hash function of average quality for Calculates a 64-bit hash code from any type of integer. It works faster than intHash32. Average quality. -## SHA1, SHA224, SHA256, SHA512 +## SHA1, SHA224, SHA256, SHA512, SHA512_256 -Calculates SHA-1, SHA-224, SHA-256, SHA-512 hash from a string and returns the resulting set of bytes as [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +Calculates SHA-1, SHA-224, SHA-256, SHA-512, SHA-512-256 hash from a string and returns the resulting set of bytes as [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). **Syntax** @@ -1776,3 +1776,33 @@ Result: │ (('queries','database','analytical'),('oriented','processing','DBMS')) │ └────────────────────────────────────────────────────────────────────────┘ ``` + +## sqid + +Transforms numbers into YouTube-like short URL hash called [Sqid](https://sqids.org/). + +**Syntax** + +```sql +sqid(number1, ...) +``` + +**Arguments** + +- A variable number of UInt8, UInt16, UInt32 or UInt64 numbers. + +**Returned Value** + +A hash id [String](/docs/en/sql-reference/data-types/string.md). 
+ +**Example** + +```sql +SELECT sqid(1, 2, 3, 4, 5); +``` + +```response +┌─sqid(1, 2, 3, 4, 5)─┐ +│ gXHfJ1C6dN │ +└─────────────────────┘ +``` diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index b2a1d5066bb..5b9d01985dd 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -67,45 +67,7 @@ WHERE macro = 'test'; │ test │ Value │ └───────┴──────────────┘ ``` - -## getClientHTTPHeader -Returns the value of specified http header.If there is no such header or the request method is not http, it will throw an exception. -**Syntax** - -```sql -getClientHTTPHeader(name); -``` - -**Arguments** - -- `name` — HTTP header name .[String](../../sql-reference/data-types/string.md#string) - -**Returned value** - -Value of the specified header. -Type:[String](../../sql-reference/data-types/string.md#string). - - -When we use `clickhouse-client` to execute this function, we'll always get empty string, because client doesn't use http protocol. -```sql -SELECT getCientHTTPHeader('test') -``` -result: - -```text -┌─getClientHTTPHeader('test')─┐ -│ │ -└────────────------───────────┘ -``` -Try to use http request: -```shell -echo "select getClientHTTPHeader('X-Clickhouse-User')" | curl -H 'X-ClickHouse-User: default' -H 'X-ClickHouse-Key: ' 'http://localhost:8123/' -d @- - -#result -default -``` - ## FQDN Returns the fully qualified domain name of the ClickHouse server. @@ -666,6 +628,8 @@ SELECT formatReadableSize(filesize_bytes) AS filesize ``` +Alias: `FORMAT_BYTES`. + ``` text ┌─filesize_bytes─┬─filesize───┐ │ 1 │ 1.00 B │ diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 1940993ce0b..0f85b81278d 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -393,40 +393,6 @@ Reverses the sequence of bytes in a string. Reverses a sequence of Unicode code points in a string. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. -## format - -Format the `pattern` string with the strings listed in the arguments, similar to formatting in Python. The pattern string can contain replacement fields surrounded by curly braces `{}`. Anything not contained in braces is considered literal text and copied verbatim into the output. Literal brace character can be escaped by two braces: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are implicitly given monotonically increasing numbers). - -**Syntax** - -```sql -format(pattern, s0, s1, …) -``` - -**Example** - -``` sql -SELECT format('{1} {0} {1}', 'World', 'Hello') -``` - -```result -┌─format('{1} {0} {1}', 'World', 'Hello')─┐ -│ Hello World Hello │ -└─────────────────────────────────────────┘ -``` - -With implicit numbers: - -``` sql -SELECT format('{} {}', 'Hello', 'World') -``` - -```result -┌─format('{} {}', 'Hello', 'World')─┐ -│ Hello World │ -└───────────────────────────────────┘ -``` - ## concat Concatenates the given arguments. 
diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 74d5d747193..c7bd16cad4a 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -132,6 +132,40 @@ For more information, see [RE2](https://github.com/google/re2/blob/master/re2/re regexpQuoteMeta(s) ``` +## format + +Format the `pattern` string with the values (strings, integers, etc.) listed in the arguments, similar to formatting in Python. The pattern string can contain replacement fields surrounded by curly braces `{}`. Anything not contained in braces is considered literal text and copied verbatim into the output. Literal brace character can be escaped by two braces: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are implicitly given monotonically increasing numbers). + +**Syntax** + +```sql +format(pattern, s0, s1, …) +``` + +**Example** + +``` sql +SELECT format('{1} {0} {1}', 'World', 'Hello') +``` + +```result +┌─format('{1} {0} {1}', 'World', 'Hello')─┐ +│ Hello World Hello │ +└─────────────────────────────────────────┘ +``` + +With implicit numbers: + +``` sql +SELECT format('{} {}', 'Hello', 'World') +``` + +```result +┌─format('{} {}', 'Hello', 'World')─┐ +│ Hello World │ +└───────────────────────────────────┘ +``` + ## translate Replaces characters in the string `s` using a one-to-one character mapping defined by `from` and `to` strings. `from` and `to` must be constant ASCII strings of the same size. Non-ASCII characters in the original string are not modified. diff --git a/docs/en/sql-reference/functions/time-series-functions.md b/docs/en/sql-reference/functions/time-series-functions.md deleted file mode 100644 index e183fdcdcd7..00000000000 --- a/docs/en/sql-reference/functions/time-series-functions.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -slug: /en/sql-reference/functions/time-series-functions -sidebar_position: 172 -sidebar_label: Time Series ---- - -# Time Series Functions - -Below functions are used for time series analysis. - -## seriesPeriodDetectFFT - -Finds the period of the given time series data using FFT -Detect Period in time series data using FFT. -FFT - Fast Fourier transform (https://en.wikipedia.org/wiki/Fast_Fourier_transform) - -**Syntax** - -``` sql -seriesPeriodDetectFFT(series); -``` - -**Arguments** - -- `series` - An array of numeric values - -**Returned value** - -- A real value equal to the period of time series - -Type: [Float64](../../sql-reference/data-types/float.md). - -**Examples** - -Query: - -``` sql -SELECT seriesPeriodDetectFFT([1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6, 1, 4, 6]) AS print_0; -``` - -Result: - -``` text -┌───────────print_0──────┐ -│ 3 │ -└────────────────────────┘ -``` diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 6ceb9b5849e..2cb802c863b 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -10,7 +10,7 @@ A set of queries that allow changing the table structure. Syntax: ``` sql -ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ... +ALTER [TEMPORARY] TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ... ``` In the query, specify a list of one or more comma-separated actions. 
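The new `[TEMPORARY]` clause in the `ALTER TABLE` grammar above can be exercised against a session-scoped table; a minimal sketch, assuming a hypothetical temporary table `tmp_events`:

```sql
CREATE TEMPORARY TABLE tmp_events (ts DateTime, value Float64);

-- The optional TEMPORARY keyword applies the usual column actions
-- to a table visible only within the current session
ALTER TEMPORARY TABLE tmp_events ADD COLUMN host String;
```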
diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 2a8d6788889..56828745048 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -90,152 +90,11 @@ Views look the same as normal tables. For example, they are listed in the result To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop-view). Although `DROP TABLE` works for VIEWs as well. -## Live View [Experimental] +## Live View [Deprecated] -:::note -This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable usage of live views and `WATCH` query using [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting. Input the command `set allow_experimental_live_view = 1`. -::: +This feature is deprecated and will be removed in the future. -```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ... -``` - -Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query. - -Live views are triggered by insert into the innermost table specified in the query. - -Live views work similarly to how a query in a distributed table works. But instead of combining partial results from different servers they combine partial result from current data with partial result from the new data. When a live view query includes a subquery then the cached partial result is only stored for the innermost subquery. - -:::info -- [Table function](../../../sql-reference/table-functions/index.md) is not supported as the innermost table. -- Tables that do not have inserts such as a [dictionary](../../../sql-reference/dictionaries/index.md), [system table](../../../operations/system-tables/index.md), a [normal view](#normal), or a [materialized view](#materialized) will not trigger a live view. -- Only queries where one can combine partial result from the old data plus partial result from the new data will work. Live view will not work for queries that require the complete data set to compute the final result or aggregations where the state of the aggregation must be preserved. -- Does not work with replicated or distributed tables where inserts are performed on different nodes. -- Can't be triggered by multiple tables. - -See [WITH REFRESH](#live-view-with-refresh) to force periodic updates of a live view that in some cases can be used as a workaround. -::: - -### Monitoring Live View Changes - -You can monitor changes in the `LIVE VIEW` query result using [WATCH](../../../sql-reference/statements/watch.md) query. - -```sql -WATCH [db.]live_view -``` - -**Example:** - -```sql -CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x; -CREATE LIVE VIEW lv AS SELECT sum(x) FROM mt; -``` -Watch a live view while doing a parallel insert into the source table. 
- -```sql -WATCH lv; -``` - -```bash -┌─sum(x)─┬─_version─┐ -│ 1 │ 1 │ -└────────┴──────────┘ -┌─sum(x)─┬─_version─┐ -│ 3 │ 2 │ -└────────┴──────────┘ -┌─sum(x)─┬─_version─┐ -│ 6 │ 3 │ -└────────┴──────────┘ -``` - -```sql -INSERT INTO mt VALUES (1); -INSERT INTO mt VALUES (2); -INSERT INTO mt VALUES (3); -``` - -Or add [EVENTS](../../../sql-reference/statements/watch.md#events-clause) clause to just get change events. - -```sql -WATCH [db.]live_view EVENTS; -``` - -**Example:** - -```sql -WATCH lv EVENTS; -``` - -```bash -┌─version─┐ -│ 1 │ -└─────────┘ -┌─version─┐ -│ 2 │ -└─────────┘ -┌─version─┐ -│ 3 │ -└─────────┘ -``` - -You can execute [SELECT](../../../sql-reference/statements/select/index.md) query on a live view in the same way as for any regular view or a table. If the query result is cached it will return the result immediately without running the stored query on the underlying tables. - -```sql -SELECT * FROM [db.]live_view WHERE ... -``` - -### Force Live View Refresh - -You can force live view refresh using the `ALTER LIVE VIEW [db.]table_name REFRESH` statement. - -### WITH REFRESH Clause - -When a live view is created with a `WITH REFRESH` clause then it will be automatically refreshed after the specified number of seconds elapse since the last refresh or trigger. - -```sql -CREATE LIVE VIEW [db.]table_name WITH REFRESH [value_in_sec] AS SELECT ... -``` - -If the refresh value is not specified then the value specified by the [periodic_live_view_refresh](../../../operations/settings/settings.md#periodic-live-view-refresh) setting is used. - -**Example:** - -```sql -CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now(); -WATCH lv -``` - -```bash -┌───────────────now()─┬─_version─┐ -│ 2021-02-21 08:47:05 │ 1 │ -└─────────────────────┴──────────┘ -┌───────────────now()─┬─_version─┐ -│ 2021-02-21 08:47:10 │ 2 │ -└─────────────────────┴──────────┘ -┌───────────────now()─┬─_version─┐ -│ 2021-02-21 08:47:15 │ 3 │ -└─────────────────────┴──────────┘ -``` - -```sql -WATCH lv -``` - -``` -Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Table default.lv does not exist.. -``` - -### Live View Usage - -Most common uses of live view tables include: - -- Providing push notifications for query result changes to avoid polling. -- Caching results of most frequent queries to provide immediate query results. -- Watching for table changes and triggering a follow-up select queries. -- Watching metrics from system tables using periodic refresh. 
- -**See Also** -- [ALTER LIVE VIEW](../alter/view.md#alter-live-view) +For your convenience, the old documentation is located [here](https://pastila.nl/?00f32652/fdf07272a7b54bda7e13b919264e449f.md) ## Window View [Experimental] diff --git a/docs/en/sql-reference/statements/explain.md b/docs/en/sql-reference/statements/explain.md index 9afc7099108..e7c2000301a 100644 --- a/docs/en/sql-reference/statements/explain.md +++ b/docs/en/sql-reference/statements/explain.md @@ -415,7 +415,7 @@ ExpressionTransform ExpressionTransform × 2 (SettingQuotaAndLimits) (ReadFromStorage) - NumbersMt × 2 0 → 1 + NumbersRange × 2 0 → 1 ``` ### EXPLAIN ESTIMATE diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 177a8283f38..695801983b7 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -150,7 +150,7 @@ SYSTEM RELOAD CONFIG [ON CLUSTER cluster_name] ## RELOAD USERS -Reloads all access storages, including: users.xml, local disk access storage, replicated (in ZooKeeper) access storage. +Reloads all access storages, including: users.xml, local disk access storage, replicated (in ZooKeeper) access storage. ```sql SYSTEM RELOAD USERS [ON CLUSTER cluster_name] @@ -354,7 +354,7 @@ After running this statement the `[db.]replicated_merge_tree_family_table_name` ### SYNC DATABASE REPLICA -Waits until the specified [replicated database](https://clickhouse.com/docs/en/engines/database-engines/replicated) applies all schema changes from the DDL queue of that database. +Waits until the specified [replicated database](https://clickhouse.com/docs/en/engines/database-engines/replicated) applies all schema changes from the DDL queue of that database. **Syntax** ```sql @@ -451,12 +451,12 @@ SYSTEM SYNC FILE CACHE [ON CLUSTER cluster_name] ### SYSTEM STOP LISTEN -Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol. +Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol. However, if the corresponding protocol settings were not specified in the clickhouse-server configuration, this command will have no effect. ```sql -SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP WITH PROXY | TCP SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] ``` - If `CUSTOM 'protocol'` modifier is specified, the custom protocol with the specified name defined in the protocols section of the server configuration will be stopped. @@ -471,5 +471,5 @@ Allows new connections to be established on the specified protocols. However, if the server on the specified port and protocol was not stopped using the SYSTEM STOP LISTEN command, this command will have no effect. 
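With the underscores dropped from the protocol tokens (`TCP WITH PROXY`, `TCP SECURE`), a stop/start pair now reads as follows; a minimal sketch, assuming the secure TCP listener is actually configured in the server settings:

```sql
-- Close the secure TCP socket and gracefully terminate its connections
SYSTEM STOP LISTEN TCP SECURE;

-- Re-open the same listener later
SYSTEM START LISTEN TCP SECURE;
```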
```sql -SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP WITH PROXY | TCP SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] ``` diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index ad1feb87c60..98498eb8823 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -1,4 +1,4 @@ - -- +--- slug: /en/sql-reference/table-functions/file sidebar_position: 60 sidebar_label: file diff --git a/docs/en/sql-reference/table-functions/fileCluster.md b/docs/en/sql-reference/table-functions/fileCluster.md index 22ca132f136..2646250311c 100644 --- a/docs/en/sql-reference/table-functions/fileCluster.md +++ b/docs/en/sql-reference/table-functions/fileCluster.md @@ -45,7 +45,7 @@ $ cat /var/lib/clickhouse/user_files/test1.csv 1,"file1" 11,"file11" -$ cat /var/lib/clickhouse/user_files/test1.csv +$ cat /var/lib/clickhouse/user_files/test2.csv 2,"file2" 22,"file22" ``` diff --git a/docs/en/sql-reference/table-functions/numbers.md b/docs/en/sql-reference/table-functions/numbers.md index 32f51363a0a..7d3437b7d53 100644 --- a/docs/en/sql-reference/table-functions/numbers.md +++ b/docs/en/sql-reference/table-functions/numbers.md @@ -17,6 +17,8 @@ The following queries are equivalent: SELECT * FROM numbers(10); SELECT * FROM numbers(0, 10); SELECT * FROM system.numbers LIMIT 10; +SELECT * FROM system.numbers WHERE number BETWEEN 0 AND 9; +SELECT * FROM system.numbers WHERE number IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9); ``` Examples: diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index 7195ee38af6..ebb692bb270 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -681,7 +681,6 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); - `disk` — диск, находящийся внутри тома. - `max_data_part_size_bytes` — максимальный размер куска данных, который может находиться на любом из дисков этого тома. Если в результате слияния размер куска ожидается больше, чем max_data_part_size_bytes, то этот кусок будет записан в следующий том. В основном эта функция позволяет хранить новые / мелкие куски на горячем (SSD) томе и перемещать их на холодный (HDD) том, когда они достигают большого размера. Не используйте этот параметр, если политика имеет только один том. - `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1). Для перемещения куски сортируются по размеру от большего к меньшему (по убыванию) и выбираются куски, совокупный размер которых достаточен для соблюдения условия `move_factor`, если совокупный размер всех партов недостаточен, будут перемещены все парты. -- `prefer_not_to_merge` — Отключает слияние кусков данных, хранящихся на данном томе. Если данная настройка включена, то слияние данных, хранящихся на данном томе, не допускается. Это позволяет контролировать работу ClickHouse с медленными дисками. 
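The two forms appended above to the list of equivalent `numbers` queries all enumerate the integers 0 through 9, which is easy to verify with an aggregate; a minimal sketch (the check itself is illustrative):

```sql
-- Any of the equivalent forms yields the same counts:
SELECT count() AS c, sum(number) AS s FROM numbers(10);
-- c = 10, s = 45
```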
Примеры конфигураций: @@ -718,7 +717,6 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); external - true diff --git a/docs/ru/interfaces/cli.md b/docs/ru/interfaces/cli.md index 47ab6474fc0..8910c258788 100644 --- a/docs/ru/interfaces/cli.md +++ b/docs/ru/interfaces/cli.md @@ -14,7 +14,7 @@ ClickHouse предоставляет собственный клиент ком $ clickhouse-client ClickHouse client version 20.13.1.5273 (official build). Connecting to localhost:9000 as user default. -Connected to ClickHouse server version 20.13.1 revision 54442. +Connected to ClickHouse server version 20.13.1. :) ``` diff --git a/docs/ru/operations/settings/merge-tree-settings.md b/docs/ru/operations/settings/merge-tree-settings.md index 60d17363d9a..855462588e8 100644 --- a/docs/ru/operations/settings/merge-tree-settings.md +++ b/docs/ru/operations/settings/merge-tree-settings.md @@ -119,7 +119,7 @@ Eсли суммарное число активных кусков во все - Положительное целое число. - 0 (без ограничений). -Значение по умолчанию: 100. +Значение по умолчанию: 1000. Команда `Insert` создает один или несколько блоков (кусков). При вставке в Replicated таблицы ClickHouse для [дедупликации вставок](../../engines/table-engines/mergetree-family/replication.md) записывает в Zookeeper хеш-суммы созданных кусков. Но хранятся только последние `replicated_deduplication_window` хеш-сумм. Самые старые хеш-суммы удаляются из Zookeeper. Большое значение `replicated_deduplication_window` замедляет `Insert`, так как приходится сравнивать большее количество хеш-сумм. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index e13ddd18030..2081dcc59b6 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2249,7 +2249,7 @@ SELECT * FROM test_table ## input_format_parallel_parsing {#input-format-parallel-parsing} -Включает или отключает режим, при котором входящие данные разбиваются на части, парсинг каждой из которых осуществляется параллельно с сохранением исходного порядка. Поддерживается только для форматов [TSV](../../interfaces/formats.md#tabseparated), [TKSV](../../interfaces/formats.md#tskv), [CSV](../../interfaces/formats.md#csv) и [JSONEachRow](../../interfaces/formats.md#jsoneachrow). +Включает или отключает режим, при котором входящие данные разбиваются на части, парсинг каждой из которых осуществляется параллельно с сохранением исходного порядка. Поддерживается только для форматов [TSV](../../interfaces/formats.md#tabseparated), [TSKV](../../interfaces/formats.md#tskv), [CSV](../../interfaces/formats.md#csv) и [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Возможные значения: @@ -2260,7 +2260,7 @@ SELECT * FROM test_table ## output_format_parallel_formatting {#output-format-parallel-formatting} -Включает или отключает режим, при котором исходящие данные форматируются параллельно с сохранением исходного порядка. Поддерживается только для форматов [TSV](../../interfaces/formats.md#tabseparated), [TKSV](../../interfaces/formats.md#tskv), [CSV](../../interfaces/formats.md#csv) и [JSONEachRow](../../interfaces/formats.md#jsoneachrow). +Включает или отключает режим, при котором исходящие данные форматируются параллельно с сохранением исходного порядка. Поддерживается только для форматов [TSV](../../interfaces/formats.md#tabseparated), [TSKV](../../interfaces/formats.md#tskv), [CSV](../../interfaces/formats.md#csv) и [JSONEachRow](../../interfaces/formats.md#jsoneachrow). 
Возможные значения: diff --git a/docs/ru/operations/system-tables/storage_policies.md b/docs/ru/operations/system-tables/storage_policies.md index 8c3f117ca91..dbb91a8ec1a 100644 --- a/docs/ru/operations/system-tables/storage_policies.md +++ b/docs/ru/operations/system-tables/storage_policies.md @@ -13,6 +13,5 @@ slug: /ru/operations/system-tables/storage_policies - `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — имена дисков, содержащихся в политике хранения. - `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — максимальный размер куска данных, который может храниться на дисках тома (0 — без ограничений). - `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1). -- `prefer_not_to_merge` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Значение настройки `prefer_not_to_merge`. Если данная настройка включена, то слияние данных, хранящихся на данном томе, не допускается. Это позволяет контролировать работу ClickHouse с медленными дисками. Если политика хранения содержит несколько томов, то каждому тому соответствует отдельная запись в таблице. diff --git a/docs/ru/operations/system-tables/trace_log.md b/docs/ru/operations/system-tables/trace_log.md index 2c0b42bac8b..85b468a0f69 100644 --- a/docs/ru/operations/system-tables/trace_log.md +++ b/docs/ru/operations/system-tables/trace_log.md @@ -19,7 +19,7 @@ ClickHouse создает эту таблицу когда установлен - `revision`([UInt32](../../sql-reference/data-types/int-uint.md)) — ревизия сборки сервера ClickHouse. - Во время соединения с сервером через `clickhouse-client`, вы видите строку похожую на `Connected to ClickHouse server version 19.18.1 revision 54429.`. Это поле содержит номер после `revision`, но не содержит строку после `version`. + Во время соединения с сервером через `clickhouse-client`, вы видите строку похожую на `Connected to ClickHouse server version 19.18.1.`. Это поле содержит номер после `revision`, но не содержит строку после `version`. - `trace_type`([Enum8](../../sql-reference/data-types/enum.md)) — тип трассировки: diff --git a/docs/ru/operations/utilities/clickhouse-local.md b/docs/ru/operations/utilities/clickhouse-local.md index 6f0394a183d..92712a6f6b2 100644 --- a/docs/ru/operations/utilities/clickhouse-local.md +++ b/docs/ru/operations/utilities/clickhouse-local.md @@ -45,7 +45,6 @@ $ clickhouse-local --structure "table_structure" --input-format "format_of_incom - `--logger.level` — уровень логирования. - `--ignore-error` — не прекращать обработку если запрос выдал ошибку. - `-c`, `--config-file` — путь к файлу конфигурации. По умолчанию `clickhouse-local` запускается с пустой конфигурацией. Конфигурационный файл имеет тот же формат, что и для сервера ClickHouse, и в нём можно использовать все конфигурационные параметры сервера. Обычно подключение конфигурации не требуется; если требуется установить отдельный параметр, то это можно сделать ключом с именем параметра. -- `--no-system-tables` — запуск без использования системных таблиц. - `--help` — вывод справочной информации о `clickhouse-local`. - `-V`, `--version` — вывод текущей версии и выход. 
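With `prefer_not_to_merge` removed from the `system.storage_policies` description above, introspection relies on the remaining columns; a minimal sketch querying just that subset (column names assumed to match the current table layout):

```sql
SELECT policy_name, volume_name, disks, max_data_part_size, move_factor
FROM system.storage_policies;
```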
diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md index 1a45f0f1f7f..385a9835eca 100644 --- a/docs/ru/sql-reference/statements/alter/column.md +++ b/docs/ru/sql-reference/statements/alter/column.md @@ -11,7 +11,7 @@ sidebar_label: "Манипуляции со столбцами" Синтаксис: ``` sql -ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ... +ALTER [TEMPORARY] TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ... ``` В запросе можно указать сразу несколько действий над одной таблицей через запятую. diff --git a/docs/ru/sql-reference/statements/explain.md b/docs/ru/sql-reference/statements/explain.md index 4e0a13f7eae..4d1d4ad4de5 100644 --- a/docs/ru/sql-reference/statements/explain.md +++ b/docs/ru/sql-reference/statements/explain.md @@ -371,7 +371,7 @@ ExpressionTransform ExpressionTransform × 2 (SettingQuotaAndLimits) (ReadFromStorage) - NumbersMt × 2 0 → 1 + NumbersRange × 2 0 → 1 ``` ### EXPLAIN ESTIMATE {#explain-estimate} diff --git a/docs/ru/sql-reference/table-functions/fileCluster.md b/docs/ru/sql-reference/table-functions/fileCluster.md index 7385f4859dc..2f2145b6dba 100644 --- a/docs/ru/sql-reference/table-functions/fileCluster.md +++ b/docs/ru/sql-reference/table-functions/fileCluster.md @@ -44,7 +44,7 @@ $ cat /var/lib/clickhouse/user_files/test1.csv 1,"file1" 11,"file11" -$ cat /var/lib/clickhouse/user_files/test1.csv +$ cat /var/lib/clickhouse/user_files/test2.csv 2,"file2" 22,"file22" ``` diff --git a/docs/zh/engines/table-engines/mergetree-family/mergetree.md b/docs/zh/engines/table-engines/mergetree-family/mergetree.md index c738ae0f24c..ef46afbcbd5 100644 --- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md @@ -623,7 +623,6 @@ MergeTree 系列表引擎可以将数据存储在多个块设备上。这对某 - `disk` — 卷中的磁盘。 - `max_data_part_size_bytes` — 卷中的磁盘可以存储的数据片段的最大大小。 - `move_factor` — 当可用空间少于这个因子时,数据将自动的向下一个卷(如果有的话)移动 (默认值为 0.1)。 -- `prefer_not_to_merge` - 禁止在这个卷中进行数据合并。该选项启用时,对该卷的数据不能进行合并。这个选项主要用于慢速磁盘。 配置示例: @@ -660,7 +659,6 @@ MergeTree 系列表引擎可以将数据存储在多个块设备上。这对某 external - true diff --git a/docs/zh/interfaces/cli.md b/docs/zh/interfaces/cli.md index a6b4d10dd2f..7f2a887578c 100644 --- a/docs/zh/interfaces/cli.md +++ b/docs/zh/interfaces/cli.md @@ -14,7 +14,7 @@ ClickHouse提供了一个原生命令行客户端`clickhouse-client`客户端支 $ clickhouse-client ClickHouse client version 19.17.1.1579 (official build). Connecting to localhost:9000 as user default. -Connected to ClickHouse server version 19.17.1 revision 54428. +Connected to ClickHouse server version 19.17.1. 
:) ``` diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index 86e205ea401..1874970ac95 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -1203,7 +1203,7 @@ ClickHouse生成异常 - 类型:布尔 - 默认值:True -启用数据格式的保序并行分析。 仅支持TSV,TKSV,CSV和JSONEachRow格式。 +启用数据格式的保序并行分析。 仅支持TSV,TSKV,CSV和JSONEachRow格式。 ## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing} diff --git a/docs/zh/operations/system-tables/storage_policies.md b/docs/zh/operations/system-tables/storage_policies.md index e29915a98da..27219f789f4 100644 --- a/docs/zh/operations/system-tables/storage_policies.md +++ b/docs/zh/operations/system-tables/storage_policies.md @@ -13,6 +13,5 @@ slug: /zh/operations/system-tables/storage_policies - `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — 存储策略中定义的磁盘名。 - `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 可以存储在卷磁盘上数据部分的最大大小 (0 - 不限制)。 - `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — 磁盘空闲的比率。当比率超过配置的值,ClickHouse 将把数据向下一个卷移动。 -- `prefer_not_to_merge` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 设置中 `prefer_not_to_merge` 的值. 当这个设置启用时,不允许在此卷上合并数据。这将允许控制 ClickHouse 如何与运行速度较慢的磁盘一起工作。 如果存储策略包含多个卷,则每个卷的信息将在表中作为单独一行存储。 diff --git a/docs/zh/operations/system-tables/trace_log.md b/docs/zh/operations/system-tables/trace_log.md index 479a8b78762..ccdfdd72ce0 100644 --- a/docs/zh/operations/system-tables/trace_log.md +++ b/docs/zh/operations/system-tables/trace_log.md @@ -22,7 +22,7 @@ ClickHouse创建此表时 [trace_log](../../operations/server-configuration-para - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision. - 通过以下方式连接到服务器 `clickhouse-client`,你看到的字符串类似于 `Connected to ClickHouse server version 19.18.1 revision 54429.`. 该字段包含 `revision`,但不是 `version` 的服务器。 + 通过以下方式连接到服务器 `clickhouse-client`,你看到的字符串类似于 `Connected to ClickHouse server version 19.18.1.`. 该字段包含 `revision`,但不是 `version` 的服务器。 - `timer_type` ([枚举8](../../sql-reference/data-types/enum.md)) — Timer type: diff --git a/docs/zh/operations/utilities/clickhouse-local.md b/docs/zh/operations/utilities/clickhouse-local.md index 7428ae06a6e..e8c9503626b 100644 --- a/docs/zh/operations/utilities/clickhouse-local.md +++ b/docs/zh/operations/utilities/clickhouse-local.md @@ -45,7 +45,6 @@ clickhouse-local --structure "table_structure" --input-format "format_of_incomin - `--logger.level` — 日志级别。 - `--ignore-error` — 当查询失败时,不停止处理。 - `-c`, `--config-file` — 与ClickHouse服务器格式相同配置文件的路径,默认情况下配置为空。 -- `--no-system-tables` — 不附加系统表。 - `--help` — `clickhouse-local`使用帮助信息。 - `-V`, `--version` — 打印版本信息并退出。 diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 3233e40de31..4e81c0a75f6 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -493,8 +493,7 @@ void Client::connect() if (is_interactive) { - std::cout << "Connected to " << server_name << " server version " << server_version << " revision " << server_revision << "." - << std::endl << std::endl; + std::cout << "Connected to " << server_name << " server version " << server_version << "." 
<< std::endl << std::endl; auto client_version_tuple = std::make_tuple(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH); auto server_version_tuple = std::make_tuple(server_version_major, server_version_minor, server_version_patch); diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index 421e4038d12..f176fa277d7 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -36,7 +36,7 @@ public: void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, + std::shared_ptr & disk_selector, Poco::Util::LayeredConfiguration & config) override { if (command_arguments.size() != 2) @@ -51,8 +51,8 @@ public: const String & path_from = command_arguments[0]; const String & path_to = command_arguments[1]; - DiskPtr disk_from = global_context->getDisk(disk_name_from); - DiskPtr disk_to = global_context->getDisk(disk_name_to); + DiskPtr disk_from = disk_selector->get(disk_name_from); + DiskPtr disk_to = disk_selector->get(disk_name_to); String relative_path_from = validatePathAndGetAsRelative(path_from); String relative_path_to = validatePathAndGetAsRelative(path_to); diff --git a/programs/disks/CommandLink.cpp b/programs/disks/CommandLink.cpp index 357832865fb..dbaa3162f82 100644 --- a/programs/disks/CommandLink.cpp +++ b/programs/disks/CommandLink.cpp @@ -27,7 +27,7 @@ public: void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, + std::shared_ptr & disk_selector, Poco::Util::LayeredConfiguration & config) override { if (command_arguments.size() != 2) @@ -41,7 +41,7 @@ public: const String & path_from = command_arguments[0]; const String & path_to = command_arguments[1]; - DiskPtr disk = global_context->getDisk(disk_name); + DiskPtr disk = disk_selector->get(disk_name); String relative_path_from = validatePathAndGetAsRelative(path_from); String relative_path_to = validatePathAndGetAsRelative(path_to); diff --git a/programs/disks/CommandList.cpp b/programs/disks/CommandList.cpp index 48b54b70014..ea84cd0682d 100644 --- a/programs/disks/CommandList.cpp +++ b/programs/disks/CommandList.cpp @@ -33,7 +33,7 @@ public: void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, + std::shared_ptr & disk_selector, Poco::Util::LayeredConfiguration & config) override { if (command_arguments.size() != 1) @@ -46,7 +46,7 @@ public: const String & path = command_arguments[0]; - DiskPtr disk = global_context->getDisk(disk_name); + DiskPtr disk = disk_selector->get(disk_name); String relative_path = validatePathAndGetAsRelative(path); diff --git a/programs/disks/CommandListDisks.cpp b/programs/disks/CommandListDisks.cpp index 7b2fcd16107..79da021fd00 100644 --- a/programs/disks/CommandListDisks.cpp +++ b/programs/disks/CommandListDisks.cpp @@ -26,8 +26,8 @@ public: void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, - Poco::Util::LayeredConfiguration &) override + std::shared_ptr &, + Poco::Util::LayeredConfiguration & config) override { if (!command_arguments.empty()) { @@ -35,8 +35,29 @@ public: throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } - for (const auto & [disk_name, _] : global_context->getDisksMap()) - std::cout << disk_name << '\n'; + constexpr auto config_prefix = "storage_configuration.disks"; + constexpr auto default_disk_name = "default"; + + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_prefix, keys); + + bool has_default_disk = false; + + /// For the output to 
be ordered + std::set disks; + + for (const auto & disk_name : keys) + { + if (disk_name == default_disk_name) + has_default_disk = true; + disks.insert(disk_name); + } + + if (!has_default_disk) + disks.insert(default_disk_name); + + for (const auto & disk : disks) + std::cout << disk << '\n'; } }; } diff --git a/programs/disks/CommandMkDir.cpp b/programs/disks/CommandMkDir.cpp index e5df982d896..6d33bdec498 100644 --- a/programs/disks/CommandMkDir.cpp +++ b/programs/disks/CommandMkDir.cpp @@ -34,7 +34,7 @@ public: void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, + std::shared_ptr & disk_selector, Poco::Util::LayeredConfiguration & config) override { if (command_arguments.size() != 1) @@ -47,7 +47,7 @@ public: const String & path = command_arguments[0]; - DiskPtr disk = global_context->getDisk(disk_name); + DiskPtr disk = disk_selector->get(disk_name); String relative_path = validatePathAndGetAsRelative(path); bool recursive = config.getBool("recursive", false); diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index 654090b2138..75cf96252ed 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -26,7 +26,7 @@ public: void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, + std::shared_ptr & disk_selector, Poco::Util::LayeredConfiguration & config) override { if (command_arguments.size() != 2) @@ -40,7 +40,7 @@ public: const String & path_from = command_arguments[0]; const String & path_to = command_arguments[1]; - DiskPtr disk = global_context->getDisk(disk_name); + DiskPtr disk = disk_selector->get(disk_name); String relative_path_from = validatePathAndGetAsRelative(path_from); String relative_path_to = validatePathAndGetAsRelative(path_to); diff --git a/programs/disks/CommandRead.cpp b/programs/disks/CommandRead.cpp index b6cacdd2c61..85041faf22c 100644 --- a/programs/disks/CommandRead.cpp +++ b/programs/disks/CommandRead.cpp @@ -36,7 +36,7 @@ public: void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, + std::shared_ptr & disk_selector, Poco::Util::LayeredConfiguration & config) override { if (command_arguments.size() != 1) @@ -47,7 +47,7 @@ public: String disk_name = config.getString("disk", "default"); - DiskPtr disk = global_context->getDisk(disk_name); + DiskPtr disk = disk_selector->get(disk_name); String relative_path = validatePathAndGetAsRelative(command_arguments[0]); diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp index ff8d4a1c6bb..0c631eacff3 100644 --- a/programs/disks/CommandRemove.cpp +++ b/programs/disks/CommandRemove.cpp @@ -26,7 +26,7 @@ public: void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, + std::shared_ptr & disk_selector, Poco::Util::LayeredConfiguration & config) override { if (command_arguments.size() != 1) @@ -39,7 +39,7 @@ public: const String & path = command_arguments[0]; - DiskPtr disk = global_context->getDisk(disk_name); + DiskPtr disk = disk_selector->get(disk_name); String relative_path = validatePathAndGetAsRelative(path); diff --git a/programs/disks/CommandWrite.cpp b/programs/disks/CommandWrite.cpp index d075daf3215..7ded37e067a 100644 --- a/programs/disks/CommandWrite.cpp +++ b/programs/disks/CommandWrite.cpp @@ -37,7 +37,7 @@ public: void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, + std::shared_ptr & disk_selector, 
Poco::Util::LayeredConfiguration & config) override { if (command_arguments.size() != 1) @@ -50,7 +50,7 @@ public: const String & path = command_arguments[0]; - DiskPtr disk = global_context->getDisk(disk_name); + DiskPtr disk = disk_selector->get(disk_name); String relative_path = validatePathAndGetAsRelative(path); diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index b81cd52f8c8..ded324fd0da 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -209,7 +209,35 @@ int DisksApp::main(const std::vector & /*args*/) po::parsed_options parsed = parser.run(); args = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); } - command->execute(args, global_context, config()); + + std::unordered_set disks + { + config().getString("disk", "default"), + config().getString("disk-from", config().getString("disk", "default")), + config().getString("disk-to", config().getString("disk", "default")), + }; + + auto validator = [&disks]( + const Poco::Util::AbstractConfiguration & config, + const std::string & disk_config_prefix, + const std::string & disk_name) + { + if (!disks.contains(disk_name)) + return false; + + const auto disk_type = config.getString(disk_config_prefix + ".type", "local"); + + if (disk_type == "cache") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk type 'cache' of disk {} is not supported by clickhouse-disks", disk_name); + + return true; + }; + + constexpr auto config_prefix = "storage_configuration.disks"; + auto disk_selector = std::make_shared(); + disk_selector->initialize(config(), config_prefix, global_context, validator); + + command->execute(args, disk_selector, config()); return Application::EXIT_OK; } diff --git a/programs/disks/ICommand.h b/programs/disks/ICommand.h index de41eedec35..da106e1084e 100644 --- a/programs/disks/ICommand.h +++ b/programs/disks/ICommand.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -25,7 +26,7 @@ public: virtual void execute( const std::vector & command_arguments, - DB::ContextMutablePtr & global_context, + std::shared_ptr & disk_selector, Poco::Util::LayeredConfiguration & config) = 0; const std::optional & getCommandOptions() const { return command_option_description; } diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index e04e669abae..9c45fc5f4e1 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -289,6 +290,33 @@ try if (!config().has("keeper_server")) throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Keeper configuration ( section) not found in config"); + auto updateMemorySoftLimitInConfig = [&](Poco::Util::AbstractConfiguration & config) + { + UInt64 memory_soft_limit = 0; + if (config.has("keeper_server.max_memory_usage_soft_limit")) + { + memory_soft_limit = config.getUInt64("keeper_server.max_memory_usage_soft_limit"); + } + + /// if memory soft limit is not set, we will use default value + if (memory_soft_limit == 0) + { + Float64 ratio = 0.9; + if (config.has("keeper_server.max_memory_usage_soft_limit_ratio")) + ratio = config.getDouble("keeper_server.max_memory_usage_soft_limit_ratio"); + + size_t physical_server_memory = getMemoryAmount(); + if (ratio > 0 && physical_server_memory > 0) + { + memory_soft_limit = static_cast(physical_server_memory * ratio); + config.setUInt64("keeper_server.max_memory_usage_soft_limit", memory_soft_limit); + } + } + LOG_INFO(log, 
"keeper_server.max_memory_usage_soft_limit is set to {}", formatReadableSizeWithBinarySuffix(memory_soft_limit)); + }; + + updateMemorySoftLimitInConfig(config()); + std::string path; if (config().has("keeper_server.storage_path")) @@ -328,6 +356,13 @@ try config().getUInt("max_thread_pool_free_size", 1000), config().getUInt("thread_pool_queue_size", 10000) ); + /// Wait for all threads to avoid possible use-after-free (for example logging objects can be already destroyed). + SCOPE_EXIT({ + Stopwatch watch; + LOG_INFO(log, "Waiting for background threads"); + GlobalThreadPool::instance().shutdown(); + LOG_INFO(log, "Background threads finished in {} ms", watch.elapsedMilliseconds()); + }); static ServerErrorHandler error_handler; Poco::ErrorHandler::set(&error_handler); @@ -492,6 +527,8 @@ try { updateLevels(*config, logger()); + updateMemorySoftLimitInConfig(*config); + if (config->has("keeper_server")) global_context->updateKeeperConfiguration(*config); diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index f3b84fa3eb1..8e526812957 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -742,16 +743,16 @@ void LocalServer::processConfig() status.emplace(fs::path(path) / "status", StatusFile::write_full_info); LOG_DEBUG(log, "Loading metadata from {}", path); - loadMetadataSystem(global_context); - attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE)); + auto startup_system_tasks = loadMetadataSystem(global_context); + attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE)); attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA)); attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE)); - startupSystemTables(); + waitLoad(TablesLoaderForegroundPoolId, startup_system_tasks); if (!config().has("only-system-tables")) { DatabaseCatalog::instance().createBackgroundTasks(); - loadMetadata(global_context); + waitLoad(loadMetadata(global_context)); DatabaseCatalog::instance().startupBackgroundTasks(); } @@ -760,9 +761,9 @@ void LocalServer::processConfig() LOG_DEBUG(log, "Loaded metadata."); } - else if (!config().has("no-system-tables")) + else { - attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE)); + attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE)); attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA)); attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE)); } @@ -841,7 +842,6 @@ void LocalServer::addOptions(OptionsDescription & options_description) ("logger.log", po::value(), "Log file name") ("logger.level", po::value(), "Log level") - ("no-system-tables", "do not attach system tables (better startup time)") ("path", po::value(), "Storage path") ("only-system-tables", "attach only system tables from specified path") ("top_level_domains_path", po::value(), "Path to lists with custom TLDs") @@ -870,8 +870,6 @@ void LocalServer::processOptions(const OptionsDescription &, const 
CommandLineOp config().setString("table-file", options["file"].as()); if (options.count("structure")) config().setString("table-structure", options["structure"].as()); - if (options.count("no-system-tables")) - config().setBool("no-system-tables", true); if (options.count("only-system-tables")) config().setBool("only-system-tables", true); if (options.count("database")) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 8519532f788..8076d108083 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -656,6 +657,11 @@ try CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision()); CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); + Poco::ThreadPool server_pool(3, server_settings.max_connections); + std::mutex servers_lock; + std::vector servers; + std::vector servers_to_start_before_tables; + /** Context contains all that query execution is dependent: * settings, available functions, data types, aggregate functions, databases, ... */ @@ -696,6 +702,68 @@ try server_settings.max_thread_pool_size, server_settings.max_thread_pool_free_size, server_settings.thread_pool_queue_size); + /// Wait for all threads to avoid possible use-after-free (for example logging objects can be already destroyed). + SCOPE_EXIT({ + Stopwatch watch; + LOG_INFO(log, "Waiting for background threads"); + GlobalThreadPool::instance().shutdown(); + LOG_INFO(log, "Background threads finished in {} ms", watch.elapsedMilliseconds()); + }); + + /// NOTE: global context should be destroyed *before* GlobalThreadPool::shutdown() + /// Otherwise GlobalThreadPool::shutdown() will hang, since Context holds some threads. + SCOPE_EXIT({ + /** Ask to cancel background jobs all table engines, + * and also query_log. + * It is important to do early, not in destructor of Context, because + * table engines could use Context on destroy. + */ + LOG_INFO(log, "Shutting down storages."); + + global_context->shutdown(); + + LOG_DEBUG(log, "Shut down storages."); + + if (!servers_to_start_before_tables.empty()) + { + LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish."); + size_t current_connections = 0; + { + std::lock_guard lock(servers_lock); + for (auto & server : servers_to_start_before_tables) + { + server.stop(); + current_connections += server.currentConnections(); + } + } + + if (current_connections) + LOG_INFO(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); + else + LOG_INFO(log, "Closed all listening sockets."); + + if (current_connections > 0) + current_connections = waitServersToFinish(servers_to_start_before_tables, servers_lock, server_settings.shutdown_wait_unfinished); + + if (current_connections) + LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections); + else + LOG_INFO(log, "Closed connections to servers for tables."); + } + + global_context->shutdownKeeperDispatcher(); + + /// Wait server pool to avoid use-after-free of destroyed context in the handlers + server_pool.joinAll(); + + /** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available. + * At this moment, no one could own shared part of Context. 
+ */ + global_context.reset(); + shared_context.reset(); + LOG_DEBUG(log, "Destroyed global context."); + }); + #if USE_AZURE_BLOB_STORAGE /// It makes sense to deinitialize libxml after joining of all threads @@ -754,10 +822,6 @@ try } } - Poco::ThreadPool server_pool(3, server_settings.max_connections); - std::mutex servers_lock; - std::vector servers; - std::vector servers_to_start_before_tables; /// This object will periodically calculate some metrics. ServerAsynchronousMetrics async_metrics( global_context, @@ -1279,8 +1343,6 @@ try global_context->setHTTPHeaderFilter(*config); global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop); - global_context->setClientHTTPHeaderForbiddenHeaders(server_settings_.get_client_http_header_forbidden_headers); - global_context->setAllowGetHTTPHeaderFunction(server_settings_.allow_get_client_http_header); global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop); ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited; @@ -1336,6 +1398,10 @@ try global_context->getMessageBrokerSchedulePool().increaseThreadsCount(server_settings_.background_message_broker_schedule_pool_size); global_context->getDistributedSchedulePool().increaseThreadsCount(server_settings_.background_distributed_schedule_pool_size); + global_context->getAsyncLoader().setMaxThreads(TablesLoaderForegroundPoolId, server_settings_.tables_loader_foreground_pool_size); + global_context->getAsyncLoader().setMaxThreads(TablesLoaderBackgroundLoadPoolId, server_settings_.tables_loader_background_pool_size); + global_context->getAsyncLoader().setMaxThreads(TablesLoaderBackgroundStartupPoolId, server_settings_.tables_loader_background_pool_size); + getIOThreadPool().reloadConfiguration( server_settings.max_io_thread_pool_size, server_settings.max_io_thread_pool_free_size, @@ -1595,60 +1661,6 @@ try /// try set up encryption. There are some errors in config, error will be printed and server wouldn't start. CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs"); - SCOPE_EXIT({ - async_metrics.stop(); - - /** Ask to cancel background jobs all table engines, - * and also query_log. - * It is important to do early, not in destructor of Context, because - * table engines could use Context on destroy. - */ - LOG_INFO(log, "Shutting down storages."); - - global_context->shutdown(); - - LOG_DEBUG(log, "Shut down storages."); - - if (!servers_to_start_before_tables.empty()) - { - LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish."); - size_t current_connections = 0; - { - std::lock_guard lock(servers_lock); - for (auto & server : servers_to_start_before_tables) - { - server.stop(); - current_connections += server.currentConnections(); - } - } - - if (current_connections) - LOG_INFO(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); - else - LOG_INFO(log, "Closed all listening sockets."); - - if (current_connections > 0) - current_connections = waitServersToFinish(servers_to_start_before_tables, servers_lock, server_settings.shutdown_wait_unfinished); - - if (current_connections) - LOG_INFO(log, "Closed connections to servers for tables. But {} remain. 
Probably some tables of other users cannot finish their connections after context shutdown.", current_connections); - else - LOG_INFO(log, "Closed connections to servers for tables."); - - global_context->shutdownKeeperDispatcher(); - } - - /// Wait server pool to avoid use-after-free of destroyed context in the handlers - server_pool.joinAll(); - - /** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available. - * At this moment, no one could own shared part of Context. - */ - global_context.reset(); - shared_context.reset(); - LOG_DEBUG(log, "Destroyed global context."); - }); - /// DNSCacheUpdater uses BackgroundSchedulePool which lives in shared context /// and thus this object must be created after the SCOPE_EXIT object where shared /// context is destroyed. @@ -1676,17 +1688,18 @@ try LOG_INFO(log, "Loading metadata from {}", path_str); + LoadTaskPtrs load_metadata_tasks; try { auto & database_catalog = DatabaseCatalog::instance(); /// We load temporary database first, because projections need it. database_catalog.initializeAndLoadTemporaryDatabase(); - loadMetadataSystem(global_context); - maybeConvertSystemDatabase(global_context); + auto system_startup_tasks = loadMetadataSystem(global_context); + maybeConvertSystemDatabase(global_context, system_startup_tasks); /// This has to be done before the initialization of system logs, /// otherwise there is a race condition between the system database initialization /// and creation of new tables in the database. - startupSystemTables(); + waitLoad(TablesLoaderForegroundPoolId, system_startup_tasks); /// After attaching system databases we can initialize system log. global_context->initializeSystemLogs(); global_context->setSystemZooKeeperLogAfterInitializationIfNeeded(); @@ -1702,9 +1715,10 @@ try /// and so loadMarkedAsDroppedTables() will find it and try to add, and UUID will overlap. database_catalog.loadMarkedAsDroppedTables(); database_catalog.createBackgroundTasks(); - /// Then, load remaining databases - loadMetadata(global_context, default_database); - convertDatabasesEnginesIfNeed(global_context); + /// Then, load remaining databases (some of them maybe be loaded asynchronously) + load_metadata_tasks = loadMetadata(global_context, default_database, server_settings.async_load_databases); + /// If we need to convert database engines, disable async tables loading + convertDatabasesEnginesIfNeed(load_metadata_tasks, global_context); database_catalog.startupBackgroundTasks(); /// After loading validate that default database exists database_catalog.assertDatabaseExists(default_database); @@ -1716,6 +1730,7 @@ try tryLogCurrentException(log, "Caught exception while loading metadata"); throw; } + LOG_DEBUG(log, "Loaded metadata."); /// Init trace collector only after trace_log system table was created @@ -1871,9 +1886,14 @@ try throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "distributed_ddl.pool_size should be greater then 0"); global_context->setDDLWorker(std::make_unique(pool_size, ddl_zookeeper_path, global_context, &config(), "distributed_ddl", "DDLWorker", - &CurrentMetrics::MaxDDLEntryID, &CurrentMetrics::MaxPushedDDLEntryID)); + &CurrentMetrics::MaxDDLEntryID, &CurrentMetrics::MaxPushedDDLEntryID), + load_metadata_tasks); } + /// Do not keep tasks in server, they should be kept inside databases. Used here to make dependent tasks only. 
+ load_metadata_tasks.clear(); + load_metadata_tasks.shrink_to_fit(); + { std::lock_guard lock(servers_lock); for (auto & server : servers) diff --git a/programs/server/config.xml b/programs/server/config.xml index f367b97cec1..52a1c528040 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -364,8 +364,15 @@ 128 16 16 + 0 + 0 --> + + + 0.9 @@ -478,7 +485,6 @@ true - false round_robin diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index 123a15c5706..c62e0c98184 100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -108,7 +108,7 @@ filter: blur(1px); } - .chart div { position: absolute; } + .chart > div { position: absolute; } .inputs { height: auto; @@ -215,8 +215,6 @@ color: var(--text-color); } - .u-legend th { display: none; } - .themes { float: right; font-size: 20pt; @@ -433,6 +431,16 @@ display: none; } + .u-series { + line-height: 0.8; + } + + .u-series.footer { + font-size: 8px; + padding-top: 0; + margin-top: 0; + } + /* Source: https://cdn.jsdelivr.net/npm/uplot@1.6.21/dist/uPlot.min.css * It is copy-pasted to lower the number of requests. */ @@ -478,7 +486,6 @@ * - compress the state for URL's #hash; * - footer with "about" or a link to source code; * - allow to configure a table on a server to save the dashboards; - * - multiple lines on chart; * - if a query returned one value, display this value instead of a diagram; * - if a query returned something unusual, display the table; */ @@ -520,10 +527,54 @@ let queries = []; /// Query parameters with predefined default values. /// All other parameters will be automatically found in the queries. let params = { - "rounding": "60", - "seconds": "86400" + 'rounding': '60', + 'seconds': '86400' }; +/// Palette generation for charts +function generatePalette(baseColor, numColors) { + const baseHSL = hexToHsl(baseColor); + const hueStep = 360 / numColors; + const palette = []; + for (let i = 0; i < numColors; i++) { + const hue = Math.round((baseHSL.h + i * hueStep) % 360); + const color = `hsl(${hue}, ${baseHSL.s}%, ${baseHSL.l}%)`; + palette.push(color); + } + return palette; +} + +/// Helper function to convert hex color to HSL +function hexToHsl(hex) { + hex = hex.replace(/^#/, ''); + const bigint = parseInt(hex, 16); + const r = (bigint >> 16) & 255; + const g = (bigint >> 8) & 255; + const b = bigint & 255; + const r_norm = r / 255; + const g_norm = g / 255; + const b_norm = b / 255; + const max = Math.max(r_norm, g_norm, b_norm); + const min = Math.min(r_norm, g_norm, b_norm); + const l = (max + min) / 2; + let s = 0; + if (max !== min) { + s = l > 0.5 ? (max - min) / (2 - max - min) : (max - min) / (max + min); + } + let h = 0; + if (max !== min) { + if (max === r_norm) { + h = (g_norm - b_norm) / (max - min) + (g_norm < b_norm ? 
6 : 0); + } else if (max === g_norm) { + h = (b_norm - r_norm) / (max - min) + 2; + } else { + h = (r_norm - g_norm) / (max - min) + 4; + } + } + h = Math.round(h * 60); + return { h, s: Math.round(s * 100), l: Math.round(l * 100) }; +} + let theme = 'light'; function setTheme(new_theme) { @@ -913,6 +964,8 @@ document.getElementById('mass-editor-textarea').addEventListener('input', e => { function legendAsTooltipPlugin({ className, style = { background: "var(--legend-background)" } } = {}) { let legendEl; + let showTop = false; + const showLimit = 5; function init(u, opts) { legendEl = u.root.querySelector(".u-legend"); @@ -932,13 +985,28 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend- ...style }); - // hide series color markers - const idents = legendEl.querySelectorAll(".u-marker"); + if (opts.series.length == 2) { + const nodes = legendEl.querySelectorAll("th"); + for (let i = 0; i < nodes.length; i++) + nodes[i].style.display = "none"; + } else { + legendEl.querySelector("th").remove(); + legendEl.querySelector("td").setAttribute('colspan', '2'); + legendEl.querySelector("td").style.textAlign = 'center'; + } - for (let i = 0; i < idents.length; i++) - idents[i].style.display = "none"; + if (opts.series.length - 1 > showLimit) { + showTop = true; + let footer = legendEl.insertRow().insertCell(); + footer.setAttribute('colspan', '2'); + footer.style.textAlign = 'center'; + footer.classList.add('u-value'); + footer.parentNode.classList.add('u-series','footer'); + footer.textContent = ". . ."; + } const overEl = u.over; + overEl.style.overflow = "visible"; overEl.appendChild(legendEl); @@ -946,11 +1014,28 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend- overEl.addEventListener("mouseleave", () => {legendEl.style.display = "none";}); } + function nodeListToArray(nodeList) { + return Array.prototype.slice.call(nodeList); + } + function update(u) { let { left, top } = u.cursor; left -= legendEl.clientWidth / 2; top -= legendEl.clientHeight / 2; legendEl.style.transform = "translate(" + left + "px, " + top + "px)"; + if (showTop) { + let nodes = nodeListToArray(legendEl.querySelectorAll("tr")); + let header = nodes.shift(); + let footer = nodes.pop(); + nodes.forEach(function (node) { node._sort_key = +node.querySelector("td").textContent; }); + nodes.sort((a, b) => +b._sort_key - +a._sort_key); + nodes.forEach(function (node) { node.parentNode.appendChild(node); }); + for (let i = 0; i < nodes.length; i++) { + nodes[i].style.display = i < showLimit ? 
null : "none"; + delete nodes[i]._sort_key; + } + footer.parentNode.appendChild(footer); + } } return { @@ -961,12 +1046,13 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend- }; } + async function doFetch(query, url_params = '') { host = document.getElementById('url').value || host; user = document.getElementById('user').value; password = document.getElementById('password').value; - let url = `${host}?default_format=JSONCompactColumns&enable_http_compression=1` + let url = `${host}?default_format=JSONColumnsWithMetadata&enable_http_compression=1` if (add_http_cors_header) { // For debug purposes, you may set add_http_cors_header from a browser console @@ -980,14 +1066,17 @@ async function doFetch(query, url_params = '') { url += `&password=${encodeURIComponent(password)}`; } - let response, data, error; + let response, reply, error; try { response = await fetch(url + url_params, { method: "POST", body: query }); - data = await response.text(); + reply = await response.text(); if (response.ok) { - data = JSON.parse(data); + reply = JSON.parse(reply); + if (reply.exception) { + error = reply.exception; + } } else { - error = data; + error = reply; } } catch (e) { console.log(e); @@ -1006,7 +1095,7 @@ async function doFetch(query, url_params = '') { } } - return {data, error}; + return {reply, error}; } async function draw(idx, chart, url_params, query) { @@ -1015,17 +1104,76 @@ async function draw(idx, chart, url_params, query) { plots[idx] = null; } - let {data, error} = await doFetch(query, url_params); + let {reply, error} = await doFetch(query, url_params); + if (!error) { + if (reply.rows.length == 0) { + error = "Query returned empty result."; + } else if (reply.meta.length < 2) { + error = "Query should return at least two columns: unix timestamp and value."; + } else { + for (let i = 0; i < reply.meta.length; i++) { + let label = reply.meta[i].name; + let column = reply.data[label]; + if (!Array.isArray(column) || column.length != reply.data[reply.meta[0].name].length) { + error = "Wrong data format of the query."; + break; + } + } + } + } + + // Transform string-labeled data to multi-column data + function transformToColumns() { + const x = reply.meta[0].name; // time; must be ordered + const l = reply.meta[1].name; // string label column to distinguish series; must be ordered + const y = reply.meta[2].name; // values; must have single value for (x, l) pair + const labels = [...new Set(reply.data[l])].sort((a, b) => a - b); + if (labels.includes('__time__')) { + error = "The second column is not allowed to contain '__time__' values."; + return; + } + const times = [...new Set(reply.data[x])].sort((a, b) => a - b); + let new_meta = [{ name: '__time__', type: reply.meta[0].type }]; + let new_data = { __time__: [] }; + for (let label of labels) { + new_meta.push({ name: label, type: reply.meta[2].type }); + new_data[label] = []; + } + let new_rows = 0; + function row_done(row_time) { + new_rows++; + new_data.__time__.push(row_time); + for (let label of labels) { + if (new_data[label].length < new_rows) { + new_data[label].push(null); + } + } + } + let prev_time = reply.data[x][0]; + const old_rows = reply.data[x].length; + for (let i = 0; i < old_rows; i++) { + const time = reply.data[x][i]; + const label = reply.data[l][i]; + const value = reply.data[y][i]; + if (prev_time != time) { + row_done(prev_time); + prev_time = time; + } + new_data[label].push(value); + } + row_done(prev_time); + reply.meta = new_meta; + reply.data = new_data; + reply.rows = 
new_rows; + } + + function isStringColumn(type) { + return type === 'String' || type === 'LowCardinality(String)'; + } if (!error) { - if (!Array.isArray(data)) { - error = "Query should return an array."; - } else if (data.length == 0) { - error = "Query returned empty result."; - } else if (data.length != 2) { - error = "Query should return exactly two columns: unix timestamp and value."; - } else if (!Array.isArray(data[0]) || !Array.isArray(data[1]) || data[0].length != data[1].length) { - error = "Wrong data format of the query."; + if (reply.meta.length == 3 && isStringColumn(reply.meta[1].type)) { + transformToColumns(); } } @@ -1043,24 +1191,38 @@ async function draw(idx, chart, url_params, query) { } const [line_color, fill_color, grid_color, axes_color] = theme != 'dark' - ? ["#F88", "#FEE", "#EED", "#2c3235"] - : ["#864", "#045", "#2c3235", "#c7d0d9"]; + ? ["#ff8888", "#ffeeee", "#eeeedd", "#2c3235"] + : ["#886644", "#004455", "#2c3235", "#c7d0d9"]; let sync = uPlot.sync("sync"); - const max_value = Math.max(...data[1]); + let axis = { + stroke: axes_color, + grid: { width: 1 / devicePixelRatio, stroke: grid_color }, + ticks: { width: 1 / devicePixelRatio, stroke: grid_color } + }; + + let axes = [axis, axis]; + let series = [{ label: "x" }]; + let data = [reply.data[reply.meta[0].name]]; + + // Treat every column as series + const series_count = reply.meta.length; + const fill = series_count == 2 ? fill_color : undefined; + const palette = generatePalette(line_color, series_count); + let max_value = Number.NEGATIVE_INFINITY; + for (let i = 1; i < series_count; i++) { + let label = reply.meta[i].name; + series.push({ label, stroke: palette[i - 1], fill }); + data.push(reply.data[label]); + max_value = Math.max(max_value, ...reply.data[label]); + } const opts = { width: chart.clientWidth, height: chart.clientHeight, - axes: [ { stroke: axes_color, - grid: { width: 1 / devicePixelRatio, stroke: grid_color }, - ticks: { width: 1 / devicePixelRatio, stroke: grid_color } }, - { stroke: axes_color, - grid: { width: 1 / devicePixelRatio, stroke: grid_color }, - ticks: { width: 1 / devicePixelRatio, stroke: grid_color } } ], - series: [ { label: "x" }, - { label: "y", stroke: line_color, fill: fill_color } ], + axes, + series, padding: [ null, null, null, (Math.round(max_value * 100) / 100).toString().length * 6 - 10 ], plugins: [ legendAsTooltipPlugin() ], cursor: { @@ -1216,22 +1378,21 @@ function saveState() { } async function searchQueries() { - let {data, error} = await doFetch(search_query); + let {reply, error} = await doFetch(search_query); if (error) { throw new Error(error); } - if (!Array.isArray(data)) { - throw new Error("Search query should return an array."); - } else if (data.length == 0) { + let data = reply.data; + if (reply.rows == 0) { throw new Error("Search query returned empty result."); - } else if (data.length != 2) { + } else if (reply.meta.length != 2 || reply.meta[0].name != "title" || reply.meta[1].name != "query") { throw new Error("Search query should return exactly two columns: title and query."); - } else if (!Array.isArray(data[0]) || !Array.isArray(data[1]) || data[0].length != data[1].length) { + } else if (!Array.isArray(data.title) || !Array.isArray(data.query) || data.title.length != data.query.length) { throw new Error("Wrong data format of the search query."); } - for (let i = 0; i < data[0].length; i++) { - queries.push({title: data[0][i], query: data[1][i]}); + for (let i = 0; i < data.title.length; i++) { + queries.push({title: data.title[i], 
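With the switch to `JSONColumnsWithMetadata` and the `transformToColumns` pivot above, a chart query may now return three columns — a timestamp, a string label, and a value — and the dashboard draws one series per distinct label. A hypothetical query of that shape (the table and filter are chosen only for illustration):

```sql
-- (t, label, v) rows are pivoted into one column per label,
-- so each metric becomes its own line on the chart.
SELECT
    toUInt32(toStartOfMinute(event_time)) AS t,
    metric AS label,
    avg(value) AS v
FROM system.asynchronous_metric_log
WHERE event_time > now() - INTERVAL 1 HOUR
  AND metric LIKE 'CPUFrequencyMHz%'
GROUP BY t, label
ORDER BY t, label
```

Note that the transform requires the result to be ordered by time, which is why the `ORDER BY` clause is not optional here.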
diff --git a/rust/CMakeLists.txt b/rust/CMakeLists.txt index 6aa25e95679..5ea806baa3b 100644 --- a/rust/CMakeLists.txt +++ b/rust/CMakeLists.txt
@@ -14,6 +14,10 @@ macro(configure_rustc) set(RUST_CFLAGS "${RUST_CFLAGS} --sysroot ${CMAKE_SYSROOT}") endif() + if (USE_MUSL) + set(RUST_CXXFLAGS "${RUST_CXXFLAGS} -D_LIBCPP_HAS_MUSL_LIBC=1") + endif () + if(CCACHE_EXECUTABLE MATCHES "/sccache$") message(STATUS "Using RUSTC_WRAPPER: ${CCACHE_EXECUTABLE}") set(RUSTCWRAPPER "rustc-wrapper = \"${CCACHE_EXECUTABLE}\"")
diff --git a/src/AggregateFunctions/AggregateFunctionAny.cpp b/src/AggregateFunctions/AggregateFunctionAny.cpp index fc8f50efabe..a6010ff07c3 100644 --- a/src/AggregateFunctions/AggregateFunctionAny.cpp +++ b/src/AggregateFunctions/AggregateFunctionAny.cpp
@@ -1,26 +1,213 @@ #include #include +#include +#include +#include namespace DB { struct Settings; +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; + extern const int LOGICAL_ERROR; +} + namespace { +struct AggregateFunctionAnyRespectNullsData +{ + enum Status : UInt8 + { + NotSet = 1, + SetNull = 2, + SetOther = 3 + }; + + Status status = Status::NotSet; + Field value; + + bool isSet() const { return status != Status::NotSet; } + void setNull() { status = Status::SetNull; } + void setOther() { status = Status::SetOther; } +};
+
+template <bool First>
+class AggregateFunctionAnyRespectNulls final + : public IAggregateFunctionDataHelper<AggregateFunctionAnyRespectNullsData, AggregateFunctionAnyRespectNulls<First>> +{ +public: + using Data = AggregateFunctionAnyRespectNullsData; + + SerializationPtr serialization; + const bool returns_nullable_type = false; + + explicit AggregateFunctionAnyRespectNulls(const DataTypePtr & type) + : IAggregateFunctionDataHelper<Data, AggregateFunctionAnyRespectNulls<First>>({type}, {}, type) + , serialization(type->getDefaultSerialization()) + , returns_nullable_type(type->isNullable()) + { + } + + String getName() const override + { + if constexpr (First) + return "any_respect_nulls"; + else + return "anyLast_respect_nulls"; + } + + bool allocatesMemoryInArena() const override { return false; } + + void addNull(AggregateDataPtr __restrict place) const + { + chassert(returns_nullable_type); + auto & d = this->data(place); + if (First && d.isSet()) + return; + d.setNull(); + } + + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override + { + if (columns[0]->isNullable()) + { + if (columns[0]->isNullAt(row_num)) + return addNull(place); + } + auto & d = this->data(place); + if (First && d.isSet()) + return; + d.setOther(); + columns[0]->get(row_num, d.value); + } + + void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override + { + if (columns[0]->isNullable()) + addNull(place); + else + add(place, columns, 0, arena); + } + + void addBatchSinglePlace( + size_t row_begin, size_t row_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos) + const override + { + if (if_argument_pos >= 0) + { + const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData(); + size_t size = row_end - row_begin; + for (size_t i = 0; i < size; ++i) + { + size_t pos = First ? row_begin + i : row_end - 1 - i; + if (flags[pos]) + { + add(place, columns, pos, arena); + break; + } + } + } + else if (row_begin < row_end) + { + size_t pos = First ? row_begin : row_end - 1; + add(place, columns, pos, arena); + } + } + + void addBatchSinglePlaceNotNull( + size_t, size_t, AggregateDataPtr __restrict, const IColumn **, const UInt8 *, Arena *, ssize_t) const override + { + /// This should not happen since it means somebody else has preprocessed the data (NULLs or IFs) and might + /// have discarded values that we need (NULLs) + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionAnyRespectNulls::addBatchSinglePlaceNotNull called"); + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override + { + auto & d = this->data(place); + if (First && d.isSet()) + return; + + auto & other = this->data(rhs); + if (other.isSet()) + { + d.status = other.status; + d.value = other.value; + } + } + + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override + { + auto & d = this->data(place); + UInt8 k = d.status; + + writeBinaryLittleEndian(k, buf); + if (k == Data::Status::SetOther) + serialization->serializeBinary(d.value, buf, {}); + } + + void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override + { + auto & d = this->data(place); + UInt8 k = Data::Status::NotSet; + readBinaryLittleEndian(k, buf); + d.status = static_cast<Data::Status>(k); + if (d.status == Data::Status::NotSet) + return; + else if (d.status == Data::Status::SetNull) + { + if (!returns_nullable_type) + throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type (NULL) in non-nullable {}State", getName()); + return; + } + else if (d.status == Data::Status::SetOther) + serialization->deserializeBinary(d.value, buf, {}); + else + throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type ({}) in {}State", static_cast<Int8>(k), getName()); + } + + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override + { + auto & d = this->data(place); + if (d.status == Data::Status::SetOther) + to.insert(d.value); + else + to.insertDefault(); + } + + AggregateFunctionPtr getOwnNullAdapter( + const AggregateFunctionPtr & original_function, + const DataTypes & /*arguments*/, + const Array & /*params*/, + const AggregateFunctionProperties & /*properties*/) const override + { + return original_function; + } +};
+
+
+template <bool First>
+IAggregateFunction * createAggregateFunctionSingleValueRespectNulls( + const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *) +{ + assertNoParameters(name, parameters); + assertUnary(name, argument_types); + + return new AggregateFunctionAnyRespectNulls<First>(argument_types[0]); } AggregateFunctionPtr createAggregateFunctionAny(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) { return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData>(name, argument_types, parameters, settings)); } -template <bool RespectNulls = false> -AggregateFunctionPtr createAggregateFunctionNullableAny( +AggregateFunctionPtr createAggregateFunctionAnyRespectNulls( const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) { - return AggregateFunctionPtr( - createAggregateFunctionSingleNullableValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData, RespectNulls>( - name, argument_types, parameters, settings)); + return AggregateFunctionPtr(createAggregateFunctionSingleValueRespectNulls<true>(name, argument_types, parameters, settings)); }
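The `First` template parameter above decides whether the first or the last qualifying row wins, both in the batched and the row-at-a-time paths. A quick sanity check of the user-visible semantics (a minimal sketch; the results assume a single-threaded read in insertion order, since `any`/`anyLast` are order-dependent):

```sql
SELECT any(number) AS first_row, anyLast(number) AS last_row
FROM numbers(5);
-- Expected: first_row = 0, last_row = 4
```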
& parameters, const Settings * settings) @@ -28,13 +215,10 @@ AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, co return AggregateFunctionPtr(createAggregateFunctionSingleValue(name, argument_types, parameters, settings)); } -template -AggregateFunctionPtr createAggregateFunctionNullableAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) +AggregateFunctionPtr createAggregateFunctionAnyLastRespectNulls( + const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) { - return AggregateFunctionPtr(createAggregateFunctionSingleNullableValue< - AggregateFunctionsSingleValue, - AggregateFunctionAnyLastData, - RespectNulls>(name, argument_types, parameters, settings)); + return AggregateFunctionPtr(createAggregateFunctionSingleValueRespectNulls(name, argument_types, parameters, settings)); } AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) @@ -46,26 +230,28 @@ AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, c void registerAggregateFunctionsAny(AggregateFunctionFactory & factory) { - AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; + AggregateFunctionProperties default_properties = {.returns_default_when_only_null = false, .is_order_dependent = true}; + AggregateFunctionProperties default_properties_for_respect_nulls + = {.returns_default_when_only_null = false, .is_order_dependent = true, .is_window_function = true}; - factory.registerFunction("any", { createAggregateFunctionAny, properties }); + factory.registerFunction("any", {createAggregateFunctionAny, default_properties}); factory.registerAlias("any_value", "any", AggregateFunctionFactory::CaseInsensitive); - factory.registerFunction("anyLast", { createAggregateFunctionAnyLast, properties }); - factory.registerFunction("anyHeavy", { createAggregateFunctionAnyHeavy, properties }); + factory.registerAlias("first_value", "any", AggregateFunctionFactory::CaseInsensitive); - // Synonyms for use as window functions. 
- factory.registerFunction("first_value", - { createAggregateFunctionAny, properties }, - AggregateFunctionFactory::CaseInsensitive); - factory.registerFunction("first_value_respect_nulls", - { createAggregateFunctionNullableAny, properties }, - AggregateFunctionFactory::CaseInsensitive); - factory.registerFunction("last_value", - { createAggregateFunctionAnyLast, properties }, - AggregateFunctionFactory::CaseInsensitive); - factory.registerFunction("last_value_respect_nulls", - { createAggregateFunctionNullableAnyLast, properties }, - AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("any_respect_nulls", {createAggregateFunctionAnyRespectNulls, default_properties_for_respect_nulls}); + factory.registerAlias("any_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive); + factory.registerAlias("first_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive); + + factory.registerFunction("anyLast", {createAggregateFunctionAnyLast, default_properties}); + factory.registerAlias("last_value", "anyLast", AggregateFunctionFactory::CaseInsensitive); + + factory.registerFunction("anyLast_respect_nulls", {createAggregateFunctionAnyLastRespectNulls, default_properties_for_respect_nulls}); + factory.registerAlias("last_value_respect_nulls", "anyLast_respect_nulls", AggregateFunctionFactory::CaseInsensitive); + + factory.registerFunction("anyHeavy", {createAggregateFunctionAnyHeavy, default_properties}); + + factory.registerNullsActionTransformation("any", "any_respect_nulls"); + factory.registerNullsActionTransformation("anyLast", "anyLast_respect_nulls"); } } diff --git a/src/AggregateFunctions/AggregateFunctionCount.h b/src/AggregateFunctions/AggregateFunctionCount.h index 9cab107e652..36a8617ba91 100644 --- a/src/AggregateFunctions/AggregateFunctionCount.h +++ b/src/AggregateFunctions/AggregateFunctionCount.h @@ -116,7 +116,7 @@ public: /// Return normalized state type: count() AggregateFunctionProperties properties; return std::make_shared( - AggregateFunctionFactory::instance().get(getName(), {}, {}, properties), DataTypes{}, Array{}); + AggregateFunctionFactory::instance().get(getName(), NullsAction::EMPTY, {}, {}, properties), DataTypes{}, Array{}); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override @@ -267,7 +267,7 @@ public: /// Return normalized state type: count() AggregateFunctionProperties properties; return std::make_shared( - AggregateFunctionFactory::instance().get(getName(), {}, {}, properties), DataTypes{}, Array{}); + AggregateFunctionFactory::instance().get(getName(), NullsAction::EMPTY, {}, {}, properties), DataTypes{}, Array{}); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index a195e0abf33..5c101888140 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -1,23 +1,11 @@ #include #include -#include -#include -#include #include - -#include - -#include - -#include -#include -#include - -#include - +#include #include - +#include +#include static constexpr size_t MAX_AGGREGATE_FUNCTION_NAME_LENGTH = 1000; @@ -28,10 +16,11 @@ struct Settings; namespace ErrorCodes { - extern const int UNKNOWN_AGGREGATE_FUNCTION; - extern const int LOGICAL_ERROR; extern const int ILLEGAL_AGGREGATION; + extern const int LOGICAL_ERROR; + 
diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index a195e0abf33..5c101888140 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp
@@ -1,23 +1,11 @@ #include #include -#include -#include -#include #include - -#include - -#include - -#include -#include -#include - -#include - +#include #include - +#include +#include static constexpr size_t MAX_AGGREGATE_FUNCTION_NAME_LENGTH = 1000;
@@ -28,10 +16,11 @@ struct Settings; namespace ErrorCodes { - extern const int UNKNOWN_AGGREGATE_FUNCTION; - extern const int LOGICAL_ERROR; extern const int ILLEGAL_AGGREGATION; + extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; extern const int TOO_LARGE_STRING_SIZE; + extern const int UNKNOWN_AGGREGATE_FUNCTION; } const String & getAggregateFunctionCanonicalNameIfAny(const String & name)
@@ -59,6 +48,23 @@ void AggregateFunctionFactory::registerFunction(const String & name, Value creat } } +void AggregateFunctionFactory::registerNullsActionTransformation(const String & source_ignores_nulls, const String & target_respect_nulls) +{ + if (!aggregate_functions.contains(source_ignores_nulls)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Source aggregation '{}' not found", source_ignores_nulls); + + if (!aggregate_functions.contains(target_respect_nulls)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Target aggregation '{}' not found", target_respect_nulls); + + if (!respect_nulls.emplace(source_ignores_nulls, target_respect_nulls).second) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Assignment from '{}' is not unique", source_ignores_nulls); + + if (!ignore_nulls.emplace(target_respect_nulls, source_ignores_nulls).second) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Assignment from '{}' is not unique", target_respect_nulls); +} +
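The transformation is registered in both directions, so at name-resolution time the modifiers are simple renames between the paired functions. A hypothetical illustration (`t`/`x` again stand in for real names):

```sql
-- Both resolve to the same implementation:
SELECT any(x) RESPECT NULLS FROM t;
SELECT any_respect_nulls(x) FROM t;

-- IGNORE NULLS maps the RESPECT NULLS variant back to the plain function:
SELECT any_respect_nulls(x) IGNORE NULLS FROM t;  -- behaves as any(x)
```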
static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types) { DataTypes res_types;
@@ -70,7 +76,11 @@ static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types) } AggregateFunctionPtr AggregateFunctionFactory::get( - const String & name, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties) const + const String & name, + NullsAction action, + const DataTypes & argument_types, + const Array & parameters, + AggregateFunctionProperties & out_properties) const { /// This to prevent costly string manipulation in parsing the aggregate function combinators. /// Example: avgArrayArrayArrayArray...(1000 times)...Array
@@ -81,8 +91,9 @@ AggregateFunctionPtr AggregateFunctionFactory::get( /// If one of the types is Nullable, we apply aggregate function combinator "Null" if it's not window function. /// Window functions are not real aggregate functions. Applying combinators doesn't make sense for them, - /// they must handle the nullability themselves - auto properties = tryGetProperties(name); + /// they must handle the nullability themselves. + /// Aggregate functions such as any_value_respect_nulls are considered window functions in that sense + auto properties = tryGetProperties(name, action); bool is_window_function = properties.has_value() && properties->is_window_function; if (!is_window_function && std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(), [](const auto & type) { return type->isNullable(); }))
@@ -98,8 +109,7 @@ AggregateFunctionPtr AggregateFunctionFactory::get( bool has_null_arguments = std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(), [](const auto & type) { return type->onlyNull(); }); - AggregateFunctionPtr nested_function = getImpl( - name, nested_types, nested_parameters, out_properties, has_null_arguments); + AggregateFunctionPtr nested_function = getImpl(name, action, nested_types, nested_parameters, out_properties, has_null_arguments); // Pure window functions are not real aggregate functions. Applying // combinators doesn't make sense for them, they must handle the
@@ -110,22 +120,54 @@ return combinator->transformAggregateFunction(nested_function, out_properties, types_without_low_cardinality, parameters); } - auto with_original_arguments = getImpl(name, types_without_low_cardinality, parameters, out_properties, false); + auto with_original_arguments = getImpl(name, action, types_without_low_cardinality, parameters, out_properties, false); if (!with_original_arguments) throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: AggregateFunctionFactory returned nullptr"); return with_original_arguments; } +std::optional<AggregateFunctionFactory::Value> +AggregateFunctionFactory::getAssociatedFunctionByNullsAction(const String & name, NullsAction action) const +{ + if (action == NullsAction::RESPECT_NULLS) + { + if (auto it = respect_nulls.find(name); it == respect_nulls.end()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} does not support RESPECT NULLS", name); + else if (auto associated_it = aggregate_functions.find(it->second); associated_it != aggregate_functions.end()) + return {associated_it->second}; + else + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Unable to find the function {} (equivalent to '{} RESPECT NULLS')", it->second, name); + } + + if (action == NullsAction::IGNORE_NULLS) + { + if (auto it = ignore_nulls.find(name); it != ignore_nulls.end()) + { + if (auto associated_it = aggregate_functions.find(it->second); associated_it != aggregate_functions.end()) + return {associated_it->second}; + else + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Unable to find the function {} (equivalent to '{} IGNORE NULLS')", it->second, name); + } + /// We don't throw for IGNORE NULLS of other functions because that's the default in CH + } + + return {}; +} +
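One consequence of resolving the `NullsAction` against the alias- and case-normalized name is that case-insensitive spellings keep working with the modifier. A hypothetical example (`t`/`x` stand in for real names):

```sql
-- FIRST_VALUE is a case-insensitive alias of any, so this goes through
-- the same any -> any_respect_nulls transformation:
SELECT FIRST_VALUE(x) RESPECT NULLS FROM t;
```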
AggregateFunctionPtr AggregateFunctionFactory::getImpl( const String & name_param, + NullsAction action, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties, bool has_null_arguments) const { String name = getAliasToOrName(name_param); + String case_insensitive_name; bool is_case_insensitive = false; Value found;
@@ -135,10 +177,14 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl( found = it->second; } - if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end()) + if (!found.creator) { - found = jt->second; - is_case_insensitive = true; + case_insensitive_name = Poco::toLower(name); + if (auto jt = case_insensitive_aggregate_functions.find(case_insensitive_name); jt != case_insensitive_aggregate_functions.end()) + { + found = jt->second; + is_case_insensitive = true; + } } ContextPtr query_context;
@@ -147,11 +193,14 @@ if (found.creator) { - out_properties = found.properties; + auto opt = getAssociatedFunctionByNullsAction(is_case_insensitive ? case_insensitive_name : name, action); + if (opt) + found = *opt; + out_properties = found.properties; if (query_context && query_context->getSettingsRef().log_queries) query_context->addQueryFactoriesInfo( - Context::QueryLogFactories::AggregateFunction, is_case_insensitive ? Poco::toLower(name) : name); + Context::QueryLogFactories::AggregateFunction, is_case_insensitive ? case_insensitive_name : name); /// The case when aggregate function should return NULL on NULL arguments. This case is handled in "get" method. if (!out_properties.returns_default_when_only_null && has_null_arguments)
@@ -196,7 +245,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl( DataTypes nested_types = combinator->transformArguments(argument_types); Array nested_parameters = combinator->transformParameters(parameters); - AggregateFunctionPtr nested_function = get(nested_name, nested_types, nested_parameters, out_properties); + AggregateFunctionPtr nested_function = get(nested_name, action, nested_types, nested_parameters, out_properties); return combinator->transformAggregateFunction(nested_function, out_properties, argument_types, parameters); }
@@ -213,16 +262,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl( throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION, "Unknown aggregate function {}{}", name, extra_info); } - -AggregateFunctionPtr AggregateFunctionFactory::tryGet( - const String & name, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties) const -{ - return isAggregateFunctionName(name) - ? get(name, argument_types, parameters, out_properties) - : nullptr; -} - -std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetProperties(String name) const +std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetProperties(String name, NullsAction action) const { if (name.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH) throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too long name of aggregate function, maximum: {}", MAX_AGGREGATE_FUNCTION_NAME_LENGTH);
@@ -231,6 +271,8 @@ { name = getAliasToOrName(name); Value found; + String lower_case_name; + bool is_case_insensitive = false; /// Find by exact match. if (auto it = aggregate_functions.find(name); it != aggregate_functions.end()) { found = it->second; } - if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end()) - found = jt->second; + if (!found.creator) + { + lower_case_name = Poco::toLower(name); + if (auto jt = case_insensitive_aggregate_functions.find(lower_case_name); jt != case_insensitive_aggregate_functions.end()) + { + is_case_insensitive = true; + found = jt->second; + } + } if (found.creator) + { + auto opt = getAssociatedFunctionByNullsAction(is_case_insensitive ? lower_case_name : name, action); + if (opt) + return opt->properties; return found.properties; + } /// Combinators of aggregate functions.
/// For every aggregate function 'agg' and combiner '-Comb' there is a combined aggregate function with the name 'aggComb', @@ -262,27 +316,29 @@ std::optional AggregateFunctionFactory::tryGetPrope } -bool AggregateFunctionFactory::isAggregateFunctionName(String name) const +bool AggregateFunctionFactory::isAggregateFunctionName(const String & name_) const { - if (name.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH) + if (name_.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH) throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too long name of aggregate function, maximum: {}", MAX_AGGREGATE_FUNCTION_NAME_LENGTH); - while (true) + if (aggregate_functions.contains(name_) || isAlias(name_)) + return true; + + String name_lowercase = Poco::toLower(name_); + if (case_insensitive_aggregate_functions.contains(name_lowercase) || isAlias(name_lowercase)) + return true; + + String name = name_; + while (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name)) { - if (aggregate_functions.contains(name) || isAlias(name)) - return true; + name = name.substr(0, name.size() - combinator->getName().size()); + name_lowercase = name_lowercase.substr(0, name_lowercase.size() - combinator->getName().size()); - String name_lowercase = Poco::toLower(name); - if (case_insensitive_aggregate_functions.contains(name_lowercase) || isAlias(name_lowercase)) + if (aggregate_functions.contains(name) || isAlias(name) || case_insensitive_aggregate_functions.contains(name_lowercase) + || isAlias(name_lowercase)) return true; - - if (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name)) - { - name = name.substr(0, name.size() - combinator->getName().size()); - } - else - return false; } + return false; } AggregateFunctionFactory & AggregateFunctionFactory::instance() diff --git a/src/AggregateFunctions/AggregateFunctionFactory.h b/src/AggregateFunctions/AggregateFunctionFactory.h index dab0d28e851..b1dc422fcb0 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.h +++ b/src/AggregateFunctions/AggregateFunctionFactory.h @@ -1,9 +1,9 @@ #pragma once #include -#include #include - +#include +#include #include #include @@ -62,36 +62,44 @@ public: Value creator, CaseSensitiveness case_sensitiveness = CaseSensitive); + /// Register how to transform from one aggregate function to other based on NullsAction + /// Registers them both ways: + /// SOURCE + RESPECT NULLS will be transformed to TARGET + /// TARGET + IGNORE NULLS will be transformed to SOURCE + void registerNullsActionTransformation(const String & source_ignores_nulls, const String & target_respect_nulls); + /// Throws an exception if not found. AggregateFunctionPtr get(const String & name, - const DataTypes & argument_types, - const Array & parameters, - AggregateFunctionProperties & out_properties) const; - - /// Returns nullptr if not found. - AggregateFunctionPtr tryGet( - const String & name, + NullsAction action, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties) const; /// Get properties if the aggregate function exists. 
- std::optional<AggregateFunctionProperties> tryGetProperties(String name) const; + std::optional<AggregateFunctionProperties> tryGetProperties(String name, NullsAction action) const; - bool isAggregateFunctionName(String name) const; + bool isAggregateFunctionName(const String & name) const; private: AggregateFunctionPtr getImpl( const String & name, + NullsAction action, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties, bool has_null_arguments) const; using AggregateFunctions = std::unordered_map<String, Value>; + using ActionMap = std::unordered_map<String, String>; AggregateFunctions aggregate_functions; + /// Mapping from functions with `RESPECT NULLS` modifier to actual aggregate function names + /// Example: `any(x) RESPECT NULLS` should be executed as function `any_respect_nulls` + ActionMap respect_nulls; + /// Same as above for `IGNORE NULLS` modifier + ActionMap ignore_nulls; + std::optional<Value> getAssociatedFunctionByNullsAction(const String & name, NullsAction action) const; /// Case insensitive aggregate functions will be additionally added here with lowercased name. AggregateFunctions case_insensitive_aggregate_functions;
diff --git a/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp b/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp deleted file mode 100644 index debc9b6d565..00000000000 --- a/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp +++ /dev/null
@@ -1,82 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ -struct Settings; - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; -} - -namespace -{ - -template