mirror of https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00

Merge branch 'master' into valid_until_per_auth_method

This commit is contained in: commit 5ee0a515c9

3  .github/workflows/create_release.yml (vendored)
@@ -3,6 +3,9 @@ name: CreateRelease
concurrency:
  group: release

env:
  PYTHONUNBUFFERED: 1

'on':
  workflow_dispatch:
    inputs:
4
.github/workflows/docker_test_images.yml
vendored
4
.github/workflows/docker_test_images.yml
vendored
@ -1,4 +1,5 @@
|
||||
name: Build docker images
|
||||
|
||||
'on':
|
||||
workflow_call:
|
||||
inputs:
|
||||
@ -12,6 +13,9 @@ name: Build docker images
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
env:
|
||||
PYTHONUNBUFFERED: 1
|
||||
|
||||
jobs:
|
||||
DockerBuildAarch64:
|
||||
runs-on: [self-hosted, style-checker-aarch64]
|
||||
|
4  .github/workflows/reusable_build_stage.yml (vendored)

@@ -1,7 +1,11 @@
### FIXME: merge reusable_test.yml and reusable_build.yml as they are almost identical
# and then merge reusable_build_stage.yml and reusable_test_stage.yml

env:
  PYTHONUNBUFFERED: 1

name: BuildStageWF

'on':
  workflow_call:
    inputs:
4  .github/workflows/reusable_test_stage.yml (vendored)

@@ -1,4 +1,8 @@
env:
  PYTHONUNBUFFERED: 1

name: StageWF

'on':
  workflow_call:
    inputs:
6  .gitmodules (vendored)

@@ -363,6 +363,12 @@
[submodule "contrib/double-conversion"]
    path = contrib/double-conversion
    url = https://github.com/ClickHouse/double-conversion.git
[submodule "contrib/mongo-cxx-driver"]
    path = contrib/mongo-cxx-driver
    url = https://github.com/ClickHouse/mongo-cxx-driver.git
[submodule "contrib/mongo-c-driver"]
    path = contrib/mongo-c-driver
    url = https://github.com/ClickHouse/mongo-c-driver.git
[submodule "contrib/numactl"]
    path = contrib/numactl
    url = https://github.com/ClickHouse/numactl.git
168  CHANGELOG.md

@@ -1,5 +1,6 @@
### Table of Contents
**[ClickHouse release v24.9, 2024-09-26](#249)**<br/>
**[ClickHouse release v24.8 LTS, 2024-08-20](#248)**<br/>
**[ClickHouse release v24.7, 2024-07-30](#247)**<br/>
**[ClickHouse release v24.6, 2024-07-01](#246)**<br/>
**[ClickHouse release v24.5, 2024-05-30](#245)**<br/>
@@ -11,6 +12,168 @@

# 2024 Changelog

### <a id="249"></a> ClickHouse release 24.9, 2024-09-26

#### Backward Incompatible Change
* Expressions like `a[b].c` are supported for named tuples, as well as named subscripts from arbitrary expressions, e.g., `expr().name`. This is useful for processing JSON (a sketch of the new behavior follows this list). This closes [#54965](https://github.com/ClickHouse/ClickHouse/issues/54965). In previous versions, an expression of the form `expr().name` was parsed as `tupleElement(expr(), name)`, and the query analyzer searched for a column `name` rather than for the corresponding tuple element; in the new version, it is changed to `tupleElement(expr(), 'name')`. In most cases, the previous version did not work, but it is possible to imagine a very unusual scenario when this change could lead to incompatibility: if you stored names of tuple elements in a column or an alias that was named differently than the tuple element's name: `SELECT 'b' AS a, CAST([tuple(123)] AS 'Array(Tuple(b UInt8))') AS t, t[1].a`. It is very unlikely that you used such queries, but we still have to mark this change as potentially backward incompatible. [#68435](https://github.com/ClickHouse/ClickHouse/pull/68435) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* When the setting `print_pretty_type_names` is enabled, the `Tuple` data type is printed in a pretty form in `SHOW CREATE TABLE` statements, the `formatQuery` function, and in the interactive mode in `clickhouse-client` and `clickhouse-local`. In previous versions, this setting was only applied to `DESCRIBE` queries and `toTypeName`. This closes [#65753](https://github.com/ClickHouse/ClickHouse/issues/65753). [#68492](https://github.com/ClickHouse/ClickHouse/pull/68492) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Do not allow explicitly specifying a UUID when creating a table in `Replicated` databases. Also, do not allow explicitly specifying the Keeper path and replica name for *MergeTree tables in Replicated databases. This introduces a new setting `database_replicated_allow_explicit_uuid` and changes the type of `database_replicated_allow_replicated_engine_arguments` from Bool to UInt64. [#66104](https://github.com/ClickHouse/ClickHouse/pull/66104) ([Alexander Tokmakov](https://github.com/tavplubix)).
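A minimal sketch of the new named-subscript parsing (an editor's illustration, not from the commit; the values are hypothetical, the syntax is from the entry above):

```sql
-- 24.9 parses t[1].x as tupleElement(t[1], 'x'), i.e. the literal element name,
-- instead of trying to resolve x as a column as older versions did
SELECT CAST([tuple(42)] AS Array(Tuple(x UInt8))) AS t,
       t[1].x;  -- returns 42
```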
#### New Feature
* Allow a user to have multiple authentication methods instead of only one. Allow authentication methods to be reset to the most recently added method (a sketch follows this list). If you want to run some instances on 24.8 and one on 24.9 for some time, it's better to set `max_authentication_methods_per_user` to 1 for that period to avoid potential incompatibilities. [#65277](https://github.com/ClickHouse/ClickHouse/pull/65277) ([Arthur Passos](https://github.com/arthurpassos)).
* Add support for `ATTACH PARTITION ALL FROM`. [#61987](https://github.com/ClickHouse/ClickHouse/pull/61987) ([Kirill Nikiforov](https://github.com/allmazz)).
* Add the `input_format_json_empty_as_default` setting which, when enabled, treats empty fields in JSON inputs as default values. Closes [#59339](https://github.com/ClickHouse/ClickHouse/issues/59339). [#66782](https://github.com/ClickHouse/ClickHouse/pull/66782) ([Alexis Arnaud](https://github.com/a-a-f)).
* Added functions `overlay` and `overlayUTF8` which replace parts of a string with another string. Example: `SELECT overlay('Hello New York', 'Jersey', 11)` returns `Hello New Jersey`. [#66933](https://github.com/ClickHouse/ClickHouse/pull/66933) ([李扬](https://github.com/taiyang-li)).
* Add support for lightweight deletes in a partition: `DELETE FROM [db.]table [ON CLUSTER cluster] [IN PARTITION partition_expr] WHERE expr`. [#67805](https://github.com/ClickHouse/ClickHouse/pull/67805) ([sunny](https://github.com/sunny19930321)).
* Implemented comparison for `Interval` data type values of different domains (such as seconds and minutes), so they are now converted to the least supertype. [#68057](https://github.com/ClickHouse/ClickHouse/pull/68057) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Add a `create_if_not_exists` setting to default to `IF NOT EXISTS` behavior during CREATE statements. [#68164](https://github.com/ClickHouse/ClickHouse/pull/68164) ([Peter Nguyen](https://github.com/petern48)).
* Make it possible to read `Iceberg` tables in Azure and locally. [#68210](https://github.com/ClickHouse/ClickHouse/pull/68210) ([Daniil Ivanik](https://github.com/divanik)).
* Query cache entries can now be dropped by tag. For example, the query cache entry created by `SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'abc'` can now be dropped by `SYSTEM DROP QUERY CACHE TAG 'abc'`. [#68477](https://github.com/ClickHouse/ClickHouse/pull/68477) ([Michał Tabaszewski](https://github.com/pinsvin00)).
* Add storage encryption for named collections. [#68615](https://github.com/ClickHouse/ClickHouse/pull/68615) ([Pablo Marcos](https://github.com/pamarcos)).
* Add a virtual column `_headers` for the `URL` table engine. Closes [#65026](https://github.com/ClickHouse/ClickHouse/issues/65026). [#68867](https://github.com/ClickHouse/ClickHouse/pull/68867) ([flynn](https://github.com/ucasfl)).
* Add a `system.projections` table to track available projections. [#68901](https://github.com/ClickHouse/ClickHouse/pull/68901) ([Jordi Villar](https://github.com/jrdi)).
* Add a new function `arrayZipUnaligned` for Spark compatibility (it is named `arrays_zip` in Spark), which allows unaligned arrays, based on the original `arrayZip`. [#69030](https://github.com/ClickHouse/ClickHouse/pull/69030) ([李扬](https://github.com/taiyang-li)).
* Added `cp`/`mv` commands for the keeper client command-line application, which atomically copy/move nodes. [#69034](https://github.com/ClickHouse/ClickHouse/pull/69034) ([Mikhail Artemenko](https://github.com/Michicosun)).
* Adds an argument `scale` (default: `true`) to the function `arrayAUC`, which allows skipping the normalization step (issue [#69609](https://github.com/ClickHouse/ClickHouse/issues/69609)). [#69717](https://github.com/ClickHouse/ClickHouse/pull/69717) ([gabrielmcg44](https://github.com/gabrielmcg44)).
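A minimal sketch of the multiple-authentication-methods feature (an editor's illustration; the user name and passwords are hypothetical, the `ADD IDENTIFIED` and `RESET AUTHENTICATION METHODS` clauses are taken from the pull request above):

```sql
CREATE USER alice IDENTIFIED WITH sha256_password BY 'first_secret';

-- attach a second credential; both now work for login
ALTER USER alice ADD IDENTIFIED WITH sha256_password BY 'second_secret';

-- keep only the most recently added method
ALTER USER alice RESET AUTHENTICATION METHODS TO NEW;
```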
#### Experimental feature
* Adds a setting `input_format_try_infer_variants`, which allows the `Variant` type to be inferred during schema inference for text formats when there is more than one possible type for column/array elements (a sketch follows this list). [#63798](https://github.com/ClickHouse/ClickHouse/pull/63798) ([Shaun Struwig](https://github.com/Blargian)).
* Add aggregate functions `distinctDynamicTypes`/`distinctJSONPaths`/`distinctJSONPathsAndTypes` for better introspection of JSON column type content. [#68463](https://github.com/ClickHouse/ClickHouse/pull/68463) ([Kruglov Pavel](https://github.com/Avogar)).
* A new algorithm to determine the unit of mark distribution between parallel replicas by a consistent hash. Different numbers of marks are chosen for different read patterns to improve performance. [#68424](https://github.com/ClickHouse/ClickHouse/pull/68424) ([Nikita Taranov](https://github.com/nickitat)).
* Previously, the algorithmic complexity of the part deduplication logic in parallel replica announcement handling was O(n^2), which could take noticeable time for tables with many parts (or partitions). This change makes the complexity O(n*log(n)). [#69596](https://github.com/ClickHouse/ClickHouse/pull/69596) ([Alexander Gololobov](https://github.com/davenger)).
* Refreshable materialized view improvements: append mode (`... REFRESH EVERY 1 MINUTE APPEND ...`) to add rows to an existing table instead of overwriting the whole table, retries (disabled by default, configured in the SETTINGS section of the query), a `SYSTEM WAIT VIEW <name>` query that waits for the currently running refresh, and some fixes. [#58934](https://github.com/ClickHouse/ClickHouse/pull/58934) ([Michael Kolupaev](https://github.com/al13n321)).
* Added `min_max` as a new type of (experimental) statistics. It supports estimating range predicates over numeric columns, e.g. `x < 100`. [#67013](https://github.com/ClickHouse/ClickHouse/pull/67013) ([JackyWoo](https://github.com/JackyWoo)).
* Improve `castOrDefault` from Variant/Dynamic columns so it works when the inner types are not convertible at all. [#67150](https://github.com/ClickHouse/ClickHouse/pull/67150) ([Kruglov Pavel](https://github.com/Avogar)).
* Replication of a subset of columns is now available through MaterializedPostgreSQL. Closes [#33748](https://github.com/ClickHouse/ClickHouse/issues/33748). [#69092](https://github.com/ClickHouse/ClickHouse/pull/69092) ([Kruglov Kirill](https://github.com/1on)).
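A sketch of `Variant` inference during schema inference (an editor's illustration; the inline data is hypothetical, the setting is from the entry above):

```sql
SET input_format_try_infer_variants = 1;

-- x holds both integer and string values, so it can be inferred
-- as a Variant instead of failing or widening everything to String
DESC format(JSONEachRow, '{"x" : 42}\n{"x" : "Hello"}');
```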
#### Performance Improvement
* Implemented reading of only the required files for Hive partitioning. [#68963](https://github.com/ClickHouse/ClickHouse/pull/68963) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Improve JOIN performance by rearranging the right table by keys when the table keys are dense in LEFT or INNER hash joins. [#60341](https://github.com/ClickHouse/ClickHouse/pull/60341) ([kevinyhzou](https://github.com/KevinyhZou)).
* Improve ALL JOIN performance by appending the list of rows lazily. [#63677](https://github.com/ClickHouse/ClickHouse/pull/63677) ([kevinyhzou](https://github.com/KevinyhZou)).
* Load filesystem cache metadata asynchronously during the boot process, in order to make restarts faster (controlled by the setting `load_metadata_asynchronously`). [#65736](https://github.com/ClickHouse/ClickHouse/pull/65736) ([Daniel Pozo Escalona](https://github.com/danipozo)).
* Functions `array` and `map` were optimized to process certain common cases much faster. [#67707](https://github.com/ClickHouse/ClickHouse/pull/67707) ([李扬](https://github.com/taiyang-li)).
* A trivial optimization of ORC string reading, especially when a column contains no NULLs. [#67794](https://github.com/ClickHouse/ClickHouse/pull/67794) ([李扬](https://github.com/taiyang-li)).
* Improved the overall performance of merges by reducing the overhead of scheduling steps of merges. [#68016](https://github.com/ClickHouse/ClickHouse/pull/68016) ([Anton Popov](https://github.com/CurtizJ)).
* Speed up requests to S3 when a profile is not set, credentials are not set, and IMDS is not available (for example, when you are querying a public bucket on a machine outside of a cloud). This closes [#52771](https://github.com/ClickHouse/ClickHouse/issues/52771). [#68082](https://github.com/ClickHouse/ClickHouse/pull/68082) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Devirtualize the format reader in `RowInputFormatWithNamesAndTypes` for some performance improvement. [#68437](https://github.com/ClickHouse/ClickHouse/pull/68437) ([李扬](https://github.com/taiyang-li)).
* Add parallel merging for the `uniq` aggregate function when aggregating with a GROUP BY key to maximize CPU utilization. [#68441](https://github.com/ClickHouse/ClickHouse/pull/68441) ([Jiebin Sun](https://github.com/jiebinn)).
* Add a setting `output_format_orc_dictionary_key_size_threshold` to allow users to enable dictionary encoding for string columns in the `ORC` output format. It helps reduce the output `ORC` file size and improve reading performance significantly (a sketch follows this list). [#68591](https://github.com/ClickHouse/ClickHouse/pull/68591) ([李扬](https://github.com/taiyang-li)).
* Introduce a new Keeper request, RemoveRecursive, which removes a node together with its entire subtree. [#69332](https://github.com/ClickHouse/ClickHouse/pull/69332) ([Mikhail Artemenko](https://github.com/Michicosun)).
* Speed up insertion into a table with a vector similarity index by adding data to the vector index in parallel. [#69493](https://github.com/ClickHouse/ClickHouse/pull/69493) ([flynn](https://github.com/ucasfl)).
* Reduce memory usage of inserts to JSON by using an adaptive write buffer size. Many files created by a JSON column in a wide part contain a small amount of data, and it doesn't make sense to allocate a 1 MB buffer for them. [#69272](https://github.com/ClickHouse/ClickHouse/pull/69272) ([Kruglov Pavel](https://github.com/Avogar)).
* Avoid returning a thread to the concurrent hash join thread pool, to prevent queries from excessively spawning threads. [#69406](https://github.com/ClickHouse/ClickHouse/pull/69406) ([Duc Canh Le](https://github.com/canhld94)).
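A sketch of dictionary-encoded ORC output (an editor's illustration; the output file name and threshold value are hypothetical, the setting name is from the entry above):

```sql
SET output_format_orc_dictionary_key_size_threshold = 0.8;

-- a low-cardinality string column benefits most from dictionary encoding
SELECT toString(number % 10) AS s
FROM numbers(1000000)
INTO OUTFILE 'dict_encoded.orc'
FORMAT ORC;
```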
#### Improvement
* `CREATE TABLE AS` now copies `PRIMARY KEY`, `ORDER BY`, and similar clauses. Currently it is supported only for the MergeTree family of table engines. [#69076](https://github.com/ClickHouse/ClickHouse/pull/69076) ([sakulali](https://github.com/sakulali)).
* Hardened parts of the codebase related to parsing of small entities. The following (minor) bugs were found and fixed: if a `DeltaLake` table is partitioned by Bool, the partition value was always interpreted as false; the `ExternalDistributed` table was using only a single shard in the provided addresses; the value of the `max_threads` setting and similar ones was printed as `'auto(N)'` instead of `auto(N)`. [#52503](https://github.com/ClickHouse/ClickHouse/pull/52503) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Use cgroup-specific metrics for CPU usage accounting instead of system-wide metrics. [#62003](https://github.com/ClickHouse/ClickHouse/pull/62003) ([Nikita Taranov](https://github.com/nickitat)).
* IO scheduling for remote S3 disks is now done at the level of HTTP socket streams (instead of whole S3 requests) to resolve `bandwidth_limit` throttling issues. [#65182](https://github.com/ClickHouse/ClickHouse/pull/65182) ([Sergei Trifonov](https://github.com/serxa)).
* Functions `upperUTF8` and `lowerUTF8` were previously only able to uppercase/lowercase Cyrillic characters. This limitation is now removed and characters in arbitrary languages are uppercased/lowercased. Example: `SELECT upperUTF8('Süden')` now returns `SÜDEN`. [#65761](https://github.com/ClickHouse/ClickHouse/pull/65761) ([李扬](https://github.com/taiyang-li)).
* When a lightweight delete happens on a table with projection(s), users previously had two choices: throw an exception (by default) or drop the projection. Now there is a third option: keep the lightweight delete and then rebuild the projection(s). [#66169](https://github.com/ClickHouse/ClickHouse/pull/66169) ([jsc0218](https://github.com/jsc0218)).
* Two options (`dns_allow_resolve_names_to_ipv4` and `dns_allow_resolve_names_to_ipv6`) have been added to allow blocking connections by IP family. [#66895](https://github.com/ClickHouse/ClickHouse/pull/66895) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Make ignoring Ctrl-Z configurable (`ignore_shell_suspend`) in clickhouse-client. [#67134](https://github.com/ClickHouse/ClickHouse/pull/67134) ([Azat Khuzhin](https://github.com/azat)).
* Improve UTF-8 validation in JSON output formats. This ensures that valid JSON is generated in the case of certain byte sequences in the result data. [#67938](https://github.com/ClickHouse/ClickHouse/pull/67938) ([mwoenker](https://github.com/mwoenker)).
* Added profile events for merges and mutations for better introspection. [#68015](https://github.com/ClickHouse/ClickHouse/pull/68015) ([Anton Popov](https://github.com/CurtizJ)).
* ODBC: get `http_max_tries` from the server configuration. [#68128](https://github.com/ClickHouse/ClickHouse/pull/68128) ([Rodolphe Dugé de Bernonville](https://github.com/RodolpheDuge)).
* Add wildcard support for user identification in the X.509 SubjectAltName extension. [#68236](https://github.com/ClickHouse/ClickHouse/pull/68236) ([Marco Vilas Boas](https://github.com/marco-vb)).
* Improve schema inference of date-times. Now `DateTime64` is used only when the date-time has a fractional part; otherwise, regular `DateTime` is used. Inference of Date/DateTime is stricter now, especially when `date_time_input_format='best_effort'`, to avoid inferring date-times from strings in corner cases. [#68382](https://github.com/ClickHouse/ClickHouse/pull/68382) ([Kruglov Pavel](https://github.com/Avogar)).
* Delete the old named collections code from dictionaries and replace it with the new one, which allows using DDL-created named collections in dictionaries. Closes [#60936](https://github.com/ClickHouse/ClickHouse/issues/60936), closes [#36890](https://github.com/ClickHouse/ClickHouse/issues/36890). [#68412](https://github.com/ClickHouse/ClickHouse/pull/68412) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Use HTTP/1.1 instead of HTTP/1.0 (set by default) for external HTTP authenticators. [#68456](https://github.com/ClickHouse/ClickHouse/pull/68456) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Added a new set of metrics for thread pool introspection, providing deeper insights into thread pool performance and behavior. [#68674](https://github.com/ClickHouse/ClickHouse/pull/68674) ([filimonov](https://github.com/filimonov)).
* Support query parameters in async inserts with the `Values` format. [#68741](https://github.com/ClickHouse/ClickHouse/pull/68741) ([Anton Popov](https://github.com/CurtizJ)).
* Support `Date32` in `dateTrunc` and `toStartOfInterval` (a sketch follows this list). [#68874](https://github.com/ClickHouse/ClickHouse/pull/68874) ([LiuNeng](https://github.com/liuneng1994)).
* Add `plan_step_name` and `plan_step_description` columns to `system.processors_profile_log`. [#68954](https://github.com/ClickHouse/ClickHouse/pull/68954) ([Alexander Gololobov](https://github.com/davenger)).
* Support for the Spanish language in the embedded dictionaries. [#69035](https://github.com/ClickHouse/ClickHouse/pull/69035) ([Vasily Okunev](https://github.com/VOkunev)).
* Add the CPU architecture to the short fault information message. [#69037](https://github.com/ClickHouse/ClickHouse/pull/69037) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Queries will fail faster if a new Keeper connection cannot be established during retries. [#69148](https://github.com/ClickHouse/ClickHouse/pull/69148) ([Raúl Marín](https://github.com/Algunenano)).
* Update the Database Factory so that user-defined database engines can have arguments, settings, and table overrides (similar to StorageFactory). [#69201](https://github.com/ClickHouse/ClickHouse/pull/69201) ([NikBarykin](https://github.com/NikBarykin)).
* The restore mode that replaces all external table engines and functions with the `Null` engine (the `restore_replace_external_engines_to_null` and `restore_replace_external_table_functions_to_null` settings) was failing if the table had SETTINGS. Now it removes settings from the table definition in this case and allows restoring such tables. [#69253](https://github.com/ClickHouse/ClickHouse/pull/69253) ([Ilya Yatsishin](https://github.com/qoega)).
* `CLICKHOUSE_PASSWORD` is correctly escaped for XML in the clickhouse image's entrypoint. [#69301](https://github.com/ClickHouse/ClickHouse/pull/69301) ([aohoyd](https://github.com/aohoyd)).
* Allow empty arguments for `arrayZip`/`arrayZipUnaligned`, as `concat` did in https://github.com/ClickHouse/ClickHouse/pull/65887. It is for Spark compatibility in the Gluten CH backend. [#69576](https://github.com/ClickHouse/ClickHouse/pull/69576) ([李扬](https://github.com/taiyang-li)).
* Support more advanced SSL options for Keeper's internal communication (e.g. private keys with a passphrase). [#69582](https://github.com/ClickHouse/ClickHouse/pull/69582) ([Antonio Andelic](https://github.com/antonio2368)).
* Index analysis can take noticeable time for big tables with many parts or partitions. This change makes it possible to kill a heavy query at that stage. [#69606](https://github.com/ClickHouse/ClickHouse/pull/69606) ([Alexander Gololobov](https://github.com/davenger)).
* Mask sensitive info in the `gcs` table function. [#69611](https://github.com/ClickHouse/ClickHouse/pull/69611) ([Vitaly Baranov](https://github.com/vitlibar)).
* Rebuild projections for merges that reduce the number of rows. [#62364](https://github.com/ClickHouse/ClickHouse/pull/62364) ([cangyin](https://github.com/cangyin)).
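A sketch of `Date32` support in `dateTrunc`/`toStartOfInterval` (an editor's illustration; the date is hypothetical and chosen outside the regular `Date` range, which is the case `Date32` exists for):

```sql
SELECT
    dateTrunc('month', toDate32('1900-02-15')) AS truncated,
    toStartOfInterval(toDate32('1900-02-15'), INTERVAL 1 MONTH) AS interval_start;
-- both expressions now accept Date32 and yield 1900-02-01
```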
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix attaching a table when the PostgreSQL database name contains "-" in the experimental and unsupported MaterializedPostgreSQL engine. [#62730](https://github.com/ClickHouse/ClickHouse/pull/62730) ([takakawa](https://github.com/takakawa)).
* Fixed an error on generated columns in the experimental and totally unsupported MaterializedPostgreSQL engine when `adnum` ordering is broken [#63161](https://github.com/ClickHouse/ClickHouse/issues/63161). Fixed an error on an `id` column with a `nextval` expression as default in the experimental and totally unsupported MaterializedPostgreSQL engine when there are generated columns in the table. Fixed an error on dropping a publication with symbols except \[a-z1-9-\]. [#67664](https://github.com/ClickHouse/ClickHouse/pull/67664) ([Kruglov Kirill](https://github.com/1on)).
* Storage Join now supports Nullable columns in the left table; closes [#61247](https://github.com/ClickHouse/ClickHouse/issues/61247). [#66926](https://github.com/ClickHouse/ClickHouse/pull/66926) ([vdimir](https://github.com/vdimir)).
* Fix an incorrect query result with parallel replicas (distributed queries as well) when the `IN` operator contains a conversion to Decimal. The bug was introduced with the new analyzer. [#67234](https://github.com/ClickHouse/ClickHouse/pull/67234) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix the problem that `ALTER TABLE ... MODIFY ORDER BY` causes inconsistent metadata. [#67436](https://github.com/ClickHouse/ClickHouse/pull/67436) ([iceFireser](https://github.com/iceFireser)).
* Fix the upper bound of the function `fromModifiedJulianDay`. It was supposed to be `9999-12-31` but was mistakenly set to `9999-01-01`. [#67583](https://github.com/ClickHouse/ClickHouse/pull/67583) ([PHO](https://github.com/depressed-pho)).
* Fix the case when the index is not at the beginning of the tuple during an `IN` query. [#67626](https://github.com/ClickHouse/ClickHouse/pull/67626) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix expiration in `RoleCache`. [#67748](https://github.com/ClickHouse/ClickHouse/pull/67748) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix window view missing blocks due to a slow flush to the view. [#67983](https://github.com/ClickHouse/ClickHouse/pull/67983) ([Raúl Marín](https://github.com/Algunenano)).
* Fix an MSan issue caused by an incorrect date format. [#68105](https://github.com/ClickHouse/ClickHouse/pull/68105) ([JackyWoo](https://github.com/JackyWoo)).
* Fixed a crash in Parquet filtering when data types in the file substantially differ from the requested types (e.g. `... FROM file('a.parquet', Parquet, 'x String')`, but the file has `x Int64`). Without this fix, use `input_format_parquet_filter_push_down = 0` as a workaround. [#68131](https://github.com/ClickHouse/ClickHouse/pull/68131) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix a crash in `lag`/`lead` which was introduced in [#67091](https://github.com/ClickHouse/ClickHouse/issues/67091). [#68262](https://github.com/ClickHouse/ClickHouse/pull/68262) ([lgbo](https://github.com/lgbo-ustc)).
* Try to fix a PostgreSQL crash when a query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)).
* After https://github.com/ClickHouse/ClickHouse/pull/61984, `schema_inference_make_columns_nullable=0` could still make columns `Nullable` in the Parquet/Arrow formats. The change was backward incompatible and users noticed the change in behaviour. This PR makes `schema_inference_make_columns_nullable=0` work as before (no Nullable columns will be inferred) and introduces a new value `auto` for this setting that will make columns `Nullable` only if the data has information about nullability (a sketch follows this list). [#68298](https://github.com/ClickHouse/ClickHouse/pull/68298) ([Kruglov Pavel](https://github.com/Avogar)).
* Fixes [#50868](https://github.com/ClickHouse/ClickHouse/issues/50868). Small DateTime64 constant values returned by a nested subquery inside a distributed query were wrongly transformed to Nulls, thus causing errors and possibly incorrect query results. [#68323](https://github.com/ClickHouse/ClickHouse/pull/68323) ([Shankar](https://github.com/shiyer7474)).
* Fix the missing sync replica mode in the `SYSTEM SYNC REPLICA` query. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)).
* Fix a bug in the key condition. [#68354](https://github.com/ClickHouse/ClickHouse/pull/68354) ([Han Fei](https://github.com/hanfei1991)).
* Fix a crash on dropping or renaming a role that is used in an LDAP external user directory. [#68355](https://github.com/ClickHouse/ClickHouse/pull/68355) ([Andrey Zvonov](https://github.com/zvonand)).
* Fix the `progress` column value of `system.view_refreshes` being greater than 1 [#68377](https://github.com/ClickHouse/ClickHouse/issues/68377). [#68378](https://github.com/ClickHouse/ClickHouse/pull/68378) ([megao](https://github.com/jetgm)).
* Process regexp flags correctly. [#68389](https://github.com/ClickHouse/ClickHouse/pull/68389) ([Han Fei](https://github.com/hanfei1991)).
* The PostgreSQL-style cast operator (`::`) works correctly even for SQL-style hex and binary string literals (e.g., `SELECT x'414243'::String`). This closes [#68324](https://github.com/ClickHouse/ClickHouse/issues/68324). [#68482](https://github.com/ClickHouse/ClickHouse/pull/68482) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* A minor patch for https://github.com/ClickHouse/ClickHouse/pull/68131. [#68494](https://github.com/ClickHouse/ClickHouse/pull/68494) ([Chang chen](https://github.com/baibaichen)).
* Fix [#68239](https://github.com/ClickHouse/ClickHouse/issues/68239): `SAMPLE n` where `n` is an integer. [#68499](https://github.com/ClickHouse/ClickHouse/pull/68499) ([Denis Hananein](https://github.com/denis-hananein)).
* Fix a bug in the Mann-Whitney U test (`mannWhitneyUTest`) when the sizes of the two distributions are not equal. [#68556](https://github.com/ClickHouse/ClickHouse/pull/68556) ([Han Fei](https://github.com/hanfei1991)).
* Fix a failure to start replication of ReplicatedMergeTree after an unexpected restart, caused by abnormal handling of a covered-by-broken part. [#68584](https://github.com/ClickHouse/ClickHouse/pull/68584) ([baolin](https://github.com/baolinhuang)).
* Fix `LOGICAL_ERROR`s when the functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)).
* The full-text index could filter out the wrong columns when indexing multiple columns, because it didn't reset `row_id` between different columns; the reproduction procedure is in tests/queries/0_stateless/03228_full_text_with_multi_col.sql. [#68644](https://github.com/ClickHouse/ClickHouse/pull/68644) ([siyuan](https://github.com/linkwk7)).
* Fix the invalid characters `\t` and `\n` in `replica_name` when creating a Replicated table, which caused incorrect parsing of 'source replica' in LogEntry. Mentioned in issue [#68640](https://github.com/ClickHouse/ClickHouse/issues/68640). [#68645](https://github.com/ClickHouse/ClickHouse/pull/68645) ([Zhigao Hong](https://github.com/zghong)).
* Added back the virtual columns `_table` and `_database` to distributed tables. They were available until version 24.3. [#68672](https://github.com/ClickHouse/ClickHouse/pull/68672) ([Anton Popov](https://github.com/CurtizJ)).
* Fix a possible error `Size of permutation (0) is less than required (...)` during Variant column permutation. [#68681](https://github.com/ClickHouse/ClickHouse/pull/68681) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix a possible error `DB::Exception: Block structure mismatch in joined block stream: different columns:` with the new JSON column. [#68686](https://github.com/ClickHouse/ClickHouse/pull/68686) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix an issue with materialized constant keys when hashing maps with arrays as keys in the functions `sipHash(64/128)Keyed`. [#68731](https://github.com/ClickHouse/ClickHouse/pull/68731) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Make `ColumnsDescription::toString` format each column using the same `IAST::FormatState` object. This results in uniform column metadata being written to disk and ZooKeeper. [#68733](https://github.com/ClickHouse/ClickHouse/pull/68733) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Fix merging of aggregated data for grouping sets. [#68744](https://github.com/ClickHouse/ClickHouse/pull/68744) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix a logical error when we create a replicated merge tree, alter a column, and then execute modify statistics. [#68820](https://github.com/ClickHouse/ClickHouse/pull/68820) ([Han Fei](https://github.com/hanfei1991)).
* Fix resolving dynamic subcolumns from subqueries in the analyzer. [#68824](https://github.com/ClickHouse/ClickHouse/pull/68824) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix complex types metadata parsing in DeltaLake. Closes [#68739](https://github.com/ClickHouse/ClickHouse/issues/68739). [#68836](https://github.com/ClickHouse/ClickHouse/pull/68836) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fixed asynchronous inserts in the case when the metadata of the table is changed (by `ALTER ADD/MODIFY COLUMN` queries) after the insert but before the flush to the table. [#68837](https://github.com/ClickHouse/ClickHouse/pull/68837) ([Anton Popov](https://github.com/CurtizJ)).
* Fix an unexpected exception when passing an empty tuple in an array. This fixes [#68618](https://github.com/ClickHouse/ClickHouse/issues/68618). [#68848](https://github.com/ClickHouse/ClickHouse/pull/68848) ([Amos Bird](https://github.com/amosbird)).
* Fix parsing of pure metadata mutation commands. [#68935](https://github.com/ClickHouse/ClickHouse/pull/68935) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix a possible wrong result during `anyHeavy` state merge. [#68950](https://github.com/ClickHouse/ClickHouse/pull/68950) ([Raúl Marín](https://github.com/Algunenano)).
* Fixed writing to Materialized Views with the setting `optimize_functions_to_subcolumns` enabled. [#68951](https://github.com/ClickHouse/ClickHouse/pull/68951) ([Anton Popov](https://github.com/CurtizJ)).
* Don't use the serializations cache in const Dynamic column methods. It could lead to use of an uninitialized value or even a race condition during aggregations. [#68953](https://github.com/ClickHouse/ClickHouse/pull/68953) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix a parsing error when null should be inserted as the default in some cases during JSON type parsing. [#68955](https://github.com/ClickHouse/ClickHouse/pull/68955) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix `Content-Encoding` not being sent in some compressed responses. [#64802](https://github.com/ClickHouse/ClickHouse/issues/64802). [#68975](https://github.com/ClickHouse/ClickHouse/pull/68975) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* There were cases when a path was concatenated incorrectly and contained the `//` part; this is solved using path normalization. [#69066](https://github.com/ClickHouse/ClickHouse/pull/69066) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix a logical error when we have an empty async insert. [#69080](https://github.com/ClickHouse/ClickHouse/pull/69080) ([Han Fei](https://github.com/hanfei1991)).
* Fixed a data race in the progress indication in clickhouse-client during query cancellation. [#69081](https://github.com/ClickHouse/ClickHouse/pull/69081) ([Sergei Trifonov](https://github.com/serxa)).
* Fix a bug where the vector similarity index (currently experimental) was not utilized when used with cosine distance as the distance function. [#69090](https://github.com/ClickHouse/ClickHouse/pull/69090) ([flynn](https://github.com/ucasfl)).
* This change addresses an issue where attempting to create a Replicated database again after a server failure during the initial creation process could result in an error. [#69102](https://github.com/ClickHouse/ClickHouse/pull/69102) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Don't infer the Bool type from String in CSV when `input_format_csv_try_infer_numbers_from_strings = 1`, because we don't allow reading bool values from strings. [#69109](https://github.com/ClickHouse/ClickHouse/pull/69109) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix parsing errors of `EXPLAIN AST` insert queries on the client when `--multiquery` is enabled. [#69123](https://github.com/ClickHouse/ClickHouse/pull/69123) ([wxybear](https://github.com/wxybear)).
* The `UNION` clause in subqueries wasn't handled correctly in queries with parallel replicas and led to a LOGICAL_ERROR `Duplicate announcement received for replica`. [#69146](https://github.com/ClickHouse/ClickHouse/pull/69146) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix propagating the structure argument in `s3Cluster`. Previously, the `DEFAULT` expression of a column could be lost when sending the query to the replicas in `s3Cluster`. [#69147](https://github.com/ClickHouse/ClickHouse/pull/69147) ([Kruglov Pavel](https://github.com/Avogar)).
* Respect format settings in the `Values` format during conversion from expressions to the destination type. [#69149](https://github.com/ClickHouse/ClickHouse/pull/69149) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix `clickhouse-client --queries-file` for readonly users (previously it failed with `Cannot modify 'log_comment' setting in readonly mode`). [#69175](https://github.com/ClickHouse/ClickHouse/pull/69175) ([Azat Khuzhin](https://github.com/azat)).
* Fix a data race in clickhouse-client when it's piped to a process that terminated early. [#69186](https://github.com/ClickHouse/ClickHouse/pull/69186) ([vdimir](https://github.com/vdimir)).
* Fix incorrect results of `uniq` and GROUP BY for JSON/Dynamic types. [#69203](https://github.com/ClickHouse/ClickHouse/pull/69203) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix the INFILE format detection for asynchronous inserts. If the format is not explicitly defined in the FORMAT clause, it can be detected from the INFILE file extension. [#69237](https://github.com/ClickHouse/ClickHouse/pull/69237) ([Julia Kartseva](https://github.com/jkartseva)).
* After [this issue](https://github.com/ClickHouse/ClickHouse/pull/59946#issuecomment-1943653197) there are quite a few table replicas in production such that their `metadata_version` node value is both equal to `0` and different from the respective table's `metadata` node version. This led to `ALTER` queries failing on such replicas. [#69274](https://github.com/ClickHouse/ClickHouse/pull/69274) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Mark the Dynamic type as not safe as a primary key type to avoid issues with Fields. [#69311](https://github.com/ClickHouse/ClickHouse/pull/69311) ([Kruglov Pavel](https://github.com/Avogar)).
* Improve restoring of access entities' dependencies. [#69346](https://github.com/ClickHouse/ClickHouse/pull/69346) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix undefined behavior when all connection attempts fail while getting a connection for insertions. [#69390](https://github.com/ClickHouse/ClickHouse/pull/69390) ([Pablo Marcos](https://github.com/pamarcos)).
* Close [#69135](https://github.com/ClickHouse/ClickHouse/issues/69135). We tried to reuse joined data for a `cross` join, but this cannot happen in ClickHouse at present; it's better to keep `have_compressed` in `reuseJoinedData`. [#69404](https://github.com/ClickHouse/ClickHouse/pull/69404) ([lgbo](https://github.com/lgbo-ustc)).
* Make the `materialize()` function return a full column when the parameter is a sparse column. [#69429](https://github.com/ClickHouse/ClickHouse/pull/69429) ([Alexander Gololobov](https://github.com/davenger)).
* Fixed a `LOGICAL_ERROR` with the function `sqidDecode` ([#69450](https://github.com/ClickHouse/ClickHouse/issues/69450)). [#69451](https://github.com/ClickHouse/ClickHouse/pull/69451) ([Robert Schulze](https://github.com/rschu1ze)).
* A quick fix for an s3queue problem on 24.6, or a CREATE query with a Replicated database. [#69454](https://github.com/ClickHouse/ClickHouse/pull/69454) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fixed a case when memory consumption was too high because of squashing in `INSERT INTO ... SELECT` or `CREATE TABLE AS SELECT` queries. [#69469](https://github.com/ClickHouse/ClickHouse/pull/69469) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* The statements `SHOW COLUMNS` and `SHOW INDEX` now work properly if the table has dots in its name. [#69514](https://github.com/ClickHouse/ClickHouse/pull/69514) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Usage of the query cache for queries with an overflow mode other than 'throw' is now disallowed. This prevents situations where potentially truncated and incorrect query results could be stored in the query cache (issue [#67476](https://github.com/ClickHouse/ClickHouse/issues/67476)). [#69549](https://github.com/ClickHouse/ClickHouse/pull/69549) ([Robert Schulze](https://github.com/rschu1ze)).
* Keep the original order of conditions during the move to PREWHERE. Previously, the order could change, which could lead to failing queries when the order is important. [#69560](https://github.com/ClickHouse/ClickHouse/pull/69560) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix Keeper multi-request preprocessing after a ZNOAUTH error. [#69627](https://github.com/ClickHouse/ClickHouse/pull/69627) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix a METADATA_MISMATCH that might have happened due to TTL with a WHERE clause in DatabaseReplicated when creating a new replica. [#69736](https://github.com/ClickHouse/ClickHouse/pull/69736) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix the `StorageS3(Azure)Queue` setting `tracked_file_ttl_sec`: it was written to Keeper with the key `tracked_file_ttl_sec` but read as `tracked_files_ttl_sec`, which was a typo. [#69742](https://github.com/ClickHouse/ClickHouse/pull/69742) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Use `tryConvertFieldToType` in `getHyperrectangleForRowGroup`. [#69745](https://github.com/ClickHouse/ClickHouse/pull/69745) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Revert "Fix prewhere without columns and without adaptive index granularity (almost w/o anything)". Due to the reverted changes, some errors might happen when reading data parts produced by old ClickHouse releases (presumably 2021 or older). [#68897](https://github.com/ClickHouse/ClickHouse/pull/68897) ([Alexander Gololobov](https://github.com/davenger)).

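A sketch of the new `auto` value for `schema_inference_make_columns_nullable` mentioned in the list above (an editor's illustration; the file name is hypothetical):

```sql
-- columns are inferred as Nullable only if the file's metadata
-- indicates that they can actually contain NULLs
SET schema_inference_make_columns_nullable = 'auto';
DESC file('data.parquet');
```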
### <a id="248"></a> ClickHouse release 24.8 LTS, 2024-08-20

#### Backward Incompatible Change

@@ -28,7 +191,7 @@
* Add the `_etag` virtual column for the S3 table engine. Fixes [#65312](https://github.com/ClickHouse/ClickHouse/issues/65312). [#65386](https://github.com/ClickHouse/ClickHouse/pull/65386) ([skyoct](https://github.com/skyoct)).
* Added a tagging (namespace) mechanism for the query cache. The same queries with different tags are considered different by the query cache. Example: `SELECT 1 SETTINGS use_query_cache = 1, query_cache_tag = 'abc'` and `SELECT 1 SETTINGS use_query_cache = 1, query_cache_tag = 'def'` now create different query cache entries. [#68235](https://github.com/ClickHouse/ClickHouse/pull/68235) ([sakulali](https://github.com/sakulali)).
* Support more variants of JOIN strictness (`LEFT/RIGHT SEMI/ANTI/ANY JOIN`) with inequality conditions which involve columns from both the left and right table, e.g. `t1.y < t2.y` (see the setting `allow_experimental_join_condition`). [#64281](https://github.com/ClickHouse/ClickHouse/pull/64281) ([lgbo](https://github.com/lgbo-ustc)).
* Interpret Hive-style partitioning for different engines (`File`, `URL`, `S3`, `AzureBlobStorage`, `HDFS`). Hive-style partitioning organizes data into partitioned sub-directories, making it efficient to query and manage large datasets. Currently, it only creates virtual columns with the appropriate name and data. A follow-up PR will introduce the appropriate data filtering (performance speedup). [#65997](https://github.com/ClickHouse/ClickHouse/pull/65997) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Add the function `printf` for Spark compatibility (but you can use the existing `format` function); a sketch follows this section. [#66257](https://github.com/ClickHouse/ClickHouse/pull/66257) ([李扬](https://github.com/taiyang-li)).
* Add options `restore_replace_external_engines_to_null` and `restore_replace_external_table_functions_to_null` to replace external engines and table functions with the `Null` engine, which can be useful for testing. It should work for RESTORE and explicit table creation. [#66536](https://github.com/ClickHouse/ClickHouse/pull/66536) ([Ilya Yatsishin](https://github.com/qoega)).
* Added support for reading `MULTILINESTRING` geometry in the `WKT` format using the function `readWKTLineString`. [#67647](https://github.com/ClickHouse/ClickHouse/pull/67647) ([Jacob Reckhard](https://github.com/jacobrec)).

@@ -80,7 +243,6 @@
* Automatically retry Keeper requests in KeeperMap if they happen because of a timeout or connection loss. [#67448](https://github.com/ClickHouse/ClickHouse/pull/67448) ([Antonio Andelic](https://github.com/antonio2368)).
* Add `-no-pie` to Aarch64 Linux builds to allow proper introspection and symbolizing of stack traces after a ClickHouse restart. [#67916](https://github.com/ClickHouse/ClickHouse/pull/67916) ([filimonov](https://github.com/filimonov)).
* Fix settings and `current_database` in `system.processes` for async BACKUP/RESTORE. [#68163](https://github.com/ClickHouse/ClickHouse/pull/68163) ([Azat Khuzhin](https://github.com/azat)).
* Remove unnecessary logs for non-replicated `MergeTree`. [#68238](https://github.com/ClickHouse/ClickHouse/pull/68238) ([Daniil Ivanik](https://github.com/divanik)).

#### Build/Testing/Packaging Improvement
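A sketch of the `printf` function added in 24.8 (an editor's illustration; the format string and values are hypothetical):

```sql
-- C-style formatting, similar to Spark's printf
SELECT printf('%s has %d parts', 'table_a', 12) AS msg;
-- returns 'table_a has 12 parts'
```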
README.md

@@ -34,7 +34,7 @@ curl https://clickhouse.com/ | sh

Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know.

* [v24.10 Community Call](https://clickhouse.com/company/events/v24-10-community-release-call) - October 31

## Upcoming Events

@@ -42,13 +42,11 @@ Keep an eye out for upcoming meetups and events around the world. Somewhere else

Upcoming meetups

* [Jakarta Meetup](https://www.meetup.com/clickhouse-indonesia-user-group/events/303191359/) - October 1
* [Singapore Meetup](https://www.meetup.com/clickhouse-singapore-meetup-group/events/303212064/) - October 3
* [Madrid Meetup](https://www.meetup.com/clickhouse-spain-user-group/events/303096564/) - October 22
* [Oslo Meetup](https://www.meetup.com/open-source-real-time-data-warehouse-real-time-analytics/events/302938622) - October 31
* [Barcelona Meetup](https://www.meetup.com/clickhouse-spain-user-group/events/303096876/) - November 12
* [Ghent Meetup](https://www.meetup.com/clickhouse-belgium-user-group/events/303049405/) - November 19
* [Dubai Meetup](https://www.meetup.com/clickhouse-dubai-meetup-group/events/303096989/) - November 21
* [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/303096434) - November 26

@@ -67,6 +65,8 @@ Recently completed meetups

* [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12
* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17
* [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17
* [Bangalore Meetup](https://www.meetup.com/clickhouse-bangalore-user-group/events/303208274/) - September 18
* [Tel Aviv Meetup](https://www.meetup.com/clickhouse-meetup-israel/events/303095121) - September 22

## Recent Recordings

* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U). Whenever possible, recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments".
SECURITY.md

@@ -14,9 +14,10 @@ The following versions of ClickHouse server are currently supported with security updates:

| Version | Supported |
|:-|:-|
| 24.9 | ✔️ |
| 24.8 | ✔️ |
| 24.7 | ✔️ |
| 24.6 | ❌ |
| 24.5 | ❌ |
| 24.4 | ❌ |
| 24.3 | ✔️ |
@@ -3,7 +3,11 @@ add_subdirectory (Data)
add_subdirectory (Data/ODBC)
add_subdirectory (Foundation)
add_subdirectory (JSON)

if (USE_MONGODB)
    add_subdirectory(MongoDB)
endif()

add_subdirectory (Net)
add_subdirectory (NetSSL_OpenSSL)
add_subdirectory (Redis)
17  ci_v2/docker/style-test/Dockerfile (new file)

@@ -0,0 +1,17 @@
# docker build -t clickhouse/style-test .
FROM ubuntu:22.04

RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
    aspell \
    libxml2-utils \
    python3-pip \
    locales \
    git \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
ENV LC_ALL=en_US.UTF-8

COPY requirements.txt /
RUN pip3 install --no-cache-dir -r requirements.txt
4  ci_v2/docker/style-test/requirements.txt (new file)

@@ -0,0 +1,4 @@
requests==2.32.3
yamllint==1.26.3
codespell==2.2.1
https://clickhouse-builds.s3.amazonaws.com/packages/praktika-0.1-py3-none-any.whl
410  ci_v2/jobs/check_style.py (new file)

@@ -0,0 +1,410 @@
import math
import multiprocessing
import os
import re
import sys
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path

from praktika.result import Result
from praktika.utils import Shell, Utils

NPROC = multiprocessing.cpu_count()


def chunk_list(data, n):
    """Split the data list into n nearly equal-sized chunks."""
    chunk_size = math.ceil(len(data) / n)
    for i in range(0, len(data), chunk_size):
        yield data[i : i + chunk_size]

def run_check_concurrent(check_name, check_function, files, nproc=NPROC):
    stop_watch = Utils.Stopwatch()

    if not files:
        # a bare `raise` here would fail with "No active exception to re-raise"
        raise ValueError(f"File list is empty [{files}]")

    file_chunks = list(chunk_list(files, nproc))
    results = []

    # Run check_function concurrently on each chunk
    # (use the nproc parameter so run_check's nproc=1 is respected)
    with ProcessPoolExecutor(max_workers=nproc) as executor:
        futures = [executor.submit(check_function, chunk) for chunk in file_chunks]
        # Wait for results and process them (optional)
        for future in futures:
            try:
                res = future.result()
                if res and res not in results:
                    results.append(res)
            except Exception as e:
                results.append(f"Exception in {check_name}: {e}")

    result = Result(
        name=check_name,
        status=Result.Status.SUCCESS if not results else Result.Status.FAILED,
        start_time=stop_watch.start_time,
        duration=stop_watch.duration,
        info=f"errors: {results}" if results else "",
    )
    return result


def run_simple_check(check_name, check_function, **kwargs):
    stop_watch = Utils.Stopwatch()

    error = check_function(**kwargs)

    result = Result(
        name=check_name,
        status=Result.Status.SUCCESS if not error else Result.Status.FAILED,
        start_time=stop_watch.start_time,
        duration=stop_watch.duration,
        info=error,
    )
    return result


def run_check(check_name, check_function, files):
    return run_check_concurrent(check_name, check_function, files, nproc=1)

def check_duplicate_includes(file_path):
    includes = []
    with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
        for line in f:
            if re.match(r"^#include ", line):
                includes.append(line.strip())

    include_counts = {line: includes.count(line) for line in includes}
    duplicates = {line: count for line, count in include_counts.items() if count > 1}

    if duplicates:
        return f"{file_path}: {duplicates}"
    return ""


def check_whitespaces(file_paths):
    for file in file_paths:
        exit_code, out, err = Shell.get_res_stdout_stderr(
            f'./ci_v2/jobs/scripts/check_style/double_whitespaces.pl "{file}"',
            verbose=False,
        )
        if out or err:
            return out + " err: " + err
    return ""


def check_yamllint(file_paths):
    file_paths = " ".join([f"'{file}'" for file in file_paths])
    exit_code, out, err = Shell.get_res_stdout_stderr(
        f"yamllint --config-file=./.yamllint {file_paths}", verbose=False
    )
    return out or err


def check_xmllint(file_paths):
    if not isinstance(file_paths, list):
        file_paths = [file_paths]
    file_paths = " ".join([f"'{file}'" for file in file_paths])
    exit_code, out, err = Shell.get_res_stdout_stderr(
        f"xmllint --noout --nonet {file_paths}", verbose=False
    )
    return out or err


def check_functional_test_cases(files):
    """
    Queries with event_date should use yesterday(), not today().
    NOTE: it is not that accurate, but at least something.
    """

    patterns = [
        re.compile(
            r"(?i)where.*?\bevent_date\s*(=|>=)\s*today\(\)(?!\s*-\s*1)",
            re.IGNORECASE | re.DOTALL,
        )
    ]

    errors = []
    for test_case in files:
        try:
            with open(test_case, "r", encoding="utf-8", errors="replace") as f:
                file_content = " ".join(
                    f.read().splitlines()
                )  # Combine lines into a single string

            # Check if any pattern matches in the concatenated string
            if any(pattern.search(file_content) for pattern in patterns):
                errors.append(
                    f"event_date should be filtered using >=yesterday() in {test_case} (to avoid flakiness)"
                )

        except Exception as e:
            errors.append(f"Error checking {test_case}: {e}")

    for test_case in files:
        if "fail" in test_case:
            errors.append(f"test case {test_case} includes 'fail' in its name")

    return " ".join(errors)

def check_gaps_in_tests_numbers(file_paths, gap_threshold=100):
    test_numbers = set()

    pattern = re.compile(r"(\d+)")

    for file in file_paths:
        file_name = os.path.basename(file)
        match = pattern.search(file_name)
        if match:
            test_numbers.add(int(match.group(1)))

    sorted_numbers = sorted(test_numbers)
    large_gaps = []
    for i in range(1, len(sorted_numbers)):
        prev_num = sorted_numbers[i - 1]
        next_num = sorted_numbers[i]
        diff = next_num - prev_num
        if diff >= gap_threshold:
            large_gaps.append(f"Gap ({prev_num}, {next_num}) > {gap_threshold}")

    return large_gaps


def check_broken_links(path, exclude_paths):
    broken_symlinks = []

    # use a distinct loop variable so the `path` argument is not shadowed
    for candidate in Path(path).rglob("*"):
        if any(exclude_path in str(candidate) for exclude_path in exclude_paths):
            continue
        if candidate.is_symlink():
            if not candidate.exists():
                broken_symlinks.append(str(candidate))

    if broken_symlinks:
        for symlink in broken_symlinks:
            print(symlink)
        return f"Broken symlinks found: {broken_symlinks}"
    else:
        return ""

def check_cpp_code():
|
||||
res, out, err = Shell.get_res_stdout_stderr(
|
||||
"./ci_v2/jobs/scripts/check_style/check_cpp.sh"
|
||||
)
|
||||
if err:
|
||||
out += err
|
||||
return out
|
||||
|
||||
|
||||
def check_repo_submodules():
|
||||
res, out, err = Shell.get_res_stdout_stderr(
|
||||
"./ci_v2/jobs/scripts/check_style/check_submodules.sh"
|
||||
)
|
||||
if err:
|
||||
out += err
|
||||
return out
|
||||
|
||||
|
||||
def check_other():
|
||||
res, out, err = Shell.get_res_stdout_stderr(
|
||||
"./ci_v2/jobs/scripts/check_style/checks_to_refactor.sh"
|
||||
)
|
||||
if err:
|
||||
out += err
|
||||
return out
|
||||
|
||||
|
||||
def check_codespell():
|
||||
res, out, err = Shell.get_res_stdout_stderr(
|
||||
"./ci_v2/jobs/scripts/check_style/check_typos.sh"
|
||||
)
|
||||
if err:
|
||||
out += err
|
||||
return out
|
||||
|
||||
|
||||
def check_aspell():
|
||||
res, out, err = Shell.get_res_stdout_stderr(
|
||||
"./ci_v2/jobs/scripts/check_style/check_aspell.sh"
|
||||
)
|
||||
if err:
|
||||
out += err
|
||||
return out
|
||||
|
||||
|
||||
def check_mypy():
|
||||
res, out, err = Shell.get_res_stdout_stderr(
|
||||
"./ci_v2/jobs/scripts/check_style/check-mypy"
|
||||
)
|
||||
if err:
|
||||
out += err
|
||||
return out
|
||||
|
||||
|
||||
def check_pylint():
|
||||
res, out, err = Shell.get_res_stdout_stderr(
|
||||
"./ci_v2/jobs/scripts/check_style/check-pylint"
|
||||
)
|
||||
if err:
|
||||
out += err
|
||||
return out
|
||||
|
||||
|
||||
def check_file_names(files):
|
||||
files_set = set()
|
||||
for file in files:
|
||||
file_ = file.lower()
|
||||
if file_ in files_set:
|
||||
return f"Non-uniq file name in lower case: {file}"
|
||||
files_set.add(file_)
|
||||
return ""
|
||||
|
||||
|
||||


if __name__ == "__main__":
    results = []
    stop_watch = Utils.Stopwatch()

    all_files = Utils.traverse_paths(
        include_paths=["."],
        exclude_paths=[
            "./.git",
            "./contrib",
            "./build",
        ],
        not_exists_ok=True,  # ./build may exist when running locally
    )

    cpp_files = Utils.traverse_paths(
        include_paths=["./src", "./base", "./programs", "./utils"],
        exclude_paths=[
            "./base/glibc-compatibility",
            "./contrib/consistent-hashing",
            "./base/widechar_width",
        ],
        file_suffixes=[".h", ".cpp"],
    )

    yaml_workflow_files = Utils.traverse_paths(
        include_paths=["./.github"],
        exclude_paths=[],
        file_suffixes=[".yaml", ".yml"],
    )

    xml_files = Utils.traverse_paths(
        include_paths=["."],
        exclude_paths=["./.git", "./contrib/"],
        file_suffixes=[".xml"],
    )

    functional_test_files = Utils.traverse_paths(
        include_paths=["./tests/queries"],
        exclude_paths=[],
        file_suffixes=[".sql", ".sh", ".py", ".j2"],
    )

    results.append(
        Result(
            name="Read Files",
            status=Result.Status.SUCCESS,
            start_time=stop_watch.start_time,
            duration=stop_watch.duration,
        )
    )

    results.append(
        run_check_concurrent(
            check_name="Whitespace Check",
            check_function=check_whitespaces,
            files=cpp_files,
        )
    )
    results.append(
        run_check_concurrent(
            check_name="YamlLint Check",
            check_function=check_yamllint,
            files=yaml_workflow_files,
        )
    )
    results.append(
        run_check_concurrent(
            check_name="XmlLint Check",
            check_function=check_xmllint,
            files=xml_files,
        )
    )
    results.append(
        run_check_concurrent(
            check_name="Functional Tests scripts smoke check",
            check_function=check_functional_test_cases,
            files=functional_test_files,
        )
    )
    results.append(
        run_check(
            check_name="Check Tests Numbers",
            check_function=check_gaps_in_tests_numbers,
            files=functional_test_files,
        )
    )
    results.append(
        run_simple_check(
            check_name="Check Broken Symlinks",
            check_function=check_broken_links,
            path="./",
            exclude_paths=["contrib/", "metadata/", "programs/server/data"],
        )
    )
    results.append(
        run_simple_check(
            check_name="Check CPP code",
            check_function=check_cpp_code,
        )
    )
    results.append(
        run_simple_check(
            check_name="Check Submodules",
            check_function=check_repo_submodules,
        )
    )
    results.append(
        run_check(
            check_name="Check File Names",
            check_function=check_file_names,
            files=all_files,
        )
    )
    results.append(
        run_simple_check(
            check_name="Check Many Different Things",
            check_function=check_other,
        )
    )
    results.append(
        run_simple_check(
            check_name="Check Codespell",
            check_function=check_codespell,
        )
    )
    results.append(
        run_simple_check(
            check_name="Check Aspell",
            check_function=check_aspell,
        )
    )

    res = Result.create_from(results=results, stopwatch=stop_watch).dump()

    if not res.is_ok():
        print("Style check: failed")
        for result in results:
            if not result.is_ok():
                print("Failed check:")
                print(" | ", result)
        sys.exit(1)
    else:
        print("Style check: ok")
3050
ci_v2/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt
Normal file
File diff suppressed because it is too large
59
ci_v2/jobs/scripts/check_style/check_aspell.sh
Executable file
@ -0,0 +1,59 @@
#!/usr/bin/env bash

# force-enable double star globbing
shopt -s globstar

# Perform spell checking on the docs

if [[ ${1:-} == "--help" ]] || [[ ${1:-} == "-h" ]]; then
    echo "Usage: $0 [--help|-h] [-i [filename]]"
    echo "  --help|-h: print this help"
    echo "  -i: interactive mode. If filename is specified, check only this file, otherwise check all files"
    exit 0
fi
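
# e.g. a hypothetical interactive invocation for a single file under docs/en:
#     ./ci_v2/jobs/scripts/check_style/check_aspell.sh -i operations/settings.md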

ROOT_PATH="."

CHECK_LANG=en

ASPELL_IGNORE_PATH="${ROOT_PATH}/utils/check-style/aspell-ignore/${CHECK_LANG}"

if [[ ${1:-} == "-i" ]]; then
    if [[ ! -z ${2:-} ]]; then
        FILES_TO_CHECK=${ROOT_PATH}/docs/${CHECK_LANG}/${2}
    else
        FILES_TO_CHECK=${ROOT_PATH}/docs/${CHECK_LANG}/**/*.md
    fi
    for fname in ${FILES_TO_CHECK}; do
        echo "Checking $fname"
        aspell --personal=aspell-dict.txt --add-sgml-skip=code --encoding=utf-8 --mode=markdown -W 3 --lang=${CHECK_LANG} --home-dir=${ASPELL_IGNORE_PATH} -c "$fname"
    done
    exit
fi

STATUS=0
for fname in ${ROOT_PATH}/docs/${CHECK_LANG}/**/*.md; do
    errors=$(cat "$fname" \
        | aspell list \
            -W 3 \
            --personal=aspell-dict.txt \
            --add-sgml-skip=code \
            --encoding=utf-8 \
            --mode=markdown \
            --lang=${CHECK_LANG} \
            --home-dir=${ASPELL_IGNORE_PATH} \
        | sort | uniq)
    if [ ! -z "$errors" ]; then
        STATUS=1
        echo "====== $fname ======"
        echo "$errors"
    fi
done

if (( STATUS != 0 )); then
    echo "====== Errors found ======"
    echo "To exclude some words add them to the dictionary file \"${ASPELL_IGNORE_PATH}/aspell-dict.txt\""
    echo "You can also run ${0} -i to see the errors interactively and fix them or add to the dictionary file"
fi

exit ${STATUS}
339
ci_v2/jobs/scripts/check_style/check_cpp.sh
Executable file
@ -0,0 +1,339 @@
#!/usr/bin/env bash

# For code formatting we have clang-format.
#
# But it's not sane to apply clang-format to the whole code base,
# because it sometimes makes properly formatted files worse.
#
# It's only reasonable to blindly apply clang-format in cases
# when the code is likely to be out of style.
#
# For this purpose we have a script that will use very primitive heuristics
# (simple regexps) to check if the code is likely to have basic style violations,
# and then to run the formatter only for the specified files.
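# A few hypothetical lines that the regexps below would flag:
#     if(x > 0)       -- no whitespace between "if" and "("
#     while (x) {     -- opening curly brace not on its own line
#     f( x )          -- whitespace just inside the parentheses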
LC_ALL="en_US.UTF-8"
ROOT_PATH="."
EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/.*.cpp|utils/keeper-bench/example.yaml'

# From [1]:
#     But since array_to_string_internal() in array.c still loops over array
#     elements and concatenates them into a string, it's probably not more
#     efficient than the looping solutions proposed, but it's more readable.
#
#   [1]: https://stackoverflow.com/a/15394738/328260
function in_array()
{
    local IFS="|"
    local value=$1 && shift

    [[ "${IFS}${*}${IFS}" =~ "${IFS}${value}${IFS}" ]]
}

find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null |
    grep -vP $EXCLUDE_DIRS |
    xargs grep $@ -P '((class|struct|namespace|enum|if|for|while|else|throw|switch).*|\)(\s*const)?(\s*override)?\s*)\{$|\s$|^ {1,3}[^\* ]\S|\t|^\s*(if|else if|if constexpr|else if constexpr|for|while|catch|switch)\(|\( [^\s\\]|\S \)' |
# a curly brace not in a new line, but not for the case of C++11 init or agg. initialization | trailing whitespace | number of ws not a multiple of 4, but not in the case of comment continuation | missing whitespace after for/if/while... before opening brace | whitespaces inside braces
    grep -v -P '(//|:\s+\*|\$\(\()| \)"'
# single-line comment | continuation of a multiline comment | a typical piece of embedded shell code | something like ending of raw string literal

# Tabs
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null |
    grep -vP $EXCLUDE_DIRS |
    xargs grep $@ -F $'\t'

# // namespace comments are unneeded
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null |
    grep -vP $EXCLUDE_DIRS |
    xargs grep $@ -P '}\s*//+\s*namespace\s*'

# Broken symlinks
find -L $ROOT_PATH -type l 2>/dev/null | grep -v contrib && echo "^ Broken symlinks found"

# Duplicated or incorrect setting declarations
SETTINGS_FILE=$(mktemp)
cat $ROOT_PATH/src/Core/Settings.cpp $ROOT_PATH/src/Core/FormatFactorySettingsDeclaration.h | grep "M(" | awk '{print substr($2, 0, length($2) - 1) " " substr($1, 3, length($1) - 3) " SettingsDeclaration" }' > ${SETTINGS_FILE}
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep "extern const Settings" -T | awk '{print substr($5, 0, length($5) -1) " " substr($4, 9) " " substr($1, 0, length($1) - 1)}' >> ${SETTINGS_FILE}

# Duplicate extern declarations for settings
awk '{if (seen[$0]++) print $3 " -> " $1 ;}' ${SETTINGS_FILE} | while read line;
do
    echo "Found duplicated setting declaration in: $line"
done

# Incorrect declarations for settings
for setting in $(awk '{print $1 " " $2}' ${SETTINGS_FILE} | sort | uniq | awk '{ print $1 }' | sort | uniq -d);
do
    expected=$(grep "^$setting " ${SETTINGS_FILE} | grep SettingsDeclaration | awk '{ print $2 }')
    grep "^$setting " ${SETTINGS_FILE} | grep -v " $expected" | awk '{ print $3 " found setting " $1 " with type " $2 }' | while read line;
    do
        echo "In $line but it should be $expected"
    done
done

rm ${SETTINGS_FILE}

# Unused/Undefined/Duplicates ErrorCodes/ProfileEvents/CurrentMetrics
declare -A EXTERN_TYPES
EXTERN_TYPES[ErrorCodes]=int
EXTERN_TYPES[ProfileEvents]=Event
EXTERN_TYPES[CurrentMetrics]=Metric

EXTERN_TYPES_EXCLUDES=(
    ProfileEvents::global_counters
    ProfileEvents::Event
    ProfileEvents::Count
    ProfileEvents::Counters
    ProfileEvents::end
    ProfileEvents::increment
    ProfileEvents::incrementForLogMessage
    ProfileEvents::getName
    ProfileEvents::Timer
    ProfileEvents::Type
    ProfileEvents::TypeEnum
    ProfileEvents::dumpToMapColumn
    ProfileEvents::getProfileEvents
    ProfileEvents::ThreadIdToCountersSnapshot
    ProfileEvents::LOCAL_NAME
    ProfileEvents::keeper_profile_events
    ProfileEvents::CountersIncrement

    CurrentMetrics::add
    CurrentMetrics::sub
    CurrentMetrics::get
    CurrentMetrics::set
    CurrentMetrics::end
    CurrentMetrics::Increment
    CurrentMetrics::Metric
    CurrentMetrics::values
    CurrentMetrics::Value
    CurrentMetrics::keeper_metrics

    ErrorCodes::ErrorCode
    ErrorCodes::getName
    ErrorCodes::increment
    ErrorCodes::end
    ErrorCodes::values
    ErrorCodes::values[i]
    ErrorCodes::getErrorCodeByName
    ErrorCodes::Value
)
for extern_type in ${!EXTERN_TYPES[@]}; do
    type_of_extern=${EXTERN_TYPES[$extern_type]}
    allowed_chars='[_A-Za-z]+'

    # Unused
    # NOTE: to fix automatically, replace echo with:
    # sed -i "/extern const $type_of_extern $val/d" $file
    find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | {
        # NOTE: the check is pretty dumb and distinguishes only by the type_of_extern,
        # and this matches with zkutil::CreateMode
        grep -v -e 'src/Common/ZooKeeper/Types.h' -e 'src/Coordination/KeeperConstants.cpp'
    } | {
        grep -vP $EXCLUDE_DIRS | xargs grep -l -P "extern const $type_of_extern $allowed_chars"
    } | while read file; do
        grep -P "extern const $type_of_extern $allowed_chars;" $file | sed -r -e "s/^.*?extern const $type_of_extern ($allowed_chars);.*?$/\1/" | while read val; do
            if ! grep -q "$extern_type::$val" $file; then
                # Excludes for SOFTWARE_EVENT/HARDWARE_EVENT/CACHE_EVENT in ThreadProfileEvents.cpp
                if [[ ! $extern_type::$val =~ ProfileEvents::Perf.* ]]; then
                    echo "$extern_type::$val is defined but not used in file $file"
                fi
            fi
        done
    done

    # Undefined
    # NOTE: to fix automatically, replace echo with:
    # ( grep -q -F 'namespace $extern_type' $file && \
    #   sed -i -r "0,/(\s*)extern const $type_of_extern [$allowed_chars]+/s//\1extern const $type_of_extern $val;\n&/" $file || \
    #   awk '{ print; if (ns == 1) { ns = 2 }; if (ns == 2) { ns = 0; print "namespace $extern_type\n{\n    extern const $type_of_extern '$val';\n}" } }; /namespace DB/ { ns = 1; };' < $file > ${file}.tmp && mv ${file}.tmp $file )
    find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | {
        grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::$allowed_chars"
    } | while read file; do
        grep -P "$extern_type::$allowed_chars" $file | grep -P -v '^\s*//' | sed -r -e "s/^.*?$extern_type::($allowed_chars).*?$/\1/" | while read val; do
            if ! grep -q "extern const $type_of_extern $val" $file; then
                if ! in_array "$extern_type::$val" "${EXTERN_TYPES_EXCLUDES[@]}"; then
                    echo "$extern_type::$val is used in file $file but not defined"
                fi
            fi
        done
    done

    # Duplicates
    find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | {
        grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::$allowed_chars"
    } | while read file; do
        grep -P "extern const $type_of_extern $allowed_chars;" $file | sort | uniq -c | grep -v -P ' +1 ' && echo "Duplicate $extern_type in file $file"
    done
done

# Three or more consecutive empty lines
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    while read file; do awk '/^$/ { ++i; if (i > 2) { print "More than two consecutive empty lines in file '$file'" } } /./ { i = 0 }' $file; done

# Check that every header file has #pragma once in first line
find $ROOT_PATH/{src,programs,utils} -name '*.h' |
    grep -vP $EXCLUDE_DIRS |
    while read file; do [[ $(head -n1 $file) != '#pragma once' ]] && echo "File $file must have '#pragma once' in first line"; done

# Too many exclamation marks
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -F '!!!' | grep -P '.' && echo "Too many exclamation marks (looks dirty, unconfident)."

# Exclamation mark in a message
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -F '!",' | grep -P '.' && echo "No need for an exclamation mark (looks dirty, unconfident)."

# Trailing whitespaces
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -n -P ' $' | grep -n -P '.' && echo "^ Trailing whitespaces."

# Forbid stringstream because it's easy to use incorrectly and hard to debug possible issues
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -P 'std::[io]?stringstream' | grep -v "STYLE_CHECK_ALLOW_STD_STRING_STREAM" && echo "Use WriteBufferFromOwnString or ReadBufferFromString instead of std::stringstream"

# Forbid std::cerr/std::cout in src (fine in programs/utils)
std_cerr_cout_excludes=(
    /examples/
    /tests/
    _fuzzer
    # OK
    src/Common/ProgressIndication.cpp
    # only under #ifdef DBMS_HASH_MAP_DEBUG_RESIZES, that is used only in tests
    src/Common/HashTable/HashTable.h
    # SensitiveDataMasker::printStats()
    src/Common/SensitiveDataMasker.cpp
    # StreamStatistics::print()
    src/Compression/LZ4_decompress_faster.cpp
    # ContextSharedPart with subsequent std::terminate()
    src/Interpreters/Context.cpp
    # IProcessor::dump()
    src/Processors/IProcessor.cpp
    src/Client/ClientApplicationBase.cpp
    src/Client/ClientBase.cpp
    src/Client/LineReader.cpp
    src/Client/QueryFuzzer.cpp
    src/Client/Suggest.cpp
    src/Client/ClientBase.h
    src/Client/LineReader.h
    src/Client/ReplxxLineReader.h
    src/Bridge/IBridge.cpp
    src/Daemon/BaseDaemon.cpp
    src/Loggers/Loggers.cpp
    src/Common/GWPAsan.cpp
    src/Common/ProgressIndication.h
)
sources_with_std_cerr_cout=( $(
    find $ROOT_PATH/{src,base} -name '*.h' -or -name '*.cpp' | \
        grep -vP $EXCLUDE_DIRS | \
        grep -F -v $(printf -- "-e %s " "${std_cerr_cout_excludes[@]}") | \
        xargs grep -F --with-filename -e std::cerr -e std::cout | cut -d: -f1 | sort -u
) )

# Exclude comments
for src in "${sources_with_std_cerr_cout[@]}"; do
    # suppress stderr, since it may contain a warning for #pragma once in headers
    if gcc -fpreprocessed -dD -E "$src" 2>/dev/null | grep -F -q -e std::cerr -e std::cout; then
        echo "$src: uses std::cerr/std::cout"
    fi
done

expect_tests=( $(find $ROOT_PATH/tests/queries -name '*.expect') )
for test_case in "${expect_tests[@]}"; do
    pattern="^exp_internal -f \$CLICKHOUSE_TMP/\$basename.debuglog 0$"
    grep -q "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'"

    if grep -q "^spawn.*CLICKHOUSE_CLIENT_BINARY$" "$test_case"; then
        pattern="^spawn.*CLICKHOUSE_CLIENT_BINARY.*--history_file$"
        grep -q "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'"
    fi

    # Otherwise expect_after/expect_before will not bail without stdin attached
    # (and actually this is a hack anyway, correct way is to use $any_spawn_id)
    pattern="-i \$any_spawn_id timeout"
    grep -q -- "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'"
    pattern="-i \$any_spawn_id eof"
    grep -q -- "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'"
done

# Forbid non-unique error codes
if [[ "$(grep -Po "M\([0-9]*," $ROOT_PATH/src/Common/ErrorCodes.cpp | wc -l)" != "$(grep -Po "M\([0-9]*," $ROOT_PATH/src/Common/ErrorCodes.cpp | sort | uniq | wc -l)" ]]
then
    echo "ErrorCodes.cpp contains non-unique error codes"
fi

# Check that there are no system-wide libraries/headers in use.
#
# NOTE: it is better to override find_path/find_library in cmake, but right now
# it is not possible, see [1] for the reference.
#
#   [1]: git grep --recurse-submodules -e find_library -e find_path contrib
if git grep -e find_path -e find_library -- :**CMakeLists.txt; then
    echo "There is find_path/find_library usage. ClickHouse should use everything bundled. Consider adding one more contrib module."
fi

# Forbid std::filesystem::is_symlink and std::filesystem::read_symlink, because it's easy to use them incorrectly
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -P '::(is|read)_symlink' | grep -v "STYLE_CHECK_ALLOW_STD_FS_SYMLINK" && echo "Use DB::FS::isSymlink and DB::FS::readSymlink instead"

# Forbid __builtin_unreachable(), because it's hard to debug when it becomes reachable
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -P '__builtin_unreachable' && echo "Use UNREACHABLE() from defines.h instead"

# Forbid mt19937() and random_device() which are outdated and slow
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -P '(std::mt19937|std::mersenne_twister_engine|std::random_device)' && echo "Use pcg64_fast (from pcg_random.h) and randomSeed (from Common/randomSeed.h) instead"

# Require checking the return value of close(),
# since it can hide fd misuse and break other places.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -e ' close(.*fd' -e ' ::close(' | grep -v = && echo "Return value of close() should be checked"

# A small typo can lead to debug code in release builds, see https://github.com/ClickHouse/ClickHouse/pull/47647
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -l -F '#ifdef NDEBUG' | xargs -I@FILE awk '/#ifdef NDEBUG/ { inside = 1; dirty = 1 } /#endif/ { if (inside && dirty) { print "File @FILE has suspicious #ifdef NDEBUG, possibly confused with #ifndef NDEBUG" }; inside = 0 } /#else/ { dirty = 0 }' @FILE

# If a user is doing dynamic or typeid cast with a pointer, and immediately dereferencing it, it is unsafe.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep --line-number -P '(dynamic|typeid)_cast<[^>]+\*>\([^\(\)]+\)->' | grep -P '.' && echo "It's suspicious when you are doing a dynamic_cast or typeid_cast with a pointer and immediately dereferencing it. Use references instead of pointers or check a pointer to nullptr."

# Check for bad punctuation: whitespace before comma.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '\w ,' | grep -v 'bad punctuation is ok here' && echo "^ There is bad punctuation: whitespace before comma. You should write it like this: 'Hello, world!'"

# Check usage of std::regex which is too bloated and slow.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number 'std::regex' | grep -P '.' && echo "^ Please use re2 instead of std::regex"

# Cyrillic characters hiding inside Latin.
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | grep -v StorageSystemContributors.generated.cpp | xargs grep -P --line-number '[a-zA-Z][а-яА-ЯёЁ]|[а-яА-ЯёЁ][a-zA-Z]' && echo "^ Cyrillic characters found in unexpected place."

# Orphaned header files.
join -v1 <(find $ROOT_PATH/{src,programs,utils} -name '*.h' -printf '%f\n' | sort | uniq) <(find $ROOT_PATH/{src,programs,utils} -name '*.cpp' -or -name '*.c' -or -name '*.h' -or -name '*.S' | xargs grep --no-filename -o -P '[\w-]+\.h' | sort | uniq) |
    grep . && echo '^ Found orphan header files.'

# Don't allow dynamic compiler checks with CMake, because we are using hermetic, reproducible, cross-compiled, static (TLDR, good) builds.
ls -1d $ROOT_PATH/contrib/*-cmake | xargs -I@ find @ -name 'CMakeLists.txt' -or -name '*.cmake' | xargs grep --with-filename -i -P 'check_c_compiler_flag|check_cxx_compiler_flag|check_c_source_compiles|check_cxx_source_compiles|check_include_file|check_symbol_exists|cmake_push_check_state|cmake_pop_check_state|find_package|CMAKE_REQUIRED_FLAGS|CheckIncludeFile|CheckCCompilerFlag|CheckCXXCompilerFlag|CheckCSourceCompiles|CheckCXXSourceCompiles|CheckCSymbolExists|CheckCXXSymbolExists' | grep -v Rust && echo "^ It's not allowed to have dynamic compiler checks with CMake."

# Wrong spelling of abbreviations, e.g. SQL is right, Sql is wrong. XMLHttpRequest is very wrong.
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -P 'Sql|Html|Xml|Cpu|Tcp|Udp|Http|Db|Json|Yaml' | grep -v -P 'RabbitMQ|Azure|Aws|aws|Avro|IO/S3' &&
    echo "Abbreviations such as SQL, XML, HTTP, should be in all caps. For example, SQL is right, Sql is wrong. XMLHttpRequest is very wrong."

find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP $EXCLUDE_DIRS |
    xargs grep -F -i 'ErrorCodes::LOGICAL_ERROR, "Logical error:' &&
    echo "If an exception has LOGICAL_ERROR code, there is no need to include the text 'Logical error' in the exception message, because then the phrase 'Logical error' will be printed twice."

# There shouldn't be any code snippets under GPL or LGPL
find $ROOT_PATH/{src,base,programs} -name '*.h' -or -name '*.cpp' 2>/dev/null | xargs grep -i -F 'General Public License' && echo "There shouldn't be any code snippets under GPL or LGPL"

PATTERN="allow_";
DIFF=$(comm -3 <(grep -o "\b$PATTERN\w*\b" $ROOT_PATH/src/Core/Settings.cpp | sort -u) <(grep -o -h "\b$PATTERN\w*\b" $ROOT_PATH/src/Databases/enableAllExperimentalSettings.cpp $ROOT_PATH/utils/check-style/experimental_settings_ignore.txt | sort -u));
[ -n "$DIFF" ] && echo "$DIFF" && echo "^^ Detected 'allow_*' settings that might need to be included in src/Databases/enableAllExperimentalSettings.cpp" && echo "Alternatively, consider adding an exception to utils/check-style/experimental_settings_ignore.txt"
37
ci_v2/jobs/scripts/check_style/check_submodules.sh
Executable file
@ -0,0 +1,37 @@
#!/usr/bin/env bash

# The script checks that all submodules defined in $GIT_ROOT/.gitmodules exist in $GIT_ROOT/contrib

set -e

GIT_ROOT="."

cd "$GIT_ROOT"

# Remove keys for submodule.*.path parameters, the values are separated by \0
# and check if the directory exists
git config --file .gitmodules --null --get-regexp path | sed -z 's|.*\n||' | \
    xargs -P100 -0 --no-run-if-empty -I{} bash -c 'if ! test -d '"'{}'"'; then echo Directory for submodule {} is not found; exit 1; fi' 2>&1

# And check that the submodule is fine
git config --file .gitmodules --null --get-regexp path | sed -z 's|.*\n||' | \
    xargs -P100 -0 --no-run-if-empty -I{} git submodule status -q '{}' 2>&1

# All submodules should be from https://github.com/
git config --file "$GIT_ROOT/.gitmodules" --get-regexp 'submodule\..+\.url' | \
    while read -r line; do
        name=${line#submodule.}; name=${name%.url*}
        url=${line#* }
        [[ "$url" != 'https://github.com/'* ]] && echo "All submodules should be from https://github.com/, submodule '$name' has '$url'"
    done

# All submodules should be of this form: [submodule "contrib/libxyz"] (for consistency; the submodule name itself does not matter too much)
# - restrict the check to the top-level .gitmodules file
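# e.g. a conforming entry (hypothetical "libxyz"):
#     [submodule "contrib/libxyz"]
#         path = contrib/libxyz
#         url = https://github.com/ClickHouse/libxyz.git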
git config --file "$GIT_ROOT/.gitmodules" --get-regexp 'submodule\..+\.path' | \
    while read -r line; do
        name=${line#submodule.}; name=${name%.path*}
        path=${line#* }
        [ "$name" != "$path" ] && echo "Submodule name '$name' is not equal to its path '$path'"
    done
15
ci_v2/jobs/scripts/check_style/check_typos.sh
Executable file
@ -0,0 +1,15 @@
#!/usr/bin/env bash

# Check for typos in code.

ROOT_PATH="."

#FIXME: check all (or almost all) repo
codespell \
    --skip "*generated*,*gperf*,*.bin,*.mrk*,*.idx,checksums.txt,*.dat,*.pyc,*.kate-swp,*obfuscateQueries.cpp,d3-*.js,*.min.js,*.sum,${ROOT_PATH}/utils/check-style/aspell-ignore" \
    --ignore-words "${ROOT_PATH}/utils/check-style/codespell-ignore-words.list" \
    --exclude-file "${ROOT_PATH}/utils/check-style/codespell-ignore-lines.list" \
    --quiet-level 2 \
    "$ROOT_PATH"/{src,base,programs,utils} \
    $@ | grep -P '.' \
    && echo -e "\nFound some typos in code.\nSee the files utils/check-style/codespell* if you want to add an exception."
98
ci_v2/jobs/scripts/check_style/checks_to_refactor.sh
Executable file
@ -0,0 +1,98 @@
#!/bin/bash

ROOT_PATH="."

# Queries to system.query_log/system.query_thread_log should have current_database = currentDatabase() condition
# NOTE: it is not that accurate, but at least something.
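# e.g. a compliant test query looks like (illustrative, not from the repo):
#     SELECT query FROM system.query_log WHERE current_database = currentDatabase()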
tests_with_query_log=( $(
    find $ROOT_PATH/tests/queries -iname '*.sql' -or -iname '*.sh' -or -iname '*.py' -or -iname '*.j2' |
        xargs grep --with-filename -e system.query_log -e system.query_thread_log | cut -d: -f1 | sort -u
) )
for test_case in "${tests_with_query_log[@]}"; do
    grep -qE current_database.*currentDatabase "$test_case" || {
        grep -qE 'current_database.*\$CLICKHOUSE_DATABASE' "$test_case"
    } || echo "Queries to system.query_log/system.query_thread_log do not have current_database = currentDatabase() condition in $test_case"
done

grep -iE 'SYSTEM STOP MERGES;?$' -R $ROOT_PATH/tests/queries && echo "Merges cannot be disabled globally in fast/stateful/stateless tests, because it will break concurrently running queries"


# Queries to:
tables_with_database_column=(
    system.tables
    system.parts
    system.detached_parts
    system.parts_columns
    system.columns
    system.projection_parts
    system.mutations
)
# should have database = currentDatabase() condition
#
# NOTE: it is not that accurate, but at least something.
tests_with_database_column=( $(
    find $ROOT_PATH/tests/queries -iname '*.sql' -or -iname '*.sh' -or -iname '*.py' -or -iname '*.j2' |
        xargs grep --with-filename $(printf -- "-e %s " "${tables_with_database_column[@]}") |
        grep -v -e ':--' -e ':#' |
        cut -d: -f1 | sort -u
) )
for test_case in "${tests_with_database_column[@]}"; do
    grep -qE database.*currentDatabase "$test_case" || {
        grep -qE 'database.*\$CLICKHOUSE_DATABASE' "$test_case"
    } || {
        # explicit database
        grep -qE "database[ ]*=[ ]*'" "$test_case"
    } || {
        echo "Queries to ${tables_with_database_column[*]} do not have database = currentDatabase()/\$CLICKHOUSE_DATABASE condition in $test_case"
    }
done

# Queries with ReplicatedMergeTree
# NOTE: it is not that accurate, but at least something.
tests_with_replicated_merge_tree=( $(
    find $ROOT_PATH/tests/queries -iname '*.sql' -or -iname '*.sh' -or -iname '*.py' -or -iname '*.j2' |
        xargs grep --with-filename -e "Replicated.*MergeTree[ ]*(.*" | cut -d: -f1 | sort -u
) )
for test_case in "${tests_with_replicated_merge_tree[@]}"; do
    case "$test_case" in
        *.gen.*)
            ;;
        *.sh)
            test_case_zk_prefix="\(\$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX\|{database}\)"
            grep -q -e "Replicated.*MergeTree[ ]*(.*$test_case_zk_prefix" "$test_case" || echo "Replicated.*MergeTree should contain '$test_case_zk_prefix' in zookeeper path to avoid overlaps ($test_case)"
            ;;
        *.sql|*.sql.j2)
            test_case_zk_prefix="\({database}\|currentDatabase()\|{uuid}\|{default_path_test}\)"
            grep -q -e "Replicated.*MergeTree[ ]*(.*$test_case_zk_prefix" "$test_case" || echo "Replicated.*MergeTree should contain '$test_case_zk_prefix' in zookeeper path to avoid overlaps ($test_case)"
            ;;
        *.py)
            # Right now there are no such tests anyway
            echo "No ReplicatedMergeTree style check for *.py ($test_case)"
            ;;
    esac
done

# The stateful directory should only contain the tests that depend on the test dataset (hits or visits).
find $ROOT_PATH/tests/queries/1_stateful -name '*.sql' -or -name '*.sh' | grep -v '00076_system_columns_bytes' | xargs -I{} bash -c 'grep -q -P "hits|visits" "{}" || echo "The test {} does not depend on the test dataset (hits or visits table) and should be located in the 0_stateless directory. You can also add an exception to the check-style script."'

# Check for existence of __init__.py files
for i in "${ROOT_PATH}"/tests/integration/test_*; do FILE="${i}/__init__.py"; [ ! -f "${FILE}" ] && echo "${FILE} should exist for every integration test"; done

# Check for executable bit on non-executable files
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} '(' -name '*.cpp' -or -name '*.h' -or -name '*.sql' -or -name '*.j2' -or -name '*.xml' -or -name '*.reference' -or -name '*.txt' -or -name '*.md' ')' -and -executable | grep -P '.' && echo "These files should not be executable."

# Check for BOM
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xEF\xBB\xBF' | grep -P '.' && echo "Files should not have UTF-8 BOM"
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFF\xFE' | grep -P '.' && echo "Files should not have UTF-16LE BOM"
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFE\xFF' | grep -P '.' && echo "Files should not have UTF-16BE BOM"

# Conflict markers
find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' |
    xargs grep -P '^(<<<<<<<|=======|>>>>>>>)$' | grep -P '.' && echo "Conflict markers are found in files"

# DOS/Windows newlines
find $ROOT_PATH/{base,src,programs,utils,docs} -name '*.md' -or -name '*.h' -or -name '*.cpp' -or -name '*.js' -or -name '*.py' -or -name '*.html' | xargs grep -l -P '\r$' && echo "^ Files contain DOS/Windows newlines (\r\n instead of \n)."

# # workflows check
# act --list --directory="$ROOT_PATH" 1>/dev/null 2>&1 || act --list --directory="$ROOT_PATH" 2>&1
# actionlint -ignore 'reusable workflow call.+' || :
37
ci_v2/jobs/scripts/check_style/double_whitespaces.pl
Executable file
@ -0,0 +1,37 @@
#!/usr/bin/perl

use strict;

# Find double whitespace such as "a,  b,  c" that looks very ugly and annoying.
# But skip double whitespaces if they are used as an alignment - by comparing to surrounding lines.
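# A hypothetical line that would be flagged:
#     f(a,  b);
# The same spacing is allowed when the previous or next line aligns
# at that column (e.g. a column of constants).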

my $ret = 0;

foreach my $file (@ARGV)
{
    my @array;

    open (FH,'<',$file);
    while (<FH>)
    {
        push @array, $_;
    }

    for (my $i = 1; $i < $#array; ++$i)
    {
        if ($array[$i] =~ ',( {2,3})[^ /]')
        {
            # https://stackoverflow.com/questions/87380/how-can-i-find-the-location-of-a-regex-match-in-perl

            if ((substr($array[$i - 1], $+[1] - 1, 2) !~ /^[ -][^ ]$/) # whitespaces are not part of alignment
                && (substr($array[$i + 1], $+[1] - 1, 2) !~ /^[ -][^ ]$/)
                && $array[$i] !~ /(-?\d+\w*,\s+){3,}/) # this is not a number table like { 10, -1, 2 }
            {
                print($file . ":" . ($i + 1) . $array[$i]);
                $ret = 1;
            }
        }
    }
}

exit $ret;
251
ci_v2/settings/definitions.py
Normal file
@ -0,0 +1,251 @@
from praktika import Docker, Secret

S3_BUCKET_NAME = "clickhouse-builds"
S3_BUCKET_HTTP_ENDPOINT = "clickhouse-builds.s3.amazonaws.com"


class RunnerLabels:
    CI_SERVICES = "ci_services"
    CI_SERVICES_EBS = "ci_services_ebs"


BASE_BRANCH = "master"

SECRETS = [
    Secret.Config(
        name="dockerhub_robot_password",
        type=Secret.Type.AWS_SSM_VAR,
    ),
    Secret.Config(
        name="woolenwolf_gh_app.clickhouse-app-id",
        type=Secret.Type.AWS_SSM_SECRET,
    ),
    Secret.Config(
        name="woolenwolf_gh_app.clickhouse-app-key",
        type=Secret.Type.AWS_SSM_SECRET,
    ),
]

DOCKERS = [
    # Docker.Config(
    #     name="clickhouse/binary-builder",
    #     path="./docker/packager/binary-builder",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=[],
    # ),
    # Docker.Config(
    #     name="clickhouse/cctools",
    #     path="./docker/packager/cctools",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=[],
    # ),
    # Docker.Config(
    #     name="clickhouse/test-old-centos",
    #     path="./docker/test/compatibility/centos",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=[],
    # ),
    # Docker.Config(
    #     name="clickhouse/test-old-ubuntu",
    #     path="./docker/test/compatibility/ubuntu",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=[],
    # ),
    # Docker.Config(
    #     name="clickhouse/test-util",
    #     path="./docker/test/util",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=[],
    # ),
    # Docker.Config(
    #     name="clickhouse/integration-test",
    #     path="./docker/test/integration/base",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    # Docker.Config(
    #     name="clickhouse/fuzzer",
    #     path="./docker/test/fuzzer",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    # Docker.Config(
    #     name="clickhouse/performance-comparison",
    #     path="./docker/test/performance-comparison",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=[],
    # ),
    # Docker.Config(
    #     name="clickhouse/fasttest",
    #     path="./docker/test/fasttest",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-util"],
    # ),
    # Docker.Config(
    #     name="clickhouse/test-base",
    #     path="./docker/test/base",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-util"],
    # ),
    # Docker.Config(
    #     name="clickhouse/clickbench",
    #     path="./docker/test/clickbench",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    # Docker.Config(
    #     name="clickhouse/keeper-jepsen-test",
    #     path="./docker/test/keeper-jepsen",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    # Docker.Config(
    #     name="clickhouse/server-jepsen-test",
    #     path="./docker/test/server-jepsen",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    # Docker.Config(
    #     name="clickhouse/sqllogic-test",
    #     path="./docker/test/sqllogic",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    # Docker.Config(
    #     name="clickhouse/sqltest",
    #     path="./docker/test/sqltest",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    # Docker.Config(
    #     name="clickhouse/stateless-test",
    #     path="./docker/test/stateless",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    # Docker.Config(
    #     name="clickhouse/stateful-test",
    #     path="./docker/test/stateful",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/stateless-test"],
    # ),
    # Docker.Config(
    #     name="clickhouse/stress-test",
    #     path="./docker/test/stress",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/stateful-test"],
    # ),
    # Docker.Config(
    #     name="clickhouse/unit-test",
    #     path="./docker/test/unit",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    # Docker.Config(
    #     name="clickhouse/integration-tests-runner",
    #     path="./docker/test/integration/runner",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
    Docker.Config(
        name="clickhouse/style-test",
        path="./ci_v2/docker/style-test",
        platforms=Docker.Platforms.arm_amd,
        depends_on=[],
    ),
    # Docker.Config(
    #     name="clickhouse/docs-builder",
    #     path="./docker/docs/builder",
    #     arm64=True,
    #     amd64=True,
    #     depends_on=["clickhouse/test-base"],
    # ),
]

# TODO:
#   "docker/test/integration/s3_proxy": {
#       "name": "clickhouse/s3-proxy",
#       "dependent": []
#   },
#   "docker/test/integration/resolver": {
#       "name": "clickhouse/python-bottle",
#       "dependent": []
#   },
#   "docker/test/integration/helper_container": {
#       "name": "clickhouse/integration-helper",
#       "dependent": []
#   },
#   "docker/test/integration/mysql_golang_client": {
#       "name": "clickhouse/mysql-golang-client",
#       "dependent": []
#   },
#   "docker/test/integration/dotnet_client": {
#       "name": "clickhouse/dotnet-client",
#       "dependent": []
#   },
#   "docker/test/integration/mysql_java_client": {
#       "name": "clickhouse/mysql-java-client",
#       "dependent": []
#   },
#   "docker/test/integration/mysql_js_client": {
#       "name": "clickhouse/mysql-js-client",
#       "dependent": []
#   },
#   "docker/test/integration/mysql_php_client": {
#       "name": "clickhouse/mysql-php-client",
#       "dependent": []
#   },
#   "docker/test/integration/postgresql_java_client": {
#       "name": "clickhouse/postgresql-java-client",
#       "dependent": []
#   },
#   "docker/test/integration/kerberos_kdc": {
#       "only_amd64": true,
#       "name": "clickhouse/kerberos-kdc",
#       "dependent": []
#   },
#   "docker/test/integration/kerberized_hadoop": {
#       "only_amd64": true,
#       "name": "clickhouse/kerberized-hadoop",
#       "dependent": []
#   },
#   "docker/test/sqlancer": {
#       "name": "clickhouse/sqlancer-test",
#       "dependent": []
#   },
#   "docker/test/install/deb": {
#       "name": "clickhouse/install-deb-test",
#       "dependent": []
#   },
#   "docker/test/install/rpm": {
#       "name": "clickhouse/install-rpm-test",
#       "dependent": []
#   },
#   "docker/test/integration/nginx_dav": {
#       "name": "clickhouse/nginx-dav",
#       "dependent": []
#   }


class JobNames:
    STYLE_CHECK = "Style Check"
20
ci_v2/settings/settings.py
Normal file
@ -0,0 +1,20 @@
from ci_v2.settings.definitions import (
    S3_BUCKET_HTTP_ENDPOINT,
    S3_BUCKET_NAME,
    RunnerLabels,
)

S3_ARTIFACT_PATH = f"{S3_BUCKET_NAME}/artifacts"
CI_CONFIG_RUNS_ON = [RunnerLabels.CI_SERVICES]
DOCKER_BUILD_RUNS_ON = [RunnerLabels.CI_SERVICES_EBS]
CACHE_S3_PATH = f"{S3_BUCKET_NAME}/ci_ch_cache"
HTML_S3_PATH = f"{S3_BUCKET_NAME}/reports"
S3_BUCKET_TO_HTTP_ENDPOINT = {S3_BUCKET_NAME: S3_BUCKET_HTTP_ENDPOINT}

DOCKERHUB_USERNAME = "robotclickhouse"
DOCKERHUB_SECRET = "dockerhub_robot_password"

CI_DB_DB_NAME = "default"
CI_DB_TABLE_NAME = "checks"

INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS = ""
44
ci_v2/workflows/pull_request.py
Normal file
@ -0,0 +1,44 @@
from typing import List

from ci_v2.settings.definitions import (
    BASE_BRANCH,
    DOCKERS,
    SECRETS,
    JobNames,
    RunnerLabels,
)
from praktika import Job, Workflow

style_check_job = Job.Config(
    name=JobNames.STYLE_CHECK,
    runs_on=[RunnerLabels.CI_SERVICES],
    command="python3 ./ci_v2/jobs/check_style.py",
    run_in_docker="clickhouse/style-test",
)

workflow = Workflow.Config(
    name="PR",
    event=Workflow.Event.PULL_REQUEST,
    base_branches=[BASE_BRANCH],
    jobs=[
        style_check_job,
    ],
    dockers=DOCKERS,
    secrets=SECRETS,
    enable_cache=True,
    enable_report=True,
    enable_merge_ready_status=True,
)

WORKFLOWS = [
    workflow,
]  # type: List[Workflow.Config]


if __name__ == "__main__":
    # example: local job test inside praktika environment
    from praktika.runner import Runner

    Runner.generate_dummy_environment(workflow, style_check_job)

    Runner().run(workflow, style_check_job)
@ -2,11 +2,11 @@

# NOTE: VERSION_REVISION has nothing in common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54490)
SET(VERSION_REVISION 54491)
SET(VERSION_MAJOR 24)
SET(VERSION_MINOR 9)
SET(VERSION_MINOR 10)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH e02b434d2fc0c4fbee29ca675deab7474d274608)
SET(VERSION_DESCRIBE v24.9.1.1-testing)
SET(VERSION_STRING 24.9.1.1)
SET(VERSION_GITHASH b12a367741812f9e5fe754d19ebae600e2a2614c)
SET(VERSION_DESCRIBE v24.10.1.1-testing)
SET(VERSION_STRING 24.10.1.1)
# end of autochange
@ -12,11 +12,13 @@ macro(add_headers_only prefix common_path)
    add_glob(${prefix}_headers ${common_path}/*.h)
endmacro()

# Assumes the path is under src and that src/ needs to be removed (valid for any subdirectory under src/)
macro(extract_into_parent_list src_list dest_list)
    list(REMOVE_ITEM ${src_list} ${ARGN})
    get_filename_component(__dir_name ${CMAKE_CURRENT_SOURCE_DIR} NAME)
    file(RELATIVE_PATH relative ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
    string(REPLACE "src/" "" relative ${relative})
    foreach(file IN ITEMS ${ARGN})
        list(APPEND ${dest_list} ${__dir_name}/${file})
        list(APPEND ${dest_list} ${relative}/${file})
    endforeach()
    set(${dest_list} "${${dest_list}}" PARENT_SCOPE)
endmacro()
@ -18,4 +18,4 @@ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")

set (USE_MUSL 1)
add_definitions(-DUSE_MUSL=1)
add_definitions(-DUSE_MUSL=1 -D__MUSL__=1)
6
contrib/CMakeLists.txt
vendored
@ -160,6 +160,12 @@ add_contrib (datasketches-cpp-cmake datasketches-cpp)
add_contrib (incbin-cmake incbin)
add_contrib (sqids-cpp-cmake sqids-cpp)

option(USE_MONGODB "Enable MongoDB support" ${ENABLE_LIBRARIES})
if (USE_MONGODB)
    add_contrib (mongo-c-driver-cmake mongo-c-driver) # requires: zlib
    add_contrib (mongo-cxx-driver-cmake mongo-cxx-driver) # requires: libmongoc, libbson
endif()

option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
if (ENABLE_NLP)
    add_contrib (libstemmer-c-cmake libstemmer_c)
@ -69,6 +69,11 @@ set (SRCS_PROGRAM_OPTIONS
    "${LIBRARY_DIR}/libs/program_options/src/winmain.cpp"
)

# Always compile this file with the highest possible level of optimizations, even in Debug builds.
# Otherwise the client takes too long to start, and SQL stateless tests (with many options to parse) become too slow.
# https://github.com/ClickHouse/ClickHouse/issues/65745
set_source_files_properties(${SRCS_PROGRAM_OPTIONS} PROPERTIES COMPILE_FLAGS "-O3")

add_library (_boost_program_options ${SRCS_PROGRAM_OPTIONS})
add_library (boost::program_options ALIAS _boost_program_options)
target_include_directories (_boost_program_options SYSTEM BEFORE PUBLIC ${LIBRARY_DIR})
@ -164,15 +164,6 @@ target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE)
# Because our coverage callbacks call malloc, and recursive call of malloc could not work.
target_compile_options(_jemalloc PRIVATE ${WITHOUT_COVERAGE_FLAGS_LIST})

if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
    target_compile_definitions(_jemalloc PRIVATE
        -DJEMALLOC_DEBUG=1
        # Usage examples:
        # - MALLOC_CONF=log:.
        # - MALLOC_CONF='log:core.malloc.exit|core.sallocx.entry|core.sdallocx.entry'
        -DJEMALLOC_LOG=1)
endif ()

target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_PROF=1)

# jemalloc provides support for two unwind flavors:
@ -50,7 +50,11 @@ if(NOT MARIADB_UNIX_ADDR)
    set(MARIADB_UNIX_ADDR "/tmp/mysql.sock")
endif()

set(HAVE_ALLOCA_H 1)
if (OS_FREEBSD)
    set(HAVE_ALLOCA_H 0)
else()
    set(HAVE_ALLOCA_H 1)
endif()
set(HAVE_ARPA_INET_H 1)
set(HAVE_DLFCN_H 1)
set(HAVE_FCNTL_H 1)
1
contrib/mongo-c-driver
vendored
Submodule
@ -0,0 +1 @@
Subproject commit d55410c69183c90d18fd3b3f1d9db3d224fc8d52
150
contrib/mongo-c-driver-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,150 @@
option(USE_MONGODB "Enable MongoDB support" ${ENABLE_LIBRARIES})
if(NOT USE_MONGODB)
    message(STATUS "Not using libmongoc and libbson")
    return()
endif()

set(libbson_VERSION_MAJOR 1)
set(libbson_VERSION_MINOR 27)
set(libbson_VERSION_PATCH 0)
set(libbson_VERSION 1.27.0)
set(libmongoc_VERSION_MAJOR 1)
set(libmongoc_VERSION_MINOR 27)
set(libmongoc_VERSION_PATCH 0)
set(libmongoc_VERSION 1.27.0)

set(LIBBSON_SOURCES_ROOT "${ClickHouse_SOURCE_DIR}/contrib/mongo-c-driver/src")
set(LIBBSON_SOURCE_DIR "${LIBBSON_SOURCES_ROOT}/libbson/src")
file(GLOB_RECURSE LIBBSON_SOURCES "${LIBBSON_SOURCE_DIR}/*.c")

set(LIBBSON_BINARY_ROOT "${ClickHouse_BINARY_DIR}/contrib/mongo-c-driver/src")
set(LIBBSON_BINARY_DIR "${LIBBSON_BINARY_ROOT}/libbson/src")

include(TestBigEndian)
test_big_endian(BSON_BIG_ENDIAN)
if(BSON_BIG_ENDIAN)
    set(BSON_BYTE_ORDER 4321)
else()
    set(BSON_BYTE_ORDER 1234)
endif()

set(BSON_OS 1)
set(BSON_EXTRA_ALIGN 1)
set(BSON_HAVE_SNPRINTF 1)
set(BSON_HAVE_TIMESPEC 1)
set(BSON_HAVE_GMTIME_R 1)
set(BSON_HAVE_RAND_R 1)
set(BSON_HAVE_STRINGS_H 1)
set(BSON_HAVE_STRLCPY 0)
set(BSON_HAVE_STRNLEN 1)
set(BSON_HAVE_STDBOOL_H 1)
set(BSON_HAVE_CLOCK_GETTIME 1)


# common settings
set(MONGOC_TRACE 0)
set(MONGOC_ENABLE_STATIC_BUILD 1)
set(MONGOC_ENABLE_DEBUG_ASSERTIONS 0)
set(MONGOC_ENABLE_MONGODB_AWS_AUTH 0)
set(MONGOC_ENABLE_SASL_CYRUS 0)
set(MONGOC_ENABLE_SASL 0)
set(MONGOC_ENABLE_SASL_SSPI 0)
set(MONGOC_HAVE_SASL_CLIENT_DONE 0)
set(MONGOC_ENABLE_SRV 0)

# DNS
set(MONGOC_HAVE_DNSAPI 0)
set(MONGOC_HAVE_RES_SEARCH 0)
set(MONGOC_HAVE_RES_NSEARCH 0)
set(MONGOC_HAVE_RES_NCLOSE 0)
set(MONGOC_HAVE_RES_NDESTROY 0)

set(MONGOC_ENABLE_COMPRESSION 1)
set(MONGOC_ENABLE_COMPRESSION_ZLIB 0)
set(MONGOC_ENABLE_COMPRESSION_SNAPPY 0)
set(MONGOC_ENABLE_COMPRESSION_ZSTD 1)

# SSL
set(MONGOC_ENABLE_CRYPTO 0)
set(MONGOC_ENABLE_CRYPTO_CNG 0)
set(MONGOC_ENABLE_CRYPTO_COMMON_CRYPTO 0)
set(MONGOC_ENABLE_CRYPTO_SYSTEM_PROFILE 0)
set(MONGOC_ENABLE_SSL 0)
set(MONGOC_ENABLE_SSL_OPENSSL 0)
set(MONGOC_ENABLE_SSL_SECURE_CHANNEL 0)
set(MONGOC_ENABLE_SSL_SECURE_TRANSPORT 0)
set(MONGOC_ENABLE_SSL_LIBRESSL 0)
set(MONGOC_ENABLE_CRYPTO_LIBCRYPTO 0)
set(MONGOC_ENABLE_CLIENT_SIDE_ENCRYPTION 0)
set(MONGOC_HAVE_ASN1_STRING_GET0_DATA 0)
if(ENABLE_SSL)
    set(MONGOC_ENABLE_SSL 1)
    set(MONGOC_ENABLE_CRYPTO 1)
    set(MONGOC_ENABLE_SSL_OPENSSL 1)
    set(MONGOC_ENABLE_CRYPTO_LIBCRYPTO 1)
    set(MONGOC_HAVE_ASN1_STRING_GET0_DATA 1)
else()
    message(WARNING "Building mongoc without SSL")
endif()

set(CMAKE_EXTRA_INCLUDE_FILES "sys/socket.h")
set(MONGOC_SOCKET_ARG2 "struct sockaddr")
set(MONGOC_HAVE_SOCKLEN 1)
set(MONGOC_SOCKET_ARG3 "socklen_t")

set(MONGOC_ENABLE_RDTSCP 0)
set(MONGOC_NO_AUTOMATIC_GLOBALS 1)
set(MONGOC_ENABLE_STATIC_INSTALL 0)
set(MONGOC_ENABLE_SHM_COUNTERS 0)
set(MONGOC_HAVE_SCHED_GETCPU 0)
set(MONGOC_HAVE_SS_FAMILY 0)

configure_file(
    ${LIBBSON_SOURCE_DIR}/bson/bson-config.h.in
    ${LIBBSON_BINARY_DIR}/bson/bson-config.h
)
configure_file(
    ${LIBBSON_SOURCE_DIR}/bson/bson-version.h.in
    ${LIBBSON_BINARY_DIR}/bson/bson-version.h
)

set(COMMON_SOURCE_DIR "${LIBBSON_SOURCES_ROOT}/common")
file(GLOB_RECURSE COMMON_SOURCES "${COMMON_SOURCE_DIR}/*.c")
configure_file(
    ${COMMON_SOURCE_DIR}/common-config.h.in
    ${COMMON_SOURCE_DIR}/common-config.h
)
add_library(_libbson ${LIBBSON_SOURCES} ${COMMON_SOURCES})
add_library(ch_contrib::libbson ALIAS _libbson)
target_include_directories(_libbson SYSTEM PUBLIC ${LIBBSON_SOURCE_DIR} ${LIBBSON_BINARY_DIR} ${COMMON_SOURCE_DIR})
target_compile_definitions(_libbson PRIVATE BSON_COMPILATION)
if(OS_LINUX)
    target_compile_definitions(_libbson PRIVATE -D_GNU_SOURCE -D_POSIX_C_SOURCE=199309L -D_XOPEN_SOURCE=600)
elseif(OS_DARWIN)
    target_compile_definitions(_libbson PRIVATE -D_DARWIN_C_SOURCE)
endif()


set(LIBMONGOC_SOURCE_DIR "${LIBBSON_SOURCES_ROOT}/libmongoc/src")
file(GLOB_RECURSE LIBMONGOC_SOURCES "${LIBMONGOC_SOURCE_DIR}/*.c")
set(UTF8PROC_SOURCE_DIR "${LIBBSON_SOURCES_ROOT}/utf8proc-2.8.0")
set(UTF8PROC_SOURCES "${UTF8PROC_SOURCE_DIR}/utf8proc.c")
set(UTHASH_SOURCE_DIR "${LIBBSON_SOURCES_ROOT}/uthash")

configure_file(
    ${LIBMONGOC_SOURCE_DIR}/mongoc/mongoc-config.h.in
    ${LIBMONGOC_SOURCE_DIR}/mongoc/mongoc-config.h
)
configure_file(
    ${LIBMONGOC_SOURCE_DIR}/mongoc/mongoc-version.h.in
    ${LIBMONGOC_SOURCE_DIR}/mongoc/mongoc-version.h
)
add_library(_libmongoc ${LIBMONGOC_SOURCES} ${COMMON_SOURCES} ${UTF8PROC_SOURCES})
add_library(ch_contrib::libmongoc ALIAS _libmongoc)
target_include_directories(_libmongoc SYSTEM PUBLIC ${LIBMONGOC_SOURCE_DIR} ${COMMON_SOURCE_DIR} ${UTF8PROC_SOURCE_DIR} ${UTHASH_SOURCE_DIR})
target_include_directories(_libmongoc SYSTEM PRIVATE ${LIBMONGOC_SOURCE_DIR}/mongoc ${UTHASH_SOURCE_DIR})
target_compile_definitions(_libmongoc PRIVATE MONGOC_COMPILATION)
target_link_libraries(_libmongoc ch_contrib::libbson ch_contrib::c-ares ch_contrib::zstd)
if(ENABLE_SSL)
    target_link_libraries(_libmongoc OpenSSL::SSL)
endif()
1
contrib/mongo-cxx-driver
vendored
Submodule
@ -0,0 +1 @@
Subproject commit 3166bdb49b717ce1bc30f46cc2b274ab1de7005b
192
contrib/mongo-cxx-driver-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,192 @@
option(USE_MONGODB "Enable MongoDB support" ${ENABLE_LIBRARIES})

if(NOT USE_MONGODB)
    message(STATUS "Not using mongocxx and bsoncxx")
    return()
endif()

set(BSONCXX_SOURCES_DIR "${ClickHouse_SOURCE_DIR}/contrib/mongo-cxx-driver/src/bsoncxx")
set(BSONCXX_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/mongo-cxx-driver/src/bsoncxx")

set(BSONCXX_SOURCES
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/array/element.cpp
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/array/value.cpp
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/array/view.cpp
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/builder/core.cpp
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/decimal128.cpp
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/document/element.cpp
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/document/value.cpp
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/document/view.cpp
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/exception/error_code.cpp
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/json.cpp
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/oid.cpp
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/private/itoa.cpp
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/string/view_or_value.cpp
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/types.cpp
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/types/bson_value/value.cpp
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/types/bson_value/view.cpp
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/validate.cpp
)
set(BSONCXX_POLY_USE_IMPLS ON)

configure_file(
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/config/config.hpp.in
    ${BSONCXX_BINARY_DIR}/lib/bsoncxx/v_noabi/bsoncxx/config/config.hpp
)
configure_file(
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/config/version.hpp.in
    ${BSONCXX_BINARY_DIR}/lib/bsoncxx/v_noabi/bsoncxx/config/version.hpp
)
configure_file(
    ${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi/bsoncxx/config/private/config.hh.in
    ${BSONCXX_BINARY_DIR}/lib/bsoncxx/v_noabi/bsoncxx/config/private/config.hh
)

add_library(_bsoncxx ${BSONCXX_SOURCES})
add_library(ch_contrib::bsoncxx ALIAS _bsoncxx)
target_include_directories(_bsoncxx SYSTEM PUBLIC "${BSONCXX_SOURCES_DIR}/include/bsoncxx/v_noabi" "${BSONCXX_SOURCES_DIR}/lib/bsoncxx/v_noabi" "${BSONCXX_BINARY_DIR}/lib/bsoncxx/v_noabi")
target_compile_definitions(_bsoncxx PUBLIC BSONCXX_STATIC)
target_link_libraries(_bsoncxx ch_contrib::libbson)

include(GenerateExportHeader)
generate_export_header(_bsoncxx
    BASE_NAME BSONCXX
    EXPORT_MACRO_NAME BSONCXX_API
    NO_EXPORT_MACRO_NAME BSONCXX_PRIVATE
    EXPORT_FILE_NAME ${BSONCXX_BINARY_DIR}/lib/bsoncxx/v_noabi/bsoncxx/config/export.hpp
    STATIC_DEFINE BSONCXX_STATIC
)


set(MONGOCXX_SOURCES_DIR "${ClickHouse_SOURCE_DIR}/contrib/mongo-cxx-driver/src/mongocxx")
set(MONGOCXX_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/mongo-cxx-driver/src/mongocxx")
set(MONGOCXX_SOURCES
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/bulk_write.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/change_stream.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/client.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/client_encryption.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/client_session.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/collection.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/cursor.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/database.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/events/command_failed_event.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/events/command_started_event.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/events/command_succeeded_event.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/events/heartbeat_failed_event.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/events/heartbeat_started_event.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/events/heartbeat_succeeded_event.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/events/server_changed_event.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/events/server_closed_event.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/events/server_description.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/events/server_opening_event.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/events/topology_changed_event.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/events/topology_closed_event.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/events/topology_description.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/events/topology_opening_event.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/exception/error_code.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/exception/operation_exception.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/exception/server_error_code.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/gridfs/bucket.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/gridfs/downloader.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/gridfs/uploader.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/hint.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/index_model.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/index_view.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/instance.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/logger.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/model/delete_many.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/model/delete_one.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/model/insert_one.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/model/replace_one.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/model/update_many.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/model/update_one.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/model/write.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/aggregate.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/apm.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/auto_encryption.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/bulk_write.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/change_stream.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/client.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/client_encryption.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/client_session.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/count.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/create_collection.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/data_key.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/delete.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/distinct.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/encrypt.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/estimated_document_count.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/find.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/find_one_and_delete.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/find_one_and_replace.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/find_one_and_update.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/gridfs/bucket.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/gridfs/upload.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/index.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/index_view.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/insert.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/pool.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/range.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/replace.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/rewrap_many_datakey.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/server_api.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/tls.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/transaction.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/options/update.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/pipeline.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/pool.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/private/conversions.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/private/libbson.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/private/libmongoc.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/private/numeric_casting.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/read_concern.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/read_preference.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/result/bulk_write.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/result/delete.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/result/gridfs/upload.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/result/insert_many.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/result/insert_one.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/result/replace_one.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/result/rewrap_many_datakey.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/result/update.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/search_index_model.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/search_index_view.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/uri.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/validation_criteria.cpp
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/write_concern.cpp
)
set(MONGOCXX_COMPILER_VERSION "${CMAKE_CXX_COMPILER_VERSION}")
set(MONGOCXX_COMPILER_ID "${CMAKE_CXX_COMPILER_ID}")
set(MONGOCXX_LINK_WITH_STATIC_MONGOC 1)
set(MONGOCXX_BUILD_STATIC 1)
if(ENABLE_SSL)
    set(MONGOCXX_ENABLE_SSL 1)
endif()

configure_file(
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/config/config.hpp.in
    ${MONGOCXX_BINARY_DIR}/lib/mongocxx/v_noabi/mongocxx/config/config.hpp
)
configure_file(
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/config/version.hpp.in
    ${MONGOCXX_BINARY_DIR}/lib/mongocxx/v_noabi/mongocxx/config/version.hpp
)
configure_file(
    ${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi/mongocxx/config/private/config.hh.in
    ${MONGOCXX_BINARY_DIR}/lib/mongocxx/v_noabi/mongocxx/config/private/config.hh
)

add_library(_mongocxx ${MONGOCXX_SOURCES})
add_library(ch_contrib::mongocxx ALIAS _mongocxx)
target_include_directories(_mongocxx SYSTEM PUBLIC "${MONGOCXX_SOURCES_DIR}/include/mongocxx/v_noabi" "${MONGOCXX_SOURCES_DIR}/lib/mongocxx/v_noabi" "${MONGOCXX_BINARY_DIR}/lib/mongocxx/v_noabi")
target_compile_definitions(_mongocxx PUBLIC MONGOCXX_STATIC)
target_link_libraries(_mongocxx ch_contrib::bsoncxx ch_contrib::libmongoc)

generate_export_header(_mongocxx
    BASE_NAME MONGOCXX
    EXPORT_MACRO_NAME MONGOCXX_API
    NO_EXPORT_MACRO_NAME MONGOCXX_PRIVATE
    EXPORT_FILE_NAME ${MONGOCXX_BINARY_DIR}/lib/mongocxx/v_noabi/mongocxx/config/export.hpp
    STATIC_DEFINE MONGOCXX_STATIC
)
2
contrib/sysroot
vendored
@ -1 +1 @@
Subproject commit 5be834147d5b5dd77ca2b821f356982029320513
Subproject commit 738138e665809a5b28c453983c5f48f23a340ed6
@ -1,4 +1,11 @@
# docker build -t clickhouse/docs-builder .
FROM golang:alpine AS htmltest-builder

ARG HTMLTEST_VERSION=0.17.0

RUN CGO_ENABLED=0 go install github.com/wjdp/htmltest@v${HTMLTEST_VERSION} \
    && mv "${GOPATH}/bin/htmltest" /usr/bin/htmltest

# nodejs 17 prefers ipv6 and is broken in our environment
FROM node:16-alpine

@ -17,6 +24,13 @@ RUN yarn config set registry https://registry.npmjs.org \
    && yarn install \
    && yarn cache clean

ENV HOME /opt/clickhouse-docs

RUN mkdir /output_path \
    && chmod -R a+w . /output_path \
    && git config --global --add safe.directory /opt/clickhouse-docs

COPY run.sh /run.sh
COPY --from=htmltest-builder /usr/bin/htmltest /usr/bin/htmltest

ENTRYPOINT ["/run.sh"]
@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.8.4.13"
ARG VERSION="24.9.1.3278"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""
@ -97,6 +97,10 @@ cmake --debug-trycompile -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUI
# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
ninja $NINJA_FLAGS $BUILD_TARGET

# We don't allow dirty files in the source directory after build
git ls-files --others --exclude-standard | grep . && echo "^ Dirty files in the working copy after build" && exit 1
git submodule foreach --quiet git ls-files --others --exclude-standard | grep . && echo "^ Dirty files in submodules after build" && exit 1

ls -la ./programs

ccache_status
@ -2,8 +2,8 @@
# To run this script you must install docker and piddeptree python package
#

import subprocess
import os
import subprocess
import sys
@ -1,4 +1,4 @@
FROM ubuntu:20.04 AS glibc-donor
FROM ubuntu:22.04 AS glibc-donor
ARG TARGETARCH

RUN arch=${TARGETARCH:-amd64} \
@ -6,8 +6,11 @@ RUN arch=${TARGETARCH:-amd64} \
    amd64) rarch=x86_64 ;; \
    arm64) rarch=aarch64 ;; \
    esac \
    && ln -s "${rarch}-linux-gnu" /lib/linux-gnu
    && ln -s "${rarch}-linux-gnu" /lib/linux-gnu \
    && case $arch in \
    amd64) ln /lib/linux-gnu/ld-linux-x86-64.so.2 /lib/linux-gnu/ld-2.35.so ;; \
    arm64) ln /lib/linux-gnu/ld-linux-aarch64.so.1 /lib/linux-gnu/ld-2.35.so ;; \
    esac

FROM alpine

@ -17,7 +20,7 @@ ENV LANG=en_US.UTF-8 \
    TZ=UTC \
    CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml

COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/
COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.35.so /lib/
COPY --from=glibc-donor /etc/nsswitch.conf /etc/
COPY docker_related_config.xml /etc/clickhouse-server/config.d/
COPY entrypoint.sh /entrypoint.sh
@ -25,14 +28,14 @@ COPY entrypoint.sh /entrypoint.sh
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
    && case $arch in \
    amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \
    arm64) ln -sf /lib/ld-2.31.so /lib/ld-linux-aarch64.so.1 ;; \
    amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.35.so /lib64/ld-linux-x86-64.so.2 ;; \
    arm64) ln -sf /lib/ld-2.35.so /lib/ld-linux-aarch64.so.1 ;; \
    esac

# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.8.4.13"
ARG VERSION="24.9.1.3278"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""
@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list

ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="24.8.4.13"
ARG VERSION="24.9.1.3278"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

#docker-official-library:off
@ -1,9 +1,10 @@
#!/usr/bin/env python3

from argparse import ArgumentParser
import os
import jinja2
import itertools
import os
from argparse import ArgumentParser

import jinja2


def removesuffix(text, suffix):
@ -1,7 +1,8 @@
import datetime
import os
import subprocess
import datetime
from flask import Flask, flash, request, redirect, url_for

from flask import Flask, flash, redirect, request, url_for


def run_command(command, wait=False):
@ -1,9 +1,9 @@
#!/usr/bin/env python3

import os
import logging
import argparse
import csv
import logging
import os


def process_result(result_folder):
@ -1,12 +1,12 @@
#!/usr/bin/env python3

import os
import yaml
import html
import os
import random
import string
from clickhouse_driver import Client

import yaml
from clickhouse_driver import Client

client = Client(host="localhost", port=9000)
settings = {
9
docs/.htmltest.yml
Normal file
@ -0,0 +1,9 @@
DirectoryPath: /test
IgnoreDirectoryMissingTrailingSlash: true
CheckExternal: false
CheckInternal: false
CheckInternalHash: true
CheckMailto: false
IgnoreAltMissing: true
IgnoreEmptyHref: true
IgnoreInternalEmptyHash: true
32
docs/changelogs/v24.7.5.37-stable.md
Normal file
@ -0,0 +1,32 @@
---
sidebar_position: 1
sidebar_label: 2024
---

# 2024 Changelog

### ClickHouse release v24.7.5.37-stable (f2533ca97be) FIXME as compared to v24.7.4.51-stable (70fe2f6fa52)

#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#68971](https://github.com/ClickHouse/ClickHouse/issues/68971): Fix the upper bound of the function `fromModifiedJulianDay`. It was supposed to be `9999-12-31` but was mistakenly set to `9999-01-01`. [#67583](https://github.com/ClickHouse/ClickHouse/pull/67583) ([PHO](https://github.com/depressed-pho)).
* Backported in [#68816](https://github.com/ClickHouse/ClickHouse/issues/68816): Fixed crash in Parquet filtering when data types in the file substantially differ from requested types (e.g. `... FROM file('a.parquet', Parquet, 'x String')`, but the file has `x Int64`). Without this fix, use `input_format_parquet_filter_push_down = 0` as a workaround (see the sketch after this list). [#68131](https://github.com/ClickHouse/ClickHouse/pull/68131) ([Michael Kolupaev](https://github.com/al13n321)).
* Backported in [#69006](https://github.com/ClickHouse/ClickHouse/issues/69006): After https://github.com/ClickHouse/ClickHouse/pull/61984, `schema_inference_make_columns_nullable=0` could still make columns `Nullable` in Parquet/Arrow formats. The change was backward incompatible and users noticed the changes in behaviour. This PR makes `schema_inference_make_columns_nullable=0` work as before (no Nullable columns will be inferred) and introduces a new value `auto` for this setting that will make columns `Nullable` only if the data has information about nullability. [#68298](https://github.com/ClickHouse/ClickHouse/pull/68298) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68884](https://github.com/ClickHouse/ClickHouse/issues/68884): Fixes [#50868](https://github.com/ClickHouse/ClickHouse/issues/50868). Small DateTime64 constant values returned by a nested subquery inside a distributed query were wrongly transformed to Nulls, thus causing errors and possible incorrect query results. [#68323](https://github.com/ClickHouse/ClickHouse/pull/68323) ([Shankar](https://github.com/shiyer7474)).
* Backported in [#69027](https://github.com/ClickHouse/ClickHouse/issues/69027): Added back virtual columns `_table` and `_database` to distributed tables. They were available until version 24.3. [#68672](https://github.com/ClickHouse/ClickHouse/pull/68672) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#68862](https://github.com/ClickHouse/ClickHouse/issues/68862): Fix possible error `Size of permutation (0) is less than required (...)` during Variant column permutation. [#68681](https://github.com/ClickHouse/ClickHouse/pull/68681) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68788](https://github.com/ClickHouse/ClickHouse/issues/68788): Fix issue with materialized constant keys when hashing maps with arrays as keys in functions `sipHash(64/128)Keyed`. [#68731](https://github.com/ClickHouse/ClickHouse/pull/68731) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Backported in [#68909](https://github.com/ClickHouse/ClickHouse/issues/68909): Fix complex types metadata parsing in DeltaLake. Closes [#68739](https://github.com/ClickHouse/ClickHouse/issues/68739). [#68836](https://github.com/ClickHouse/ClickHouse/pull/68836) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#69158](https://github.com/ClickHouse/ClickHouse/issues/69158): Fix possible wrong result during `anyHeavy` state merge. [#68950](https://github.com/ClickHouse/ClickHouse/pull/68950) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#69118](https://github.com/ClickHouse/ClickHouse/issues/69118): Fix a logical error when we have an empty async insert. [#69080](https://github.com/ClickHouse/ClickHouse/pull/69080) ([Han Fei](https://github.com/hanfei1991)).
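
For the Parquet fix above, this is a minimal sketch of the mentioned workaround for the affected releases (the file name, column name, and filter are hypothetical, chosen only to mirror the entry's own example):

```sql
-- Requesting 'x String' while the file stores x Int64 could crash during
-- filtering; disabling filter push-down avoids the affected code path.
SELECT *
FROM file('a.parquet', Parquet, 'x String')
WHERE x != ''
SETTINGS input_format_parquet_filter_push_down = 0;
```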

#### NO CL CATEGORY

* Backported in [#68944](https://github.com/ClickHouse/ClickHouse/issues/68944):. [#68897](https://github.com/ClickHouse/ClickHouse/pull/68897) ([Alexander Gololobov](https://github.com/davenger)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Backported in [#68832](https://github.com/ClickHouse/ClickHouse/issues/68832): Turn off fault injection for insert in `01396_inactive_replica_cleanup_nodes_zookeeper`. [#68715](https://github.com/ClickHouse/ClickHouse/pull/68715) ([alesapin](https://github.com/alesapin)).
* Backported in [#68779](https://github.com/ClickHouse/ClickHouse/issues/68779): Fix flaky test 00989_parallel_parts_loading. [#68737](https://github.com/ClickHouse/ClickHouse/pull/68737) ([alesapin](https://github.com/alesapin)).
* Backported in [#68960](https://github.com/ClickHouse/ClickHouse/issues/68960): Fix 2477 timeout. [#68752](https://github.com/ClickHouse/ClickHouse/pull/68752) ([jsc0218](https://github.com/jsc0218)).
* Backported in [#69050](https://github.com/ClickHouse/ClickHouse/issues/69050): Fix 01114_database_atomic flakiness. [#68930](https://github.com/ClickHouse/ClickHouse/pull/68930) ([Raúl Marín](https://github.com/Algunenano)).
500
docs/changelogs/v24.9.1.3278-stable.md
Normal file
@ -0,0 +1,500 @@
---
sidebar_position: 1
sidebar_label: 2024
---

# 2024 Changelog

### ClickHouse release v24.9.1.3278-stable (6d058d82a8e) FIXME as compared to v24.9.1.1-new (e02b434d2fc)

#### Backward Incompatible Change
* Allow to write `SETTINGS` before `FORMAT` in a chain of queries with `UNION` when subqueries are inside parentheses (see the sketch after this list). This closes [#39712](https://github.com/ClickHouse/ClickHouse/issues/39712). Change the behavior when a query has the SETTINGS clause specified twice in a sequence. The closest SETTINGS clause will have a preference for the corresponding subquery. In the previous versions, the outermost SETTINGS clause could take a preference over the inner one. [#60197](https://github.com/ClickHouse/ClickHouse/pull/60197) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Do not allow explicitly specifying a UUID when creating a table in a Replicated database. Also, do not allow explicitly specifying the ZooKeeper path and replica name for *MergeTree tables in Replicated databases. [#66104](https://github.com/ClickHouse/ClickHouse/pull/66104) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Reimplement the Dynamic type. Now when the limit of dynamic data types is reached, new types are not cast to String but stored in a special data structure in binary format with a binary-encoded data type. Now any type ever inserted into a Dynamic column can be read from it as a subcolumn. [#68132](https://github.com/ClickHouse/ClickHouse/pull/68132) ([Pavel Kruglov](https://github.com/Avogar)).
* Expressions like `a[b].c` are supported for named tuples, as well as named subscripts from arbitrary expressions, e.g., `expr().name`. This is useful for processing JSON. This closes [#54965](https://github.com/ClickHouse/ClickHouse/issues/54965). In previous versions, an expression of form `expr().name` was parsed as `tupleElement(expr(), name)`, and the query analyzer was searching for a column `name` rather than for the corresponding tuple element; while in the new version, it is changed to `tupleElement(expr(), 'name')`. In most cases, the previous version was not working, but it is possible to imagine a very unusual scenario when this change could lead to incompatibility: if you stored names of tuple elements in a column or an alias, that was named differently than the tuple element's name: `SELECT 'b' AS a, CAST([tuple(123)] AS 'Array(Tuple(b UInt8))') AS t, t[1].a`. It is very unlikely that you used such queries, but we still have to mark this change as potentially backward incompatible. [#68435](https://github.com/ClickHouse/ClickHouse/pull/68435) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* When the setting `print_pretty_type_names` is enabled, it will print `Tuple` data type in a pretty form in `SHOW CREATE TABLE` statements, `formatQuery` function, and in the interactive mode in `clickhouse-client` and `clickhouse-local`. In previous versions, this setting was only applied to `DESCRIBE` queries and `toTypeName`. This closes [#65753](https://github.com/ClickHouse/ClickHouse/issues/65753). [#68492](https://github.com/ClickHouse/ClickHouse/pull/68492) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
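
A minimal sketch of the first change above (the query shape is what matters; the selected values and settings are illustrative): with parenthesized `UNION` subqueries, `SETTINGS` may now precede `FORMAT`, and the closest `SETTINGS` clause takes preference for its subquery.

```sql
-- SETTINGS written before FORMAT for a parenthesized UNION chain
(SELECT 1 AS x UNION ALL SELECT 2)
SETTINGS max_threads = 1
FORMAT JSONEachRow;
```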

#### New Feature
* Function `toStartOfInterval()` now has a new overload which emulates TimescaleDB's `time_bucket()` function, respectively PostgreSQL's `date_bin()` function. ([#55619](https://github.com/ClickHouse/ClickHouse/issues/55619)). It allows to align date or timestamp values to multiples of a given interval from an *arbitrary* origin (instead of 0000-01-01 00:00:00.000 as *fixed* origin). For example, `SELECT toStartOfInterval(toDateTime('2023-01-01 14:45:00'), INTERVAL 1 MINUTE, toDateTime('2023-01-01 14:35:30'));` returns `2023-01-01 14:44:30` which is a multiple of 1 minute intervals, starting from origin `2023-01-01 14:35:30`. [#56738](https://github.com/ClickHouse/ClickHouse/pull/56738) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Add support for `ATTACH PARTITION ALL FROM`. [#61987](https://github.com/ClickHouse/ClickHouse/pull/61987) ([Kirill Nikiforov](https://github.com/allmazz)).
* Adds a setting `input_format_try_infer_variants` which allows the Variant type to be inferred during schema inference for text formats when there is more than one possible type for column/array elements. [#63798](https://github.com/ClickHouse/ClickHouse/pull/63798) ([Shaun Struwig](https://github.com/Blargian)).
* Introduced the JSONCompactWithProgress format, where ClickHouse outputs each row as a newline-delimited JSON object, including metadata, data, progress, totals, and statistics. [#66205](https://github.com/ClickHouse/ClickHouse/pull/66205) ([Alexey Korepanov](https://github.com/alexkorep)).
* Implement the new JSON data type (see the sketch after this list). [#66444](https://github.com/ClickHouse/ClickHouse/pull/66444) ([Pavel Kruglov](https://github.com/Avogar)).
* Add the `input_format_json_empty_as_default` setting which, when enabled, treats empty fields in JSON inputs as default values. Closes [#59339](https://github.com/ClickHouse/ClickHouse/issues/59339). [#66782](https://github.com/ClickHouse/ClickHouse/pull/66782) ([Alexis Arnaud](https://github.com/a-a-f)).
* Added functions `overlay` and `overlayUTF8` which replace parts of a string by another string. Example: `SELECT overlay('Hello New York', 'Jersey', 11)` returns `Hello New Jersey`. [#66933](https://github.com/ClickHouse/ClickHouse/pull/66933) ([李扬](https://github.com/taiyang-li)).
* Add a new command for lightweight deletes in a partition: `DELETE FROM [db.]table [ON CLUSTER cluster] [IN PARTITION partition_expr] WHERE expr`. [#67805](https://github.com/ClickHouse/ClickHouse/pull/67805) ([sunny](https://github.com/sunny19930321)).
* Implemented comparison for `Interval` data type values, so they are now converted to the least supertype. [#68057](https://github.com/ClickHouse/ClickHouse/pull/68057) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Add the `create_if_not_exists` setting to default to `IF NOT EXISTS` behavior during CREATE statements. [#68164](https://github.com/ClickHouse/ClickHouse/pull/68164) ([Peter Nguyen](https://github.com/petern48)).
* Makes it possible to read Iceberg tables in Azure and locally. [#68210](https://github.com/ClickHouse/ClickHouse/pull/68210) ([Daniil Ivanik](https://github.com/divanik)).
* Add aggregate functions `distinctDynamicTypes`/`distinctJSONPaths`/`distinctJSONPathsAndTypes` for better introspection of JSON column content. [#68463](https://github.com/ClickHouse/ClickHouse/pull/68463) ([Pavel Kruglov](https://github.com/Avogar)).
* Query cache entries can now be dropped by tag. For example, the query cache entry created by `SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'abc'` can now be dropped by `SYSTEM DROP QUERY CACHE TAG 'abc'` (or of course just: `SYSTEM DROP QUERY CACHE` which will clear the entire query cache). [#68477](https://github.com/ClickHouse/ClickHouse/pull/68477) ([Michał Tabaszewski](https://github.com/pinsvin00)).
* Add storage encryption for named collections. [#68615](https://github.com/ClickHouse/ClickHouse/pull/68615) ([Pablo Marcos](https://github.com/pamarcos)).
* Added the `ripeMD160` function, which computes the RIPEMD-160 cryptographic hash of a string. Example: `SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'))` returns `37F332F68DB77BD9D7EDD4969571AD671CF9DD3B`. [#68639](https://github.com/ClickHouse/ClickHouse/pull/68639) ([Dergousov Maxim](https://github.com/m7kss1)).
* Add a virtual column `_headers` for the `url` table engine. Closes [#65026](https://github.com/ClickHouse/ClickHouse/issues/65026). [#68867](https://github.com/ClickHouse/ClickHouse/pull/68867) ([flynn](https://github.com/ucasfl)).
* Adding a `system.projections` table to track available projections. [#68901](https://github.com/ClickHouse/ClickHouse/pull/68901) ([Jordi Villar](https://github.com/jrdi)).
* Add new function `arrayZipUnaligned` for Spark compatibility (`arrays_zip`), which allows unaligned arrays, based on the original `arrayZip`. Example: `SELECT arrayZipUnaligned([1], [1, 2, 3])`. [#69030](https://github.com/ClickHouse/ClickHouse/pull/69030) ([李扬](https://github.com/taiyang-li)).
* Adding a `RealTimeMicroseconds` metric in the HTTP header `X-ClickHouse-Summary`. This way we can know the elapsed real time of a request without having to check it in `system.query_log`. [#69032](https://github.com/ClickHouse/ClickHouse/pull/69032) ([Alejandro](https://github.com/alexon1234)).
* Added `cp`/`mv` commands for the keeper client, which atomically copy/move a node. [#69034](https://github.com/ClickHouse/ClickHouse/pull/69034) ([Mikhail Artemenko](https://github.com/Michicosun)).
* Adds argument `scale` (default: `true`) to function `arrayAUC` which allows to skip the normalization step (issue [#69609](https://github.com/ClickHouse/ClickHouse/issues/69609)). [#69717](https://github.com/ClickHouse/ClickHouse/pull/69717) ([gabrielmcg44](https://github.com/gabrielmcg44)).
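
A minimal sketch of the new JSON data type and the `distinctJSONPaths` introspection function from the entries above (the table name and values are hypothetical; in 24.9 the type was still gated behind `allow_experimental_json_type`, so the sketch enables it explicitly):

```sql
SET allow_experimental_json_type = 1;  -- assumed necessary while the type is experimental

CREATE TABLE test_json (data JSON) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO test_json VALUES ('{"a": 1, "b": {"c": "str"}}'), ('{"a": 2, "d": [1, 2, 3]}');

-- Introspect which JSON paths were actually stored across all rows
SELECT distinctJSONPaths(data) FROM test_json;
```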

#### Performance Improvement
* Improve join performance by rearranging the right table by keys when the keys are dense in a left or inner hash join. [#60341](https://github.com/ClickHouse/ClickHouse/pull/60341) ([kevinyhzou](https://github.com/KevinyhZou)).
* Improve performance of all joins by appending `RowRefList` or `RowRef` to `AddedColumns` for lazy output: during `buildOutput`, `RowRefList`/`RowRef` are used for output, and the `is_join_get` condition is removed from the `buildOutput` for-loop. [#63677](https://github.com/ClickHouse/ClickHouse/pull/63677) ([kevinyhzou](https://github.com/KevinyhZou)).
* Load filesystem cache metadata asynchronously during the boot process, in order to make restarts faster. [#65736](https://github.com/ClickHouse/ClickHouse/pull/65736) ([Daniel Pozo Escalona](https://github.com/danipozo)).
* Functions `array` and `map` were optimized to process certain common cases much faster. [#67707](https://github.com/ClickHouse/ClickHouse/pull/67707) ([李扬](https://github.com/taiyang-li)).
* Trivial optimization of ORC string reading, especially when the column contains no NULLs. [#67794](https://github.com/ClickHouse/ClickHouse/pull/67794) ([李扬](https://github.com/taiyang-li)).
* Improved overall performance of merges by reducing the overhead of scheduling steps of merges. [#68016](https://github.com/ClickHouse/ClickHouse/pull/68016) ([Anton Popov](https://github.com/CurtizJ)).
* Speed up requests to S3 when a profile is not set, credentials are not set, and IMDS is not available (for example, when you are querying a public bucket on a machine outside of a cloud). This closes [#52771](https://github.com/ClickHouse/ClickHouse/issues/52771). [#68082](https://github.com/ClickHouse/ClickHouse/pull/68082) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* New algorithm to determine the unit of marks distribution between replicas by consistent hash. Different numbers of marks are chosen for different read patterns to improve performance. [#68424](https://github.com/ClickHouse/ClickHouse/pull/68424) ([Nikita Taranov](https://github.com/nickitat)).
* Devirtualize the format reader in `RowInputFormatWithNamesAndTypes` to bring some performance improvement. [#68437](https://github.com/ClickHouse/ClickHouse/pull/68437) ([李扬](https://github.com/taiyang-li)).
* Add a parallel merge-with-key implementation to maximize CPU utilization. [#68441](https://github.com/ClickHouse/ClickHouse/pull/68441) ([Jiebin Sun](https://github.com/jiebinn)).
* Add setting `output_format_orc_dictionary_key_size_threshold` to allow users to enable dictionary encoding for string columns in the ORC output format. It helps reduce the output ORC file size and significantly improves reading performance (see the sketch after this list). [#68591](https://github.com/ClickHouse/ClickHouse/pull/68591) ([李扬](https://github.com/taiyang-li)).
* Implemented reading only the required files during Hive partitioning. [#68963](https://github.com/ClickHouse/ClickHouse/pull/68963) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Introduce a new Keeper request, RemoveRecursive, which removes a node together with its entire subtree. [#69332](https://github.com/ClickHouse/ClickHouse/pull/69332) ([Mikhail Artemenko](https://github.com/Michicosun)).
* Speed up insert performance with a vector similarity index by adding data to the vector index in parallel. [#69493](https://github.com/ClickHouse/ClickHouse/pull/69493) ([flynn](https://github.com/ucasfl)).
* Previously the algorithmic complexity of the part deduplication logic in parallel replica announcement handling was O(n^2), which could take noticeable time for tables with many parts (or partitions). This change makes the complexity O(n*log(n)). [#69596](https://github.com/ClickHouse/ClickHouse/pull/69596) ([Alexander Gololobov](https://github.com/davenger)).
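
A sketch of the ORC dictionary-encoding setting mentioned above. The output file name, the sample data, and the threshold value `0.8` are all illustrative assumptions, not values from the entry; check the setting's documentation for the exact semantics of the threshold.

```sql
-- Write an ORC file with dictionary encoding enabled for the string column;
-- the low-cardinality column 's' is a good candidate for dictionary encoding.
INSERT INTO FUNCTION file('out.orc', 'ORC')
SELECT number % 10 AS id, toString(number % 10) AS s
FROM numbers(1000)
SETTINGS output_format_orc_dictionary_key_size_threshold = 0.8;
```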

#### Improvement
* Hardened parts of the codebase related to parsing of small entities. The following (minor) bugs were found and fixed: if a `DeltaLake` table is partitioned by Bool, the partition value is always interpreted as false; the `ExternalDistributed` table was using only a single shard in the provided addresses; the value of the `max_threads` setting and similar settings were printed as `'auto(N)'` instead of `auto(N)`. [#52503](https://github.com/ClickHouse/ClickHouse/pull/52503) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Refreshable materialized view improvements: append mode (`... REFRESH EVERY 1 MINUTE APPEND ...`) to add rows to an existing table instead of overwriting the whole table, retries (disabled by default, configured in the SETTINGS section of the query), a `SYSTEM WAIT VIEW <name>` query that waits for the currently running refresh, and some fixes. [#58934](https://github.com/ClickHouse/ClickHouse/pull/58934) ([Michael Kolupaev](https://github.com/al13n321)).
* Use cgroup-specific metrics for CPU usage accounting instead of system-wide metrics. [#62003](https://github.com/ClickHouse/ClickHouse/pull/62003) ([Nikita Taranov](https://github.com/nickitat)).
* IO scheduling for remote S3 disks is now done on the level of HTTP socket streams (instead of whole S3 requests) to resolve `bandwidth_limit` throttling issues. [#65182](https://github.com/ClickHouse/ClickHouse/pull/65182) ([Sergei Trifonov](https://github.com/serxa)).
* Allow a user to have multiple authentication methods instead of only one. Allow authentication methods to be reset to the most recently added method. If you want to run some instances on 24.8 and one on 24.9 for some time, it's better to set `max_authentication_methods_per_user = 1` for that period to avoid potential errors. [#65277](https://github.com/ClickHouse/ClickHouse/pull/65277) ([Arthur Passos](https://github.com/arthurpassos)).
* Functions `upperUTF8` and `lowerUTF8` were previously only able to uppercase / lowercase Cyrillic characters. This limitation is now removed and characters in arbitrary languages are uppercased/lowercased. Example: `SELECT upperUTF8('Süden')` now returns `SÜDEN`. [#65761](https://github.com/ClickHouse/ClickHouse/pull/65761) ([李扬](https://github.com/taiyang-li)).
* When a lightweight delete happens on a table with projection(s), users previously had two options: throw an exception (the default) or drop the projection(s) and proceed. There is now a third option: perform the lightweight delete and then rebuild the projection(s). [#66169](https://github.com/ClickHouse/ClickHouse/pull/66169) ([Shichao](https://github.com/jsc0218)).
* Two options (`dns_allow_resolve_names_to_ipv4` and `dns_allow_resolve_names_to_ipv6`) have been added to allow blocking connections by IP family. [#66895](https://github.com/ClickHouse/ClickHouse/pull/66895) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Added `min_max` as a new type of (experimental) statistics. It supports estimating range predicates over numeric columns, e.g. `x < 100`. [#67013](https://github.com/ClickHouse/ClickHouse/pull/67013) ([JackyWoo](https://github.com/JackyWoo)).
* Make ignoring Ctrl-Z configurable (`ignore_shell_suspend`) in clickhouse-client. [#67134](https://github.com/ClickHouse/ClickHouse/pull/67134) ([Azat Khuzhin](https://github.com/azat)).
* Improve `castOrDefault` from Variant/Dynamic columns so it works when the inner types are not convertible at all. [#67150](https://github.com/ClickHouse/ClickHouse/pull/67150) ([Pavel Kruglov](https://github.com/Avogar)).
* Improve Unicode encoding in JSON output formats. Ensures that valid JSON is generated in the case of certain byte sequences in the result data. [#67938](https://github.com/ClickHouse/ClickHouse/pull/67938) ([mwoenker](https://github.com/mwoenker)).
* Added profile events for merges and mutations for better introspection. [#68015](https://github.com/ClickHouse/ClickHouse/pull/68015) ([Anton Popov](https://github.com/CurtizJ)).
* ODBC: get `http_max_tries` from the server configuration. [#68128](https://github.com/ClickHouse/ClickHouse/pull/68128) ([Rodolphe Dugé de Bernonville](https://github.com/RodolpheDuge)).
* Add wildcard support for user identification in the x509 SubjectAltName extension. [#68236](https://github.com/ClickHouse/ClickHouse/pull/68236) ([Marco Vilas Boas](https://github.com/marco-vb)).
* Improve schema inference of date times. Now DateTime64 is used only when the date time has a fractional part, otherwise regular DateTime is used. Inference of Date/DateTime is stricter now, especially when `date_time_input_format='best_effort'`, to avoid inferring date times from strings in corner cases. [#68382](https://github.com/ClickHouse/ClickHouse/pull/68382) ([Pavel Kruglov](https://github.com/Avogar)).
* Delete the old named-collections code from dictionaries and replace it with the new one, which allows using DDL-created named collections in dictionaries. Closes [#60936](https://github.com/ClickHouse/ClickHouse/issues/60936), closes [#36890](https://github.com/ClickHouse/ClickHouse/issues/36890). [#68412](https://github.com/ClickHouse/ClickHouse/pull/68412) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Use HTTP/1.1 instead of HTTP/1.0 (set by default) for external HTTP authentication. [#68456](https://github.com/ClickHouse/ClickHouse/pull/68456) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Functions `upperUTF8` and `lowerUTF8` were previously only able to uppercase / lowercase Cyrillic characters. This limitation is now removed and arbitrary characters are uppercased/lowercased. Example: `SELECT upperUTF8('Süden')` now returns `SÜDEN`. [#68523](https://github.com/ClickHouse/ClickHouse/pull/68523) ([Robert Schulze](https://github.com/rschu1ze)).
* Added a new set of metrics for Thread Pool introspection, providing deeper insights into thread pool performance and behavior. [#68674](https://github.com/ClickHouse/ClickHouse/pull/68674) ([filimonov](https://github.com/filimonov)).
* Support query parameters in async inserts with format `Values`. [#68741](https://github.com/ClickHouse/ClickHouse/pull/68741) ([Anton Popov](https://github.com/CurtizJ)).
* Support `Date32` in `dateTrunc` and `toStartOfInterval` (see the sketch after this list). [#68874](https://github.com/ClickHouse/ClickHouse/pull/68874) ([LiuNeng](https://github.com/liuneng1994)).
* Add a new setting `output_format_always_quote_identifiers` to always quote identifiers if it is enabled, and a new setting `output_format_identifier_quoting_style` to set the identifier quoting style. Applicable values: `'None'`, `'Backticks'` (default), `'DoubleQuotes'`, `'BackticksMySQL'`. [#68896](https://github.com/ClickHouse/ClickHouse/pull/68896) ([tuanpach](https://github.com/tuanpach)).
* Add `plan_step_name` and `plan_step_description` columns to `system.processors_profile_log`. [#68954](https://github.com/ClickHouse/ClickHouse/pull/68954) ([Alexander Gololobov](https://github.com/davenger)).
* Support for the Spanish language in the embedded dictionaries. [#69035](https://github.com/ClickHouse/ClickHouse/pull/69035) ([Vasily Okunev](https://github.com/VOkunev)).
* Add the CPU architecture to the short fault info. [#69037](https://github.com/ClickHouse/ClickHouse/pull/69037) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* `CREATE TABLE AS` now copies PRIMARY KEY, ORDER BY, and similar clauses. Currently it is supported only for the MergeTree family of table engines. [#69076](https://github.com/ClickHouse/ClickHouse/pull/69076) ([sakulali](https://github.com/sakulali)).
* Replication of a subset of columns is now available through MaterializedPostgreSQL. Closes [#33748](https://github.com/ClickHouse/ClickHouse/issues/33748). [#69092](https://github.com/ClickHouse/ClickHouse/pull/69092) ([Kruglov Kirill](https://github.com/1on)).
* Add a `std::string getHost(const std::string & key) const;` method to "base/poco/Util/include/Poco/Util/AbstractConfiguration.h" which does the same as the `Poco::Util::AbstractConfiguration::getString` method but additionally validates that the value is a correct IP address or domain name. [#69130](https://github.com/ClickHouse/ClickHouse/pull/69130) ([Maxim](https://github.com/m4xxx1m)).
* Do not block retries when establishing a new keeper connection. [#69148](https://github.com/ClickHouse/ClickHouse/pull/69148) ([Raúl Marín](https://github.com/Algunenano)).
* Update DatabaseFactory so that user-defined database engines can have arguments, settings, and table overrides (similar to StorageFactory). [#69201](https://github.com/ClickHouse/ClickHouse/pull/69201) ([Барыкин Никита Олегович](https://github.com/NikBarykin)).
* The restore mode that replaces all external table engines and functions with Null (the `restore_replace_external_engines_to_null` and `restore_replace_external_table_functions_to_null` settings) was failing if the table had SETTINGS. Now it removes settings from the table definition in this case and allows restoring such tables. [#69253](https://github.com/ClickHouse/ClickHouse/pull/69253) ([Ilya Yatsishin](https://github.com/qoega)).
* Reduce memory usage of inserts to JSON by using an adaptive write buffer size. Many files created by a JSON column in a wide part contain a small amount of data, and it doesn't make sense to allocate a 1MB buffer for them. [#69272](https://github.com/ClickHouse/ClickHouse/pull/69272) ([Pavel Kruglov](https://github.com/Avogar)).
* CLICKHOUSE_PASSWORD is escaped for XML in the clickhouse image's entrypoint. [#69301](https://github.com/ClickHouse/ClickHouse/pull/69301) ([aohoyd](https://github.com/aohoyd)).
* Added user-level settings `min_free_disk_bytes_to_throw_insert` and `min_free_disk_ratio_to_throw_insert` to prevent insertions on disks that are almost full. [#69376](https://github.com/ClickHouse/ClickHouse/pull/69376) ([Marco Vilas Boas](https://github.com/marco-vb)).
* Do not retain threads in the concurrent hash join thread pool, to avoid queries spawning threads excessively. [#69406](https://github.com/ClickHouse/ClickHouse/pull/69406) ([Duc Canh Le](https://github.com/canhld94)).
* Allow empty arguments for `arrayZip`/`arrayZipUnaligned`, as `concat` did in https://github.com/ClickHouse/ClickHouse/pull/65887. It is for Spark compatibility in the Gluten CH Backend. [#69576](https://github.com/ClickHouse/ClickHouse/pull/69576) ([李扬](https://github.com/taiyang-li)).
* Support more advanced SSL options for Keeper's internal communication (e.g. private keys with passphrase). [#69582](https://github.com/ClickHouse/ClickHouse/pull/69582) ([Antonio Andelic](https://github.com/antonio2368)).
* Sleep for 10ms before retrying to acquire a lock in `databasereplicatedddlworker::enqueuequeryimpl`. [#69588](https://github.com/ClickHouse/ClickHouse/pull/69588) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Index analysis can take noticeable time for big tables with many parts or partitions. This change should enable killing a heavy query at that stage. [#69606](https://github.com/ClickHouse/ClickHouse/pull/69606) ([Alexander Gololobov](https://github.com/davenger)).
* Masking sensitive info in the `gcs()` table function. [#69611](https://github.com/ClickHouse/ClickHouse/pull/69611) ([Vitaly Baranov](https://github.com/vitlibar)).
* Sync changes to `interpreterdropquery::executetodatabaseimpl` from the private fork. [#69670](https://github.com/ClickHouse/ClickHouse/pull/69670) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Backported in [#69905](https://github.com/ClickHouse/ClickHouse/issues/69905): Change the join-to-sort settings type to unsigned int. [#69886](https://github.com/ClickHouse/ClickHouse/pull/69886) ([kevinyhzou](https://github.com/KevinyhZou)).
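
A small example of the `Date32` support mentioned above. `Date32`, unlike `Date`, can represent dates before 1970, so the illustrative date below exercises the newly supported path:

```sql
-- Date32 values (including pre-1970 dates) now work in dateTrunc / toStartOfInterval
SELECT
    dateTrunc('month', toDate32('1969-05-15')) AS month_start,
    toStartOfInterval(toDate32('1969-05-15'), INTERVAL 1 WEEK) AS week_start;
```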
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
* Rebuild projection for merges that reduce number of rows. [#62364](https://github.com/ClickHouse/ClickHouse/pull/62364) ([cangyin](https://github.com/cangyin)).
|
||||
* Fix attaching table when pg dbname contains "-" in MaterializedPostgreSQL. [#62730](https://github.com/ClickHouse/ClickHouse/pull/62730) ([takakawa](https://github.com/takakawa)).
|
||||
* Storage Join support for nullable columns in left table, close [#61247](https://github.com/ClickHouse/ClickHouse/issues/61247). [#66926](https://github.com/ClickHouse/ClickHouse/pull/66926) ([vdimir](https://github.com/vdimir)).
|
||||
* Incorrect query result with parallel replicas (distribute queries as well) when `IN` operator contains conversion to Decimal(). The bug was introduced with the new analyzer. [#67234](https://github.com/ClickHouse/ClickHouse/pull/67234) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Fix the problem that alter modfiy order by causes inconsistent metadata. [#67436](https://github.com/ClickHouse/ClickHouse/pull/67436) ([iceFireser](https://github.com/iceFireser)).
|
||||
* Fix the upper bound of the function `fromModifiedJulianDay`. It was supposed to be `9999-12-31` but was mistakenly set to `9999-01-01`. [#67583](https://github.com/ClickHouse/ClickHouse/pull/67583) ([PHO](https://github.com/depressed-pho)).
|
||||
* Fix when the index is not at the beginning of the tuple during `IN` query. [#67626](https://github.com/ClickHouse/ClickHouse/pull/67626) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* Fixed error on generated columns in MaterializedPostgreSQL when adnum ordering is broken [#63161](https://github.com/ClickHouse/ClickHouse/issues/63161). Fixed error on id column with nextval expression as default MaterializedPostgreSQL when there are generated columns in table. Fixed error on dropping publication with symbols except [a-z1-9-]. [#67664](https://github.com/ClickHouse/ClickHouse/pull/67664) ([Kruglov Kirill](https://github.com/1on)).
|
||||
* Fix expiration in `RoleCache`. [#67748](https://github.com/ClickHouse/ClickHouse/pull/67748) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix window view missing blocks due to slow flush to view. [#67983](https://github.com/ClickHouse/ClickHouse/pull/67983) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix MSAN issue caused by incorrect date format. [#68105](https://github.com/ClickHouse/ClickHouse/pull/68105) ([JackyWoo](https://github.com/JackyWoo)).
|
||||
* Fixed crash in Parquet filtering when data types in the file substantially differ from requested types (e.g. `... FROM file('a.parquet', Parquet, 'x String')`, but the file has `x Int64`). Without this fix, use `input_format_parquet_filter_push_down = 0` as a workaround. [#68131](https://github.com/ClickHouse/ClickHouse/pull/68131) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fix crash in `lag`/`lead` which is introduced in [#67091](https://github.com/ClickHouse/ClickHouse/issues/67091). [#68262](https://github.com/ClickHouse/ClickHouse/pull/68262) ([lgbo](https://github.com/lgbo-ustc)).
|
||||
* Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* After https://github.com/ClickHouse/ClickHouse/pull/61984 `schema_inference_make_columns_nullable=0` still can make columns `Nullable` in Parquet/Arrow formats. The change was backward incompatible and users noticed the changes in the behaviour. This PR makes `schema_inference_make_columns_nullable=0` to work as before (no Nullable columns will be inferred) and introduces new value `auto` for this setting that will make columns `Nullable` only if data has information about nullability. [#68298](https://github.com/ClickHouse/ClickHouse/pull/68298) ([Pavel Kruglov](https://github.com/Avogar)).
|
||||
* Fixes [#50868](https://github.com/ClickHouse/ClickHouse/issues/50868). Small DateTime64 constant values returned by a nested subquery inside a distributed query were wrongly transformed to Nulls, thus causing errors and possible incorrect query results. [#68323](https://github.com/ClickHouse/ClickHouse/pull/68323) ([Shankar](https://github.com/shiyer7474)).
|
||||
* Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Fix bug in key condition. [#68354](https://github.com/ClickHouse/ClickHouse/pull/68354) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Fix crash on drop or rename a role that is used in LDAP external user directory. [#68355](https://github.com/ClickHouse/ClickHouse/pull/68355) ([Andrey Zvonov](https://github.com/zvonand)).
|
||||
* Fix Progress column value of system.view_refreshes greater than 1 [#68377](https://github.com/ClickHouse/ClickHouse/issues/68377). [#68378](https://github.com/ClickHouse/ClickHouse/pull/68378) ([megao](https://github.com/jetgm)).
|
||||
* Process regexp flags correctly. [#68389](https://github.com/ClickHouse/ClickHouse/pull/68389) ([Han Fei](https://github.com/hanfei1991)).
* PostgreSQL-style cast operator (`::`) works correctly even for SQL-style hex and binary string literals (e.g., `SELECT x'414243'::String`). This closes [#68324](https://github.com/ClickHouse/ClickHouse/issues/68324). [#68482](https://github.com/ClickHouse/ClickHouse/pull/68482) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
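  Examples of the now-working casts (`b'...'` is the SQL-style binary literal):

  ```sql
  SELECT x'414243'::String;   -- 'ABC'
  SELECT b'01000001'::String; -- 'A'
  ```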
* Minor patch for https://github.com/ClickHouse/ClickHouse/pull/68131. [#68494](https://github.com/ClickHouse/ClickHouse/pull/68494) ([Chang chen](https://github.com/baibaichen)).
* Fix `SAMPLE n` where `n` is an integer. Closes [#68239](https://github.com/ClickHouse/ClickHouse/issues/68239). [#68499](https://github.com/ClickHouse/ClickHouse/pull/68499) ([Denis Hananein](https://github.com/denis-hananein)).
* Fix a bug in `mannWhitneyUTest` when the sizes of the two distributions are not equal. [#68556](https://github.com/ClickHouse/ClickHouse/pull/68556) ([Han Fei](https://github.com/hanfei1991)).
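  A sketch using the `values` table function; the two samples deliberately have different sizes:

  ```sql
  SELECT mannWhitneyUTest(value, sample_index)
  FROM values('value Float64, sample_index UInt8',
              (10, 0), (12, 0), (11, 0), -- three observations in sample 0
              (9, 1), (8, 1));           -- two observations in sample 1
  ```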
* Fix a failure to start replication of ReplicatedMergeTree after an unexpected restart, caused by abnormal handling of a covered-by-broken part. [#68584](https://github.com/ClickHouse/ClickHouse/pull/68584) ([baolin](https://github.com/baolinhuang)).
* Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)).
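  The affected case, sketched; the key is a tuple of two UInt64 values:

  ```sql
  -- Previously could throw LOGICAL_ERROR, now hashes like any other input:
  SELECT sipHash64Keyed((toUInt64(1), toUInt64(2)), []);
  SELECT sipHash128Keyed((toUInt64(1), toUInt64(2)), tuple());
  ```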
* Fix the full-text index over multiple columns: it could filter out wrong results because `row_id` was not reset between different columns. A reproducer is in `tests/queries/0_stateless/03228_full_text_with_multi_col.sql`. [#68644](https://github.com/ClickHouse/ClickHouse/pull/68644) ([siyuan](https://github.com/linkwk7)).
* Fix invalid characters `'\t'` and `'\n'` in `replica_name` when creating a Replicated table, which caused incorrect parsing of 'source replica' in `LogEntry`. Mentioned in issue [#68640](https://github.com/ClickHouse/ClickHouse/issues/68640). [#68645](https://github.com/ClickHouse/ClickHouse/pull/68645) ([Zhigao Hong](https://github.com/zghong)).
* Added back virtual columns `_table` and `_database` to distributed tables. They were available until version 24.3. [#68672](https://github.com/ClickHouse/ClickHouse/pull/68672) ([Anton Popov](https://github.com/CurtizJ)).
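  Usage sketch, assuming a Distributed table named `dist_events`:

  ```sql
  SELECT _database, _table, count()
  FROM dist_events
  GROUP BY _database, _table;
  ```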
* Fix possible error `Size of permutation (0) is less than required (...)` during Variant column permutation. [#68681](https://github.com/ClickHouse/ClickHouse/pull/68681) ([Pavel Kruglov](https://github.com/Avogar)).
* Fix possible error `DB::Exception: Block structure mismatch in joined block stream: different columns:` with new JSON column. [#68686](https://github.com/ClickHouse/ClickHouse/pull/68686) ([Pavel Kruglov](https://github.com/Avogar)).
* Fix issue with materialized constant keys when hashing maps with arrays as keys in functions `sipHash(64/128)Keyed`. [#68731](https://github.com/ClickHouse/ClickHouse/pull/68731) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Make `ColumnsDescription::toString` format each column using the same `IAST::FormatState` object. This results in uniform columns metadata being written to disk and ZooKeeper. [#68733](https://github.com/ClickHouse/ClickHouse/pull/68733) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Fix merging of aggregated data for grouping sets. [#68744](https://github.com/ClickHouse/ClickHouse/pull/68744) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix a logical error when we create a ReplicatedMergeTree table, alter a column, and then execute `MODIFY STATISTICS`. [#68820](https://github.com/ClickHouse/ClickHouse/pull/68820) ([Han Fei](https://github.com/hanfei1991)).
* Fix resolving dynamic subcolumns from subqueries in analyzer. [#68824](https://github.com/ClickHouse/ClickHouse/pull/68824) ([Pavel Kruglov](https://github.com/Avogar)).
* Fix complex types metadata parsing in DeltaLake. Closes [#68739](https://github.com/ClickHouse/ClickHouse/issues/68739). [#68836](https://github.com/ClickHouse/ClickHouse/pull/68836) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fixed asynchronous inserts in case when metadata of table is changed (by `ALTER ADD/MODIFY COLUMN` queries) after insert but before flush to the table. [#68837](https://github.com/ClickHouse/ClickHouse/pull/68837) ([Anton Popov](https://github.com/CurtizJ)).
* Fix unexpected exception when passing empty tuple in array. This fixes [#68618](https://github.com/ClickHouse/ClickHouse/issues/68618). [#68848](https://github.com/ClickHouse/ClickHouse/pull/68848) ([Amos Bird](https://github.com/amosbird)).
* Fix parsing of pure metadata mutation commands. [#68935](https://github.com/ClickHouse/ClickHouse/pull/68935) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix possible wrong result during anyHeavy state merge. [#68950](https://github.com/ClickHouse/ClickHouse/pull/68950) ([Raúl Marín](https://github.com/Algunenano)).
* Fixed writing to Materialized Views with enabled setting `optimize_functions_to_subcolumns`. [#68951](https://github.com/ClickHouse/ClickHouse/pull/68951) ([Anton Popov](https://github.com/CurtizJ)).
* Don't use the serializations cache in const `Dynamic` column methods. It could lead to use of an uninitialized value or even a race condition during aggregations. [#68953](https://github.com/ClickHouse/ClickHouse/pull/68953) ([Pavel Kruglov](https://github.com/Avogar)).
* Fix parsing error when null should be inserted as default in some cases during JSON type parsing. [#68955](https://github.com/ClickHouse/ClickHouse/pull/68955) ([Pavel Kruglov](https://github.com/Avogar)).
* Fix `Content-Encoding` not sent in some compressed responses. [#64802](https://github.com/ClickHouse/ClickHouse/issues/64802). [#68975](https://github.com/ClickHouse/ClickHouse/pull/68975) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* There were cases when a path was concatenated incorrectly and contained a `//` part; this is solved by path normalization. [#69066](https://github.com/ClickHouse/ClickHouse/pull/69066) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix a logical error with empty asynchronous inserts. [#69080](https://github.com/ClickHouse/ClickHouse/pull/69080) ([Han Fei](https://github.com/hanfei1991)).
* Fixed data race of progress indication in clickhouse-client during query canceling. [#69081](https://github.com/ClickHouse/ClickHouse/pull/69081) ([Sergei Trifonov](https://github.com/serxa)).
* Fix a bug where the vector similarity index (currently experimental) was not utilized when cosine distance was used as the distance function. [#69090](https://github.com/ClickHouse/ClickHouse/pull/69090) ([flynn](https://github.com/ucasfl)).
* This change addresses an issue where attempting to create a Replicated database again after a server failure during the initial creation process could result in an error. [#69102](https://github.com/ClickHouse/ClickHouse/pull/69102) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Don't infer Bool type from String in CSV when `input_format_csv_try_infer_numbers_from_strings = 1` because we don't allow reading bool values from strings. [#69109](https://github.com/ClickHouse/ClickHouse/pull/69109) ([Pavel Kruglov](https://github.com/Avogar)).
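  A sketch with a hypothetical `vals.csv` containing quoted values such as "42" and "true":

  ```sql
  -- "42" may be inferred as a number, but "true" stays String,
  -- because Bool values cannot be read back from strings.
  DESCRIBE file('vals.csv')
  SETTINGS input_format_csv_try_infer_numbers_from_strings = 1;
  ```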
* Fix parsing errors of `EXPLAIN AST INSERT` queries in the client when `--multiquery` is enabled. [#69123](https://github.com/ClickHouse/ClickHouse/pull/69123) ([wxybear](https://github.com/wxybear)).
* The `UNION` clause in subqueries wasn't handled correctly in queries with parallel replicas and led to a LOGICAL_ERROR `Duplicate announcement received for replica`. [#69146](https://github.com/ClickHouse/ClickHouse/pull/69146) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix propagating the structure argument in `s3Cluster`. Previously the `DEFAULT` expression of a column could be lost when sending the query to the replicas in `s3Cluster`. [#69147](https://github.com/ClickHouse/ClickHouse/pull/69147) ([Pavel Kruglov](https://github.com/Avogar)).
* Respect format settings in Values format during conversion from expression to the destination type. [#69149](https://github.com/ClickHouse/ClickHouse/pull/69149) ([Pavel Kruglov](https://github.com/Avogar)).
* Fix `clickhouse-client --queries-file` for readonly users (previously fails with `Cannot modify 'log_comment' setting in readonly mode`). [#69175](https://github.com/ClickHouse/ClickHouse/pull/69175) ([Azat Khuzhin](https://github.com/azat)).
* Fix data race in clickhouse-client when it's piped to a process that terminated early. [#69186](https://github.com/ClickHouse/ClickHouse/pull/69186) ([vdimir](https://github.com/vdimir)).
* Fix incorrect results of `uniq` and `GROUP BY` for JSON/Dynamic types. [#69203](https://github.com/ClickHouse/ClickHouse/pull/69203) ([Pavel Kruglov](https://github.com/Avogar)).
* Fix the INFILE format detection for asynchronous inserts. If the format is not explicitly defined in the FORMAT clause, it can be detected from the INFILE file extension. [#69237](https://github.com/ClickHouse/ClickHouse/pull/69237) ([Julia Kartseva](https://github.com/jkartseva)).
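  A sketch with a hypothetical table `t` and file `data.csv`; with no `FORMAT` clause, the format is detected from the extension:

  ```sql
  SET async_insert = 1;
  INSERT INTO t FROM INFILE 'data.csv';
  ```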
* After [this issue](https://github.com/ClickHouse/ClickHouse/pull/59946#issuecomment-1943653197) there are quite a few table replicas in production whose `metadata_version` node value is equal to `0` while being different from the respective table's `metadata` node version. This led to `ALTER` queries failing on such replicas; they are now handled correctly. [#69274](https://github.com/ClickHouse/ClickHouse/pull/69274) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Backported in [#69986](https://github.com/ClickHouse/ClickHouse/issues/69986): Fix an infinite loop after `RESTORE REPLICA` in ReplicatedMergeTree with zero-copy replication. [#69293](https://github.com/ClickHouse/ClickHouse/pull/69293) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Mark the Dynamic type as an unsafe primary key type to avoid issues with Fields. [#69311](https://github.com/ClickHouse/ClickHouse/pull/69311) ([Pavel Kruglov](https://github.com/Avogar)).
* Improve restoring of access entities' dependencies. [#69346](https://github.com/ClickHouse/ClickHouse/pull/69346) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix undefined behavior when all connection attempts fail getting a connection for insertions. [#69390](https://github.com/ClickHouse/ClickHouse/pull/69390) ([Pablo Marcos](https://github.com/pamarcos)).
* Closes [#69135](https://github.com/ClickHouse/ClickHouse/issues/69135). Keep `have_compressed` in `reuseJoinedData`: it would matter if joined data were reused for a `CROSS` join, even though this cannot happen in ClickHouse at present. [#69404](https://github.com/ClickHouse/ClickHouse/pull/69404) ([lgbo](https://github.com/lgbo-ustc)).
* Make `materialize()` function return full column when parameter is a sparse column. [#69429](https://github.com/ClickHouse/ClickHouse/pull/69429) ([Alexander Gololobov](https://github.com/davenger)).
* Fixed a `LOGICAL_ERROR` with function `sqidDecode` ([#69450](https://github.com/ClickHouse/ClickHouse/issues/69450)). [#69451](https://github.com/ClickHouse/ClickHouse/pull/69451) ([Robert Schulze](https://github.com/rschu1ze)).
* Quick fix for an S3Queue problem on 24.6, or for a CREATE query with a Replicated database. [#69454](https://github.com/ClickHouse/ClickHouse/pull/69454) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fixed case when memory consumption was too high because of the squashing in `INSERT INTO ... SELECT` or `CREATE TABLE AS SELECT` queries. [#69469](https://github.com/ClickHouse/ClickHouse/pull/69469) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Statements `SHOW COLUMNS` and `SHOW INDEX` now work properly if the table has dots in its name. [#69514](https://github.com/ClickHouse/ClickHouse/pull/69514) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
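  A sketch; the backtick-quoted table name contains a dot:

  ```sql
  CREATE TABLE `my.table` (id UInt64) ENGINE = MergeTree ORDER BY id;
  SHOW COLUMNS FROM `my.table`;
  SHOW INDEX FROM `my.table`;
  ```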
* Usage of the query cache for queries with an overflow mode != 'throw' is now disallowed. This prevents situations where potentially truncated and incorrect query results could be stored in the query cache. (issue [#67476](https://github.com/ClickHouse/ClickHouse/issues/67476)). [#69549](https://github.com/ClickHouse/ClickHouse/pull/69549) ([Robert Schulze](https://github.com/rschu1ze)).
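  For illustration, a query like this can return a truncated result, so the query cache no longer stores it:

  ```sql
  SELECT number FROM system.numbers LIMIT 100
  SETTINGS use_query_cache = 1,
           max_result_rows = 10,
           result_overflow_mode = 'break'; -- non-throw overflow mode
  ```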
* Keep original order of conditions during move to prewhere. Previously the order could change and it could lead to failing queries when the order is important. [#69560](https://github.com/ClickHouse/ClickHouse/pull/69560) ([Pavel Kruglov](https://github.com/Avogar)).
* Fix Keeper multi-request preprocessing after ZNOAUTH error. [#69627](https://github.com/ClickHouse/ClickHouse/pull/69627) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix METADATA_MISMATCH that might have happened due to TTL with a WHERE clause in DatabaseReplicated when creating a new replica. [#69736](https://github.com/ClickHouse/ClickHouse/pull/69736) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix `StorageS3(Azure)Queue` settings `tracked_file_ttl_sec`. We wrote it to keeper with key `tracked_file_ttl_sec`, but read as `tracked_files_ttl_sec`, which was a typo. [#69742](https://github.com/ClickHouse/ClickHouse/pull/69742) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Make `getHyperrectangleForRowGroup` not throw an exception when the data type in a Parquet file is not convertible into the requested data type. This solves a user's problem where the Parquet file had a Decimal64 data type and the column data type was DateTime. [#69745](https://github.com/ClickHouse/ClickHouse/pull/69745) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Backported in [#70021](https://github.com/ClickHouse/ClickHouse/issues/70021): Fix analyzer default with old compatibility value. [#69895](https://github.com/ClickHouse/ClickHouse/pull/69895) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#69943](https://github.com/ClickHouse/ClickHouse/issues/69943): Don't check dependencies during CREATE OR REPLACE VIEW during DROP of old table. Previously CREATE OR REPLACE query failed when there are dependent tables of the recreated view. [#69907](https://github.com/ClickHouse/ClickHouse/pull/69907) ([Pavel Kruglov](https://github.com/Avogar)).
* Backported in [#70003](https://github.com/ClickHouse/ClickHouse/issues/70003): Now SQL security will work with parameterized views correctly. [#69984](https://github.com/ClickHouse/ClickHouse/pull/69984) ([pufit](https://github.com/pufit)).
#### Build/Testing/Packaging Improvement
* Allow specifying min and max for random settings in a test after its tags, in the form `setting=(min, max)`. For some tests, a really low value of a setting can lead to a timeout (for example `index_granularity=1`), but we still want to randomize the setting in such tests. To avoid timeouts, a custom minimum value can be specified for the setting. [#67875](https://github.com/ClickHouse/ClickHouse/pull/67875) ([Pavel Kruglov](https://github.com/Avogar)).
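  A sketch of what such a limit could look like in a stateless test; the `Random settings limits` comment header is an assumption about the clickhouse-test tag syntax:

  ```sql
  -- Tags: long
  -- Random settings limits: index_granularity=(100, None)
  SELECT count() FROM test_table; -- test_table is hypothetical
  ```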
* The integration tests flaky check now runs each module (file) as a separate group, which allows applying a timeout per module. Previously, slow tests could use up all the available time, the remaining tests were not run, and the flaky check still reported success. [#68380](https://github.com/ClickHouse/ClickHouse/pull/68380) ([Ilya Yatsishin](https://github.com/qoega)).
* Backported in [#69980](https://github.com/ClickHouse/ClickHouse/issues/69980): Makes dbms independent from clickhouse_functions. [#69914](https://github.com/ClickHouse/ClickHouse/pull/69914) ([Raúl Marín](https://github.com/Algunenano)).
#### NO CL CATEGORY
* Backported in [#69995](https://github.com/ClickHouse/ClickHouse/issues/69995): [#69989](https://github.com/ClickHouse/ClickHouse/pull/69989) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Revert "Fix AWS ECS""'. [#65362](https://github.com/ClickHouse/ClickHouse/pull/65362) ([Pavel Kruglov](https://github.com/Avogar)).
* NO CL ENTRY: 'Revert "[RFC] Fix settings/current_database in system.processes for async BACKUP/RESTORE"'. [#68386](https://github.com/ClickHouse/ClickHouse/pull/68386) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* NO CL ENTRY: 'Revert "Improve compatibility of `upper/lowerUTF8` with Spark"'. [#68510](https://github.com/ClickHouse/ClickHouse/pull/68510) ([Robert Schulze](https://github.com/rschu1ze)).
* NO CL ENTRY: 'Revert "Fix unexpected behavior with `FORMAT` and `SETTINGS` parsing"'. [#68608](https://github.com/ClickHouse/ClickHouse/pull/68608) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Fix prewhere without columns and without adaptive index granularity (almost w/o anything)"'. [#68897](https://github.com/ClickHouse/ClickHouse/pull/68897) ([Alexander Gololobov](https://github.com/davenger)).
* NO CL ENTRY: 'Revert "Speed up some Kafka tests with multiprocessing"'. [#69356](https://github.com/ClickHouse/ClickHouse/pull/69356) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* NO CL ENTRY: 'Revert "Remove obsolete `--multiquery` parameter (follow-up to [#63898](https://github.com/ClickHouse/ClickHouse/issues/63898)), pt. V"'. [#69393](https://github.com/ClickHouse/ClickHouse/pull/69393) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Add user-level settings min_free_diskspace_bytes_to_throw_insert and min_free_diskspace_ratio_to_throw_insert"'. [#69705](https://github.com/ClickHouse/ClickHouse/pull/69705) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Support more oss endpoints"'. [#69779](https://github.com/ClickHouse/ClickHouse/pull/69779) ([Raúl Marín](https://github.com/Algunenano)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Followup for [#56996](https://github.com/ClickHouse/ClickHouse/issues/56996). [#58289](https://github.com/ClickHouse/ClickHouse/pull/58289) ([vdimir](https://github.com/vdimir)).
* Add chunked wrapper to native protocol. [#63781](https://github.com/ClickHouse/ClickHouse/pull/63781) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Local plan for parallel replicas. [#64448](https://github.com/ClickHouse/ClickHouse/pull/64448) ([Igor Nikonov](https://github.com/devcrafter)).
* Disallow creating refreshable MV on Linux < 3.15. [#64953](https://github.com/ClickHouse/ClickHouse/pull/64953) ([Michael Kolupaev](https://github.com/al13n321)).
* Allow query profiler period to be longer than 4 seconds. [#65255](https://github.com/ClickHouse/ClickHouse/pull/65255) ([Michael Kolupaev](https://github.com/al13n321)).
* Improved getting of alter conversions for queries. [#65832](https://github.com/ClickHouse/ClickHouse/pull/65832) ([Anton Popov](https://github.com/CurtizJ)).
* Simplify some memory tracking parts. [#66648](https://github.com/ClickHouse/ClickHouse/pull/66648) ([Antonio Andelic](https://github.com/antonio2368)).
* What if we tighten limits for functional tests? [#66837](https://github.com/ClickHouse/ClickHouse/pull/66837) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix flaky `test_delayed_replica_failover`. [#67541](https://github.com/ClickHouse/ClickHouse/pull/67541) ([Sergei Trifonov](https://github.com/serxa)).
* Fix(asan): access destroyed shared context from handleCrash(). [#67700](https://github.com/ClickHouse/ClickHouse/pull/67700) ([Igor Nikonov](https://github.com/devcrafter)).
* Remove obsolete `--multiquery` parameter (follow-up to [#63898](https://github.com/ClickHouse/ClickHouse/issues/63898)), pt. II. [#67749](https://github.com/ClickHouse/ClickHouse/pull/67749) ([Robert Schulze](https://github.com/rschu1ze)).
* Make `ColumnLowCardinality::getName()` consistent with other columns. [#67789](https://github.com/ClickHouse/ClickHouse/pull/67789) ([李扬](https://github.com/taiyang-li)).
* Catch exception in destructor of LocalFileHolder. [#67891](https://github.com/ClickHouse/ClickHouse/pull/67891) ([Duc Canh Le](https://github.com/canhld94)).
* Add keeper error description to the message. [#67935](https://github.com/ClickHouse/ClickHouse/pull/67935) ([Alexander Gololobov](https://github.com/davenger)).
* Timeout handling for functional and integration tests, store artifacts and report if timed out - sets 2h default timeout for all jobs. [#67944](https://github.com/ClickHouse/ClickHouse/pull/67944) ([Max K.](https://github.com/maxknv)).
* Add test cases to 03217_datetime64_constant_to_ast. [#67966](https://github.com/ClickHouse/ClickHouse/pull/67966) ([vdimir](https://github.com/vdimir)).
* Remove some no-parallel tags from tests (Part 6). [#68048](https://github.com/ClickHouse/ClickHouse/pull/68048) ([Raúl Marín](https://github.com/Algunenano)).
* Keeper improvements package. [#68108](https://github.com/ClickHouse/ClickHouse/pull/68108) ([Antonio Andelic](https://github.com/antonio2368)).
* CI: Docker images clean from test scripts. [#68117](https://github.com/ClickHouse/ClickHouse/pull/68117) ([Max K.](https://github.com/maxknv)).
* Log an operation error for a failed ZooKeeper `Multi` request operation. [#68127](https://github.com/ClickHouse/ClickHouse/pull/68127) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Better test for the `Not-ready Set is passed` case in `system.*` tables. [#68139](https://github.com/ClickHouse/ClickHouse/pull/68139) ([Azat Khuzhin](https://github.com/azat)).
* ci: add more logs in the functional tests reports. [#68153](https://github.com/ClickHouse/ClickHouse/pull/68153) ([Azat Khuzhin](https://github.com/azat)).
* Fix fundamentally broken `test_throttling`. [#68180](https://github.com/ClickHouse/ClickHouse/pull/68180) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix test `00600_replace_running_query`. [#68184](https://github.com/ClickHouse/ClickHouse/pull/68184) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix non-deterministic result order in `test_storage_mysql.test_mysql_distributed`. [#68188](https://github.com/ClickHouse/ClickHouse/pull/68188) ([Robert Schulze](https://github.com/rschu1ze)).
* Try to make `test_storage_s3_queue/test.py::test_shards_distributed` less flaky. [#68233](https://github.com/ClickHouse/ClickHouse/pull/68233) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Bump usearch to v2.13.2. [#68248](https://github.com/ClickHouse/ClickHouse/pull/68248) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix 'Refresh set entry already exists'. [#68249](https://github.com/ClickHouse/ClickHouse/pull/68249) ([Michael Kolupaev](https://github.com/al13n321)).
* tests: make 01600_parts_states_metrics_long better. [#68265](https://github.com/ClickHouse/ClickHouse/pull/68265) ([Azat Khuzhin](https://github.com/azat)).
* Avoid ignoring errors of execute_process() (set COMMAND_ERROR_IS_FATAL=ANY). [#68267](https://github.com/ClickHouse/ClickHouse/pull/68267) ([Azat Khuzhin](https://github.com/azat)).
* Fix FullSortingJoinTest.AsofGreaterGeneratedTestData with empty data. [#68280](https://github.com/ClickHouse/ClickHouse/pull/68280) ([vdimir](https://github.com/vdimir)).
* Fix min/max time columns in TimeSeries table. [#68282](https://github.com/ClickHouse/ClickHouse/pull/68282) ([Antonio Andelic](https://github.com/antonio2368)).
* Update fuzzer dictionary. [#68284](https://github.com/ClickHouse/ClickHouse/pull/68284) ([Pablo Marcos](https://github.com/pamarcos)).
* [Green CI] Test 00652_mergetree_mutations is flaky. [#68286](https://github.com/ClickHouse/ClickHouse/pull/68286) ([Daniil Ivanik](https://github.com/divanik)).
* Fix test storage_join_direct_join. [#68287](https://github.com/ClickHouse/ClickHouse/pull/68287) ([Nikita Taranov](https://github.com/nickitat)).
* Add execution status to PipelineExecutor. Avoid LOGICAL_ERROR if the pushing pipeline was canceled by timeout. Replace `Pipeline for PushingPipelineExecutor was finished before all data was inserted. (LOGICAL_ERROR)` with `QUERY_WAS_CANCELLED`. Closes [#63979](https://github.com/ClickHouse/ClickHouse/issues/63979). [#68291](https://github.com/ClickHouse/ClickHouse/pull/68291) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* CI: Create new release branch workflow updates. [#68294](https://github.com/ClickHouse/ClickHouse/pull/68294) ([Max K.](https://github.com/maxknv)).
* Update version after release. [#68306](https://github.com/ClickHouse/ClickHouse/pull/68306) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Various vector similarity index related fixes. [#68308](https://github.com/ClickHouse/ClickHouse/pull/68308) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix: min marks to read overflow with parallel replicas. [#68309](https://github.com/ClickHouse/ClickHouse/pull/68309) ([Igor Nikonov](https://github.com/devcrafter)).
* Apply libunwind changes needed for musl. [#68312](https://github.com/ClickHouse/ClickHouse/pull/68312) ([Michael Kolupaev](https://github.com/al13n321)).
* Update musl to 1.2.5 with unwind info. [#68313](https://github.com/ClickHouse/ClickHouse/pull/68313) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix data race on SampleKey. [#68321](https://github.com/ClickHouse/ClickHouse/pull/68321) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix part name in 00961_check_table. [#68325](https://github.com/ClickHouse/ClickHouse/pull/68325) ([vdimir](https://github.com/vdimir)).
* Fix 02995_index_10 timeout. [#68329](https://github.com/ClickHouse/ClickHouse/pull/68329) ([vdimir](https://github.com/vdimir)).
* CI: Fix for change log critical bug fix regex. [#68332](https://github.com/ClickHouse/ClickHouse/pull/68332) ([Max K.](https://github.com/maxknv)).
* Optionally re-enable compilation with `-O0`. [#68352](https://github.com/ClickHouse/ClickHouse/pull/68352) ([Robert Schulze](https://github.com/rschu1ze)).
* Add debug info for `00180_no_seek_avoiding_when_reading_from_cache`. [#68353](https://github.com/ClickHouse/ClickHouse/pull/68353) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix data race in `DynamicResourceManager::updateConfiguration`. [#68356](https://github.com/ClickHouse/ClickHouse/pull/68356) ([Sergei Trifonov](https://github.com/serxa)).
* Update version_date.tsv and changelog after v24.3.7.30-lts. [#68357](https://github.com/ClickHouse/ClickHouse/pull/68357) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* More improvements in integration tests. [#68358](https://github.com/ClickHouse/ClickHouse/pull/68358) ([Ilya Yatsishin](https://github.com/qoega)).
* Rename: S3DiskNoKeyErrors -> DiskS3NoSuchKeyErrors. [#68361](https://github.com/ClickHouse/ClickHouse/pull/68361) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* CI: Minor fixes for changelog and release exceptions. [#68362](https://github.com/ClickHouse/ClickHouse/pull/68362) ([Max K.](https://github.com/maxknv)).
* Check that setProcessListElement() is not called on global context. [#68364](https://github.com/ClickHouse/ClickHouse/pull/68364) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix off-by-one inline function info in stack traces. [#68365](https://github.com/ClickHouse/ClickHouse/pull/68365) ([Michael Kolupaev](https://github.com/al13n321)).
* Check that merge entries are valid. [#68366](https://github.com/ClickHouse/ClickHouse/pull/68366) ([Alexander Tokmakov](https://github.com/tavplubix)).
* performance comparison test for output_format_parquet_write_page_index. [#68367](https://github.com/ClickHouse/ClickHouse/pull/68367) ([max-vostrikov](https://github.com/max-vostrikov)).
* Add a test for [#57324](https://github.com/ClickHouse/ClickHouse/issues/57324). [#68376](https://github.com/ClickHouse/ClickHouse/pull/68376) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* CI: Auto release workflow. [#68402](https://github.com/ClickHouse/ClickHouse/pull/68402) ([Max K.](https://github.com/maxknv)).
* Update delta lake test. [#68404](https://github.com/ClickHouse/ClickHouse/pull/68404) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add a test for [#59118](https://github.com/ClickHouse/ClickHouse/issues/59118). [#68406](https://github.com/ClickHouse/ClickHouse/pull/68406) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Remove obsolete `--multiquery` parameter (follow-up to [#63898](https://github.com/ClickHouse/ClickHouse/issues/63898)), pt. IV. [#68407](https://github.com/ClickHouse/ClickHouse/pull/68407) ([Robert Schulze](https://github.com/rschu1ze)).
* Try to fix test 03221_mutation_analyzer_skip_part. [#68409](https://github.com/ClickHouse/ClickHouse/pull/68409) ([Anton Popov](https://github.com/CurtizJ)).
* Add `NotStarted` status to not-prepared processors. This is for better diagnostics in the case of PipelineStuck. [#68413](https://github.com/ClickHouse/ClickHouse/pull/68413) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Update log message. [#68426](https://github.com/ClickHouse/ClickHouse/pull/68426) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix 01119_session_log flakiness. [#68433](https://github.com/ClickHouse/ClickHouse/pull/68433) ([Michael Kolupaev](https://github.com/al13n321)).
* Try to mitigate 02818_memory_profiler_sample_min_max_allocation_size flakiness. [#68434](https://github.com/ClickHouse/ClickHouse/pull/68434) ([Michael Kolupaev](https://github.com/al13n321)).
* Update analyzer_tech_debt.txt. [#68443](https://github.com/ClickHouse/ClickHouse/pull/68443) ([Robert Schulze](https://github.com/rschu1ze)).
* Remove obsolete `-n` / `--multiquery` from tests. [#68447](https://github.com/ClickHouse/ClickHouse/pull/68447) ([Robert Schulze](https://github.com/rschu1ze)).
* Check for invalid regexp in JSON SKIP REGEXP section. [#68451](https://github.com/ClickHouse/ClickHouse/pull/68451) ([Pavel Kruglov](https://github.com/Avogar)).
* Better inference of date times 2. [#68452](https://github.com/ClickHouse/ClickHouse/pull/68452) ([Pavel Kruglov](https://github.com/Avogar)).
* CI: Native build for package_aarch64. [#68457](https://github.com/ClickHouse/ClickHouse/pull/68457) ([Max K.](https://github.com/maxknv)).
* Minor update in Dynamic/JSON serializations. [#68459](https://github.com/ClickHouse/ClickHouse/pull/68459) ([Pavel Kruglov](https://github.com/Avogar)).
* Fix test `02122_join_group_by_timeout`. [#68462](https://github.com/ClickHouse/ClickHouse/pull/68462) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add check for `min_number_of_marks = 0` in parallel replicas requests. [#68476](https://github.com/ClickHouse/ClickHouse/pull/68476) ([Nikita Taranov](https://github.com/nickitat)).
* Fix `Broken pipe` error for `03149_numbers_max_block_size_zero.sh`. [#68478](https://github.com/ClickHouse/ClickHouse/pull/68478) ([Julia Kartseva](https://github.com/jkartseva)).
* Use temporary tables for input and output in `clickhouse-local`. [#68483](https://github.com/ClickHouse/ClickHouse/pull/68483) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Pass-through RENAME and UUID-related operations in Overlay database to underlying databases. [#68486](https://github.com/ClickHouse/ClickHouse/pull/68486) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix output of clickhouse-test in case of tests timeouts. [#68487](https://github.com/ClickHouse/ClickHouse/pull/68487) ([Azat Khuzhin](https://github.com/azat)).
* Part of [#68024](https://github.com/ClickHouse/ClickHouse/issues/68024). [#68488](https://github.com/ClickHouse/ClickHouse/pull/68488) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Miscellaneous changes in BaseDaemon. [#68489](https://github.com/ClickHouse/ClickHouse/pull/68489) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Miscellaneous changes from [#66999](https://github.com/ClickHouse/ClickHouse/issues/66999) (2). [#68490](https://github.com/ClickHouse/ClickHouse/pull/68490) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Miscellaneous. [#68504](https://github.com/ClickHouse/ClickHouse/pull/68504) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Miscellaneous changes from [#66999](https://github.com/ClickHouse/ClickHouse/issues/66999). [#68507](https://github.com/ClickHouse/ClickHouse/pull/68507) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix test `01017_uniqCombined_memory_usage`. [#68508](https://github.com/ClickHouse/ClickHouse/pull/68508) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix race condition in MergeTreeRestartingThread. [#68513](https://github.com/ClickHouse/ClickHouse/pull/68513) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix test `01079_bad_alters_zookeeper_long`. [#68515](https://github.com/ClickHouse/ClickHouse/pull/68515) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix bad exception messages. [#68520](https://github.com/ClickHouse/ClickHouse/pull/68520) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* CI: Auto Releases in prod. [#68525](https://github.com/ClickHouse/ClickHouse/pull/68525) ([Max K.](https://github.com/maxknv)).
* Fix build with `-DENABLE_LIBRARIES=0`. [#68527](https://github.com/ClickHouse/ClickHouse/pull/68527) ([Robert Schulze](https://github.com/rschu1ze)).
* Removed the use of the context during function execution, as it was causing errors when the context was expired. [#68534](https://github.com/ClickHouse/ClickHouse/pull/68534) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* CI: Minor release workflow fix. [#68536](https://github.com/ClickHouse/ClickHouse/pull/68536) ([Max K.](https://github.com/maxknv)).
* Fix after [#68291](https://github.com/ClickHouse/ClickHouse/issues/68291). [#68548](https://github.com/ClickHouse/ClickHouse/pull/68548) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Stateless tests: increase hung check timeout. [#68552](https://github.com/ClickHouse/ClickHouse/pull/68552) ([Nikita Fomichev](https://github.com/fm4v)).
* CI: Tidy build timeout from 2h to 3h. [#68567](https://github.com/ClickHouse/ClickHouse/pull/68567) ([Max K.](https://github.com/maxknv)).
* Reduce memory consumption in ghdata JSON tests. [#68571](https://github.com/ClickHouse/ClickHouse/pull/68571) ([Pavel Kruglov](https://github.com/Avogar)).
* Move enabling experimental settings to a separate file. [#68577](https://github.com/ClickHouse/ClickHouse/pull/68577) ([Nikolay Degterinsky](https://github.com/evillique)).
* Minor logging fixes. [#68578](https://github.com/ClickHouse/ClickHouse/pull/68578) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix enumerating dynamic subcolumns. [#68582](https://github.com/ClickHouse/ClickHouse/pull/68582) ([Pavel Kruglov](https://github.com/Avogar)).
* Update version_date.tsv and changelog after v23.8.16.16-lts. [#68593](https://github.com/ClickHouse/ClickHouse/pull/68593) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update 02995_index_7.sh. [#68594](https://github.com/ClickHouse/ClickHouse/pull/68594) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix style in Functions/printf.cpp. [#68595](https://github.com/ClickHouse/ClickHouse/pull/68595) ([vdimir](https://github.com/vdimir)).
* Update version_date.tsv and changelog after v24.3.8.13-lts. [#68599](https://github.com/ClickHouse/ClickHouse/pull/68599) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Prioritization of virtual columns in hive partitioning. [#68606](https://github.com/ClickHouse/ClickHouse/pull/68606) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* materialized_view_deduplication performance comparison test. [#68607](https://github.com/ClickHouse/ClickHouse/pull/68607) ([max-vostrikov](https://github.com/max-vostrikov)).
* Update README.md. [#68610](https://github.com/ClickHouse/ClickHouse/pull/68610) ([Tyler Hannan](https://github.com/tylerhannan)).
* patch: fix reference to sorting key in primary key docs. [#68612](https://github.com/ClickHouse/ClickHouse/pull/68612) ([Leon Kozlowski](https://github.com/leonkozlowski)).
* Do not fuzz 02835 drop user during session. [#68620](https://github.com/ClickHouse/ClickHouse/pull/68620) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix 03221_s3_imds_decent_timeout. [#68627](https://github.com/ClickHouse/ClickHouse/pull/68627) ([vdimir](https://github.com/vdimir)).
* Fix test `01079_bad_alters_zookeeper_long`. [#68629](https://github.com/ClickHouse/ClickHouse/pull/68629) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable min_bytes_to_use_direct_io in some tests with Dynamic/JSON subcolumns because it's broken. [#68632](https://github.com/ClickHouse/ClickHouse/pull/68632) ([Pavel Kruglov](https://github.com/Avogar)).
* Fix false "Killed by signal (output files)" in stress_tests.lib. [#68638](https://github.com/ClickHouse/ClickHouse/pull/68638) ([Michael Kolupaev](https://github.com/al13n321)).
* Remove wrong release version. [#68646](https://github.com/ClickHouse/ClickHouse/pull/68646) ([Max K.](https://github.com/maxknv)).
* Increase connectTimeoutMs IMDS connection timeout to 50ms to avoid failures in CI. [#68653](https://github.com/ClickHouse/ClickHouse/pull/68653) ([Pavel Kruglov](https://github.com/Avogar)).
* CI: Disable SQLLogic job. [#68654](https://github.com/ClickHouse/ClickHouse/pull/68654) ([Max K.](https://github.com/maxknv)).
* Update version_date.tsv and changelog after v24.8.1.2684-lts. [#68664](https://github.com/ClickHouse/ClickHouse/pull/68664) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Fix flaky test test_distributed_replica_max_ignored_errors. [#68665](https://github.com/ClickHouse/ClickHouse/pull/68665) ([Pavel Kruglov](https://github.com/Avogar)).
* Improve `02293_http_header_full_summary_without_progress` logging. [#68666](https://github.com/ClickHouse/ClickHouse/pull/68666) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Fix flaky check when all tests are skipped. [#68673](https://github.com/ClickHouse/ClickHouse/pull/68673) ([Pavel Kruglov](https://github.com/Avogar)).
* Vector similarity index: make `bf16` the default quantization. [#68678](https://github.com/ClickHouse/ClickHouse/pull/68678) ([Robert Schulze](https://github.com/rschu1ze)).
* 3h is not enough. [#68683](https://github.com/ClickHouse/ClickHouse/pull/68683) ([Max K.](https://github.com/maxknv)).
* Un-flake 01278_random_string_utf8. [#68684](https://github.com/ClickHouse/ClickHouse/pull/68684) ([Robert Schulze](https://github.com/rschu1ze)).
* CI: Integration tests timeout to 3h. [#68685](https://github.com/ClickHouse/ClickHouse/pull/68685) ([Max K.](https://github.com/maxknv)).
* Fix Upgrade Check: move some settings to 24.9 section. [#68688](https://github.com/ClickHouse/ClickHouse/pull/68688) ([Pavel Kruglov](https://github.com/Avogar)).
* Update README.md. [#68690](https://github.com/ClickHouse/ClickHouse/pull/68690) ([Tanya Bragin](https://github.com/tbragin)).
* Use proper ErrorCodes, replace NETWORK_ERROR by HDFS_ERROR. [#68696](https://github.com/ClickHouse/ClickHouse/pull/68696) ([flynn](https://github.com/ucasfl)).
* Fix for failing test in CI. [#68701](https://github.com/ClickHouse/ClickHouse/pull/68701) ([Pedro Ferreira](https://github.com/PedroTadim)).
* CI: Stress test fix. [#68712](https://github.com/ClickHouse/ClickHouse/pull/68712) ([Max K.](https://github.com/maxknv)).
* Speedup test 02150_index_hypothesis_race_long. [#68713](https://github.com/ClickHouse/ClickHouse/pull/68713) ([vdimir](https://github.com/vdimir)).
* Turn off fault injection for insert in `01396_inactive_replica_cleanup_nodes_zookeeper`. [#68715](https://github.com/ClickHouse/ClickHouse/pull/68715) ([alesapin](https://github.com/alesapin)).
* Update README.md - Meetups update. [#68723](https://github.com/ClickHouse/ClickHouse/pull/68723) ([Tanya Bragin](https://github.com/tbragin)).
* Fix flaky check. [#68725](https://github.com/ClickHouse/ClickHouse/pull/68725) ([Pavel Kruglov](https://github.com/Avogar)).
* fix shutdown for PeriodicLog. [#68728](https://github.com/ClickHouse/ClickHouse/pull/68728) ([Sema Checherinda](https://github.com/CheSema)).
* Update version_date.tsv and changelog after v24.5.5.41-stable. [#68729](https://github.com/ClickHouse/ClickHouse/pull/68729) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Bump Replxx to support custom descriptors. [#68730](https://github.com/ClickHouse/ClickHouse/pull/68730) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Update version_date.tsv and changelog after v24.6.3.38-stable. [#68732](https://github.com/ClickHouse/ClickHouse/pull/68732) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Fixes `00080_show_tables_and_system_tables` that was a bit flaky. [#68734](https://github.com/ClickHouse/ClickHouse/pull/68734) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Update version_date.tsv and changelog after v24.7.3.47-stable. [#68735](https://github.com/ClickHouse/ClickHouse/pull/68735) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Fix flaky test 00989_parallel_parts_loading. [#68737](https://github.com/ClickHouse/ClickHouse/pull/68737) ([alesapin](https://github.com/alesapin)).
* Update README.md. [#68738](https://github.com/ClickHouse/ClickHouse/pull/68738) ([Tyler Hannan](https://github.com/tylerhannan)).
* Update version_date.tsv and changelog after v24.8.2.3-lts. [#68740](https://github.com/ClickHouse/ClickHouse/pull/68740) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelog after v24.5.5.41-stable. [#68745](https://github.com/ClickHouse/ClickHouse/pull/68745) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)).
* Try to disable rerun check if job triggered manually. [#68751](https://github.com/ClickHouse/ClickHouse/pull/68751) ([Max K.](https://github.com/maxknv)).
* Fix 2477 timeout. [#68752](https://github.com/ClickHouse/ClickHouse/pull/68752) ([Shichao](https://github.com/jsc0218)).
* Update README.md. [#68764](https://github.com/ClickHouse/ClickHouse/pull/68764) ([Tanya Bragin](https://github.com/tbragin)).
* Update version_date.tsv and changelog after v24.5.6.45-stable. [#68766](https://github.com/ClickHouse/ClickHouse/pull/68766) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelog after v24.6.4.42-stable. [#68767](https://github.com/ClickHouse/ClickHouse/pull/68767) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelog after v24.7.4.51-stable. [#68768](https://github.com/ClickHouse/ClickHouse/pull/68768) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Split test case and reduce number of random runs to reduce the time necessary to run the test. [#68772](https://github.com/ClickHouse/ClickHouse/pull/68772) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Check the setting `use_json_alias_for_old_object_type` at runtime. [#68793](https://github.com/ClickHouse/ClickHouse/pull/68793) ([Pavel Kruglov](https://github.com/Avogar)).
* Make dynamic structure selection more consistent. [#68802](https://github.com/ClickHouse/ClickHouse/pull/68802) ([Pavel Kruglov](https://github.com/Avogar)).
* Fix zero copy bug with encrypted disk and UNFREEZE. [#68821](https://github.com/ClickHouse/ClickHouse/pull/68821) ([Joe Lynch](https://github.com/joelynch)).
* Speed up functions `lowerUTF8`/`upperUTF8`. [#68822](https://github.com/ClickHouse/ClickHouse/pull/68822) ([李扬](https://github.com/taiyang-li)).
* Convert integration test `test_incorrect_datetime_format` to stateless. [#68823](https://github.com/ClickHouse/ClickHouse/pull/68823) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix Function Typo. [#68835](https://github.com/ClickHouse/ClickHouse/pull/68835) ([Shichao](https://github.com/jsc0218)).
* Fix test `03228_virtual_column_merge_dist`. [#68841](https://github.com/ClickHouse/ClickHouse/pull/68841) ([Anton Popov](https://github.com/CurtizJ)).
* Fix test `03221_mutation_analyzer_skip_part`. [#68842](https://github.com/ClickHouse/ClickHouse/pull/68842) ([Anton Popov](https://github.com/CurtizJ)).
* Fix flaky `02932_analyzer_rewrite_sum_column_and_constant `. [#68843](https://github.com/ClickHouse/ClickHouse/pull/68843) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Update README.md - Add Austin meetup. [#68845](https://github.com/ClickHouse/ClickHouse/pull/68845) ([Tanya Bragin](https://github.com/tbragin)).
* Fix ssl handshake error processing. [#68866](https://github.com/ClickHouse/ClickHouse/pull/68866) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Speedup test 00653_verification_monotonic_data_load. [#68878](https://github.com/ClickHouse/ClickHouse/pull/68878) ([vdimir](https://github.com/vdimir)).
* ci: add IPv6 support to `NetworkManager`. [#68900](https://github.com/ClickHouse/ClickHouse/pull/68900) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* ci: add checks for `iptables-nft` in integration test runner. [#68902](https://github.com/ClickHouse/ClickHouse/pull/68902) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Fix `02378_part_log_profile_events` flakiness. [#68917](https://github.com/ClickHouse/ClickHouse/pull/68917) ([Julia Kartseva](https://github.com/jkartseva)).
* Make long_parquet* tests less long. [#68918](https://github.com/ClickHouse/ClickHouse/pull/68918) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix 01114_database_atomic flakiness. [#68930](https://github.com/ClickHouse/ClickHouse/pull/68930) ([Raúl Marín](https://github.com/Algunenano)).
* CI: Fix job rerun check. [#68931](https://github.com/ClickHouse/ClickHouse/pull/68931) ([Max K.](https://github.com/maxknv)).
* Perf tests: set `cgroups_memory_usage_observer_wait_time` to zero. [#68957](https://github.com/ClickHouse/ClickHouse/pull/68957) ([vdimir](https://github.com/vdimir)).
* Revert un-quoting `auto` settings values. Fixes [#68748](https://github.com/ClickHouse/ClickHouse/issues/68748). [#68979](https://github.com/ClickHouse/ClickHouse/pull/68979) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Actually fix false "Killed by signal (output files)" in stress_tests.lib. [#68983](https://github.com/ClickHouse/ClickHouse/pull/68983) ([Michael Kolupaev](https://github.com/al13n321)).
* no-tsan in 02735_parquet_encoder. [#68984](https://github.com/ClickHouse/ClickHouse/pull/68984) ([Michael Kolupaev](https://github.com/al13n321)).
* Update CHANGELOG.md. [#68994](https://github.com/ClickHouse/ClickHouse/pull/68994) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix test_role & test_keeper_s3_snapshot integration tests. [#69013](https://github.com/ClickHouse/ClickHouse/pull/69013) ([Shankar](https://github.com/shiyer7474)).
* Fix false leak detection in libfiu. [#69018](https://github.com/ClickHouse/ClickHouse/pull/69018) ([Han Fei](https://github.com/hanfei1991)).
* Update README.md. [#69041](https://github.com/ClickHouse/ClickHouse/pull/69041) ([Tyler Hannan](https://github.com/tylerhannan)).
* Update parametric-functions.md. [#69055](https://github.com/ClickHouse/ClickHouse/pull/69055) ([Samuel Warfield](https://github.com/Warfields)).
* Disallow `ALTER TABLE ADD VECTOR SIMILARITY INDEX` if corresponding setting is not enabled. [#69065](https://github.com/ClickHouse/ClickHouse/pull/69065) ([flynn](https://github.com/ucasfl)).
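  For reference, adding such an index now requires enabling the experimental setting first (table and column names are hypothetical; the argument list of `vector_similarity` may differ between versions):

  ```sql
  SET allow_experimental_vector_similarity_index = 1;
  ALTER TABLE tab ADD INDEX vec_idx embedding TYPE vector_similarity('hnsw', 'L2Distance');
  ```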
* Remove stale moving parts without zookeeper. [#69075](https://github.com/ClickHouse/ClickHouse/pull/69075) ([Kirill](https://github.com/kirillgarbar)).
* Update README.md. [#69077](https://github.com/ClickHouse/ClickHouse/pull/69077) ([Tyler Hannan](https://github.com/tylerhannan)).
* Fix typo in ActionsDag. [#69086](https://github.com/ClickHouse/ClickHouse/pull/69086) ([LiuNeng](https://github.com/liuneng1994)).
* Follow-up for https://github.com/ClickHouse/ClickHouse/pull/68332. [#69099](https://github.com/ClickHouse/ClickHouse/pull/69099) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Fix subnet in docker_compose_net.yml. [#69121](https://github.com/ClickHouse/ClickHouse/pull/69121) ([Ilya Golshtein](https://github.com/ilejn)).
* Fix: expression description in plan after lift up union optimization. [#69124](https://github.com/ClickHouse/ClickHouse/pull/69124) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix locking in UserDefinedSQLObjectsZooKeeperStorage.cpp. [#69132](https://github.com/ClickHouse/ClickHouse/pull/69132) ([Sergei Trifonov](https://github.com/serxa)).
* Do not use docker pause for Kerberos KDC container in integration tests. [#69136](https://github.com/ClickHouse/ClickHouse/pull/69136) ([Ilya Golshtein](https://github.com/ilejn)).
* Don't create Object type if use_json_alias_for_old_object_type=1 but allow_experimental_object_type=0. [#69150](https://github.com/ClickHouse/ClickHouse/pull/69150) ([Pavel Kruglov](https://github.com/Avogar)).
* Use QueryPlan for merge. [#69167](https://github.com/ClickHouse/ClickHouse/pull/69167) ([Alexander Gololobov](https://github.com/davenger)).
* Update version_date.tsv and changelog after v24.3.10.33-lts. [#69177](https://github.com/ClickHouse/ClickHouse/pull/69177) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* CI: Rerun check: do not check if manual rerun. [#69181](https://github.com/ClickHouse/ClickHouse/pull/69181) ([Max K.](https://github.com/maxknv)).
* Update version_date.tsv and changelog after v24.5.7.31-stable. [#69182](https://github.com/ClickHouse/ClickHouse/pull/69182) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelog after v24.6.5.30-stable. [#69184](https://github.com/ClickHouse/ClickHouse/pull/69184) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Fix flaky 02915_move_partition_inactive_replica. [#69190](https://github.com/ClickHouse/ClickHouse/pull/69190) ([alesapin](https://github.com/alesapin)).
* Update version_date.tsv and changelog after v24.8.3.59-lts. [#69191](https://github.com/ClickHouse/ClickHouse/pull/69191) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Disable memory test with sanitizer. [#69193](https://github.com/ClickHouse/ClickHouse/pull/69193) ([alesapin](https://github.com/alesapin)).
* Disable perf-like test with sanitizers. [#69194](https://github.com/ClickHouse/ClickHouse/pull/69194) ([alesapin](https://github.com/alesapin)).
* Fix jepsen for aarch64. [#69199](https://github.com/ClickHouse/ClickHouse/pull/69199) ([Antonio Andelic](https://github.com/antonio2368)).
* Add a test for parallel replicas with reverse-in-order reading mode. [#69202](https://github.com/ClickHouse/ClickHouse/pull/69202) ([Igor Nikonov](https://github.com/devcrafter)).
* Collect sanitizer report from client to `client_log`. [#69204](https://github.com/ClickHouse/ClickHouse/pull/69204) ([vdimir](https://github.com/vdimir)).
* Fix flaky 00157_cache_dictionary. [#69205](https://github.com/ClickHouse/ClickHouse/pull/69205) ([Igor Nikonov](https://github.com/devcrafter)).
* Hide Settings implementation. [#69213](https://github.com/ClickHouse/ClickHouse/pull/69213) ([Raúl Marín](https://github.com/Algunenano)).
* Fix progress bar when reading from Memory tables. [#69234](https://github.com/ClickHouse/ClickHouse/pull/69234) ([Michael Kolupaev](https://github.com/al13n321)).
* CMake: Update ICU build description. [#69240](https://github.com/ClickHouse/ClickHouse/pull/69240) ([Robert Schulze](https://github.com/rschu1ze)).
* Add testcase for ANN index usage with subquery. [#69241](https://github.com/ClickHouse/ClickHouse/pull/69241) ([Robert Schulze](https://github.com/rschu1ze)).
* Backports should read tags from its repo only. [#69252](https://github.com/ClickHouse/ClickHouse/pull/69252) ([Raúl Marín](https://github.com/Algunenano)).
* 01114_database_atomic: Increase time frames to reduce flakiness. [#69255](https://github.com/ClickHouse/ClickHouse/pull/69255) ([Raúl Marín](https://github.com/Algunenano)).
* Fix dropping of file cache in CHECK query in case of enabled transactions. [#69256](https://github.com/ClickHouse/ClickHouse/pull/69256) ([Anton Popov](https://github.com/CurtizJ)).
* CI: Merge stress and func runners type. [#69257](https://github.com/ClickHouse/ClickHouse/pull/69257) ([Max K.](https://github.com/maxknv)).
* Make infrastructure related scripts private. [#69260](https://github.com/ClickHouse/ClickHouse/pull/69260) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix: Not-ready Set with parallel replicas. [#69264](https://github.com/ClickHouse/ClickHouse/pull/69264) ([Igor Nikonov](https://github.com/devcrafter)).
* Revert "CREATE TABLE AS copy PRIMARY KEY, ORDER BY, and similar clauses.". [#69268](https://github.com/ClickHouse/ClickHouse/pull/69268) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Use CANNOT_DECOMPRESS error code for the compressed external data in CompressedReadBufferBase for checksum validation and decompression. [#69269](https://github.com/ClickHouse/ClickHouse/pull/69269) ([Alexey Katsman](https://github.com/alexkats)).
* Fix the error in loading client suggestions and select from `system.function` when `allow_experimental_nlp_functions` setting is enabled and no lemmatizers are specified in the configuration. [#69271](https://github.com/ClickHouse/ClickHouse/pull/69271) ([Nikolay Degterinsky](https://github.com/evillique)).
* Improve logical error trace for TryResult. [#69297](https://github.com/ClickHouse/ClickHouse/pull/69297) ([Pablo Marcos](https://github.com/pamarcos)).
* Just a small refactoring. [#69298](https://github.com/ClickHouse/ClickHouse/pull/69298) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Don't run 01287_max_execution_speed with slow builds. [#69299](https://github.com/ClickHouse/ClickHouse/pull/69299) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add checks against segfault in DeltaLakeMetadata. [#69305](https://github.com/ClickHouse/ClickHouse/pull/69305) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Update StackTrace.cpp. [#69306](https://github.com/ClickHouse/ClickHouse/pull/69306) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Bump libarchive from v3.7.0 to v3.7.1. [#69318](https://github.com/ClickHouse/ClickHouse/pull/69318) ([Robert Schulze](https://github.com/rschu1ze)).
* Bump curl to v8.9.1. [#69321](https://github.com/ClickHouse/ClickHouse/pull/69321) ([Robert Schulze](https://github.com/rschu1ze)).
* These tests take a long time to execute with slow builds (either 700 or 400 seconds). [#69322](https://github.com/ClickHouse/ClickHouse/pull/69322) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Bump grpc to v1.59.5. [#69323](https://github.com/ClickHouse/ClickHouse/pull/69323) ([Robert Schulze](https://github.com/rschu1ze)).
* Speed up some Kafka tests with multiprocessing. [#69324](https://github.com/ClickHouse/ClickHouse/pull/69324) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Minor follow-up for [#66933](https://github.com/ClickHouse/ClickHouse/issues/66933). [#69326](https://github.com/ClickHouse/ClickHouse/pull/69326) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Randomize integration tests settings. [#69328](https://github.com/ClickHouse/ClickHouse/pull/69328) ([vdimir](https://github.com/vdimir)).
|
||||
* Update version_date.tsv and changelog after v24.3.11.7-lts. [#69330](https://github.com/ClickHouse/ClickHouse/pull/69330) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Update version_date.tsv and changelog after v24.5.8.10-stable. [#69333](https://github.com/ClickHouse/ClickHouse/pull/69333) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Add CITATION.cff. [#69334](https://github.com/ClickHouse/ClickHouse/pull/69334) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Update version_date.tsv and changelog after v24.6.6.6-stable. [#69335](https://github.com/ClickHouse/ClickHouse/pull/69335) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Fix a leak of a few bytes in function `lower/upperUTF8`. [#69336](https://github.com/ClickHouse/ClickHouse/pull/69336) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Update version_date.tsv and changelog after v24.7.6.8-stable. [#69337](https://github.com/ClickHouse/ClickHouse/pull/69337) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Update version_date.tsv and changelog after v24.8.4.13-lts. [#69340](https://github.com/ClickHouse/ClickHouse/pull/69340) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Bump OpenSSL to v3.2.3. [#69341](https://github.com/ClickHouse/ClickHouse/pull/69341) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Remove obsolete `--multiquery` parameter (follow-up to [#63898](https://github.com/ClickHouse/ClickHouse/issues/63898)), pt. V. [#69344](https://github.com/ClickHouse/ClickHouse/pull/69344) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Bump libuv to v1.48.0. [#69345](https://github.com/ClickHouse/ClickHouse/pull/69345) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Update README.md - Meetups. [#69359](https://github.com/ClickHouse/ClickHouse/pull/69359) ([Tanya Bragin](https://github.com/tbragin)).
|
||||
* Remove obsolete `--multiquery` parameter (follow-up to [#63898](https://github.com/ClickHouse/ClickHouse/issues/63898)), pt. VI. [#69361](https://github.com/ClickHouse/ClickHouse/pull/69361) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Bump libarchive to v3.7.4. [#69367](https://github.com/ClickHouse/ClickHouse/pull/69367) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Rename `count_min` statistics to `countmin`. [#69377](https://github.com/ClickHouse/ClickHouse/pull/69377) ([JackyWoo](https://github.com/JackyWoo)).
|
||||
* Refactor temporary file usage in MergeTask with QueryPlan. [#69383](https://github.com/ClickHouse/ClickHouse/pull/69383) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Fix type mismatch. [#69385](https://github.com/ClickHouse/ClickHouse/pull/69385) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Fix 24.8 setting compatibility `rows_before_aggregation`. [#69394](https://github.com/ClickHouse/ClickHouse/pull/69394) ([Nikita Fomichev](https://github.com/fm4v)).
|
||||
* Support more oss endpoints. [#69400](https://github.com/ClickHouse/ClickHouse/pull/69400) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Fix 01603_read_with_backoff_bug. [#69413](https://github.com/ClickHouse/ClickHouse/pull/69413) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* See if it would address strange issue with `Resource temporarily unavailable`. [#69416](https://github.com/ClickHouse/ClickHouse/pull/69416) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Added a server setting `database_replicated_allow_detach_permanently` that disallows `DETACH TABLE PERMANENTLY` queries in Replicated databases (allowed by default). [#69422](https://github.com/ClickHouse/ClickHouse/pull/69422) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* See https://s3.amazonaws.com/clickhouse-test-reports/69093/d9b1fb7a7cb0813dd12d8f73423662e02a458056/fast_test.html. [#69430](https://github.com/ClickHouse/ClickHouse/pull/69430) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Fix test_disks_app_func. [#69449](https://github.com/ClickHouse/ClickHouse/pull/69449) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Fixed consistency for `groupConcat` with `arrayStringConcat` in case of `FixedString` as argument. [#69455](https://github.com/ClickHouse/ClickHouse/pull/69455) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* The fix from [#69416](https://github.com/ClickHouse/ClickHouse/issues/69416) was incomplete. Reduce the size of printed statement to 4k max. [#69483](https://github.com/ClickHouse/ClickHouse/pull/69483) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* CMake: Add comment about ICU data files. [#69511](https://github.com/ClickHouse/ClickHouse/pull/69511) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* S3Queue: small refactoring. [#69513](https://github.com/ClickHouse/ClickHouse/pull/69513) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Backport MergeTask changes from private to minimize merge conflicts. [#69525](https://github.com/ClickHouse/ClickHouse/pull/69525) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Fix getting the latest synced commit for long PRs. [#69538](https://github.com/ClickHouse/ClickHouse/pull/69538) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* When trying to ATTACH ReplicatedMergeTree tables that were previously PERMANENTLY DETACHED, we will fail due to explicitly specified parameters. Reference https://github.com/ClickHouse/ClickHouse/pull/66104. [#69539](https://github.com/ClickHouse/ClickHouse/pull/69539) ([Nikolay Degterinsky](https://github.com/evillique)).
|
||||
* Fix ci upgrade check failure for `join_to_sort` settings in pr https://github.com/ClickHouse/ClickHouse/pull/60341. [#69554](https://github.com/ClickHouse/ClickHouse/pull/69554) ([kevinyhzou](https://github.com/KevinyhZou)).
|
||||
* Add function `kill_ci_runner`. Kill runner when pre-pull failed. [#69557](https://github.com/ClickHouse/ClickHouse/pull/69557) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Replace libpq code dump by postgresql fork + bump to v14.3. [#69564](https://github.com/ClickHouse/ClickHouse/pull/69564) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Mask azure connection string sensitive info. [#69570](https://github.com/ClickHouse/ClickHouse/pull/69570) ([Alexey Katsman](https://github.com/alexkats)).
|
||||
* Don't leave an empty znode when replicated table is dropped. [#69574](https://github.com/ClickHouse/ClickHouse/pull/69574) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Enable removeRecursive in CI. [#69578](https://github.com/ClickHouse/ClickHouse/pull/69578) ([Mikhail Artemenko](https://github.com/Michicosun)).
|
||||
* Bump libpqxx to v7.7.5. [#69580](https://github.com/ClickHouse/ClickHouse/pull/69580) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Remove superfluous `--multiquery/-n`, pt. V. [#69581](https://github.com/ClickHouse/ClickHouse/pull/69581) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* A better fix for [#67476](https://github.com/ClickHouse/ClickHouse/issues/67476). [#69583](https://github.com/ClickHouse/ClickHouse/pull/69583) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Add more contexts to the debug action and use it broadly. [#69599](https://github.com/ClickHouse/ClickHouse/pull/69599) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Try to fix data race in `WriteBufferFromHTTPServerResponse`. [#69601](https://github.com/ClickHouse/ClickHouse/pull/69601) ([Konstantin Bogdanov](https://github.com/thevar1able)).
|
||||
* Remove explicit announce from local replica in ReadFromMergeTree. [#69602](https://github.com/ClickHouse/ClickHouse/pull/69602) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Unification of FunctionSecretArgumentsFinder. [#69615](https://github.com/ClickHouse/ClickHouse/pull/69615) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Reorder some tests (follow-up to [#69514](https://github.com/ClickHouse/ClickHouse/issues/69514)). [#69626](https://github.com/ClickHouse/ClickHouse/pull/69626) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* Fix invalid `clickhouse-format` invocation in tests. [#69642](https://github.com/ClickHouse/ClickHouse/pull/69642) ([Konstantin Bogdanov](https://github.com/thevar1able)).
|
||||
* Try fix `02447_drop_database_replica`. [#69655](https://github.com/ClickHouse/ClickHouse/pull/69655) ([Konstantin Bogdanov](https://github.com/thevar1able)).
|
||||
* S3Queue small refactoring. [#69672](https://github.com/ClickHouse/ClickHouse/pull/69672) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Update assert. [#69673](https://github.com/ClickHouse/ClickHouse/pull/69673) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Bump libpq from v14.3 to v15.8. [#69674](https://github.com/ClickHouse/ClickHouse/pull/69674) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Try fix asserts failure in `HashJoin`. [#69682](https://github.com/ClickHouse/ClickHouse/pull/69682) ([Konstantin Bogdanov](https://github.com/thevar1able)).
|
||||
* Prohibit `ALTER TABLE ... ADD INDEX ... TYPE` inverted if setting = 0. [#69684](https://github.com/ClickHouse/ClickHouse/pull/69684) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Fixed two bugs in the Remove Recursive implementation. [#69690](https://github.com/ClickHouse/ClickHouse/pull/69690) ([Mikhail Artemenko](https://github.com/Michicosun)).
|
||||
* New profile events for parallel replicas. [#69706](https://github.com/ClickHouse/ClickHouse/pull/69706) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Update README.md - Meetups. [#69714](https://github.com/ClickHouse/ClickHouse/pull/69714) ([Tanya Bragin](https://github.com/tbragin)).
|
||||
* Added some edge cases for `printf` tests. [#69737](https://github.com/ClickHouse/ClickHouse/pull/69737) ([max-vostrikov](https://github.com/max-vostrikov)).
|
||||
* Bump libpq to v16.4. [#69741](https://github.com/ClickHouse/ClickHouse/pull/69741) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Fix parallel replicas protocol after [#68424](https://github.com/ClickHouse/ClickHouse/issues/68424). [#69744](https://github.com/ClickHouse/ClickHouse/pull/69744) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* CI: Include PostgreSQL in sparse checkout script. [#69746](https://github.com/ClickHouse/ClickHouse/pull/69746) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Get rid of the import cycle when env_helper can't be imported from `ci_*.py` files. Now, `env_helper` won't import download helper or basically anything from the `ci` module. The report is the best place for the GH job info since we use it mostly there. [#69750](https://github.com/ClickHouse/ClickHouse/pull/69750) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Save CREATE QUERY with the KeeperMap engine with evaluated parameters. It is important because, for example, we can use the `currentDatabase()` function in the KeeperMap `zk_root_path` argument, and we want it to point to the same path even if we move this table to another database. [#69751](https://github.com/ClickHouse/ClickHouse/pull/69751) ([Nikolay Degterinsky](https://github.com/evillique)).
|
||||
* Fix native macOS build. [#69767](https://github.com/ClickHouse/ClickHouse/pull/69767) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Backported in [#69974](https://github.com/ClickHouse/ClickHouse/issues/69974): S3Queue: support having deprecated settings to not fail server startup. [#69769](https://github.com/ClickHouse/ClickHouse/pull/69769) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Limit fast tests to 30 seconds. [#69781](https://github.com/ClickHouse/ClickHouse/pull/69781) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#69824](https://github.com/ClickHouse/ClickHouse/issues/69824): Allow cyrillic characters in generated contributor names. [#69820](https://github.com/ClickHouse/ClickHouse/pull/69820) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#69912](https://github.com/ClickHouse/ClickHouse/issues/69912): Add Proj Obsolete Setting. [#69883](https://github.com/ClickHouse/ClickHouse/pull/69883) ([Shichao](https://github.com/jsc0218)).
|
||||
* Backported in [#69899](https://github.com/ClickHouse/ClickHouse/issues/69899): Revert "Merge pull request [#69032](https://github.com/ClickHouse/ClickHouse/issues/69032) from alexon1234/include_real_time_execution_in_http_header". [#69885](https://github.com/ClickHouse/ClickHouse/pull/69885) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#69931](https://github.com/ClickHouse/ClickHouse/issues/69931): RIPE is an acronym and thus should be capitalized. RIPE stands for **R**ACE **I**ntegrity **P**rimitives **E**valuation, and RACE stands for **R**esearch and Development in **A**dvanced **C**ommunications Technologies in **E**urope. [#69901](https://github.com/ClickHouse/ClickHouse/pull/69901) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Backported in [#70034](https://github.com/ClickHouse/ClickHouse/issues/70034): Revert "Add RIPEMD160 function". [#70005](https://github.com/ClickHouse/ClickHouse/pull/70005) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
@ -232,7 +232,7 @@ Below you can find some quick links which may be useful when writing code for Cl
|
||||
- [The code style guide](https://clickhouse.com/docs/en/development/style/).
|
||||
- [Adding third-party libraries](https://clickhouse.com/docs/en/development/contrib/#adding-third-party-libraries)
|
||||
- [Writing tests](https://clickhouse.com/docs/en/development/tests/)
|
||||
- [List of open issues](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aopen+is%3Aissue+label%3Ahacktoberfest)
|
||||
- [List of open issues](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aopen+is%3Aissue+label%3A%22easy+task%22)
|
||||
|
||||
## Writing Documentation {#writing-documentation}
|
||||
|
||||
|
@ -6,7 +6,15 @@ sidebar_label: MongoDB
|
||||
|
||||
# MongoDB
|
||||
|
||||
MongoDB engine is read-only table engine which allows to read data (`SELECT` queries) from remote MongoDB collection. Engine supports only non-nested data types. `INSERT` queries are not supported.
|
||||
The MongoDB engine is a read-only table engine that allows reading data from a remote [MongoDB](https://www.mongodb.com/) collection.
|
||||
|
||||
Only MongoDB v3.6+ servers are supported.
|
||||
[Seed lists (`mongodb+srv`)](https://www.mongodb.com/docs/manual/reference/glossary/#std-term-seed-list) are not yet supported.
|
||||
|
||||
:::note
|
||||
If you run into problems, please report the issue and try using [the legacy implementation](../../../operations/server-configuration-parameters/settings.md#use_legacy_mongodb_integration).
Keep in mind that it is deprecated and will be removed in future releases.
|
||||
:::
|
||||
|
||||
## Creating a Table {#creating-a-table}
|
||||
|
||||
@ -34,55 +42,147 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name
|
||||
- `options` — MongoDB connection string options (optional parameter).
|
||||
|
||||
:::tip
|
||||
If you are using the MongoDB Atlas cloud offering, the connection URL can be obtained from the 'Atlas SQL' option.
Seed lists (`mongodb+srv`) are not yet supported, but will be added in future releases.
|
||||
:::
|
||||
|
||||
Also, you can simply pass a URI:
|
||||
|
||||
``` sql
|
||||
ENGINE = MongoDB(uri, collection);
|
||||
```
|
||||
|
||||
|
||||
**Engine Parameters**
|
||||
|
||||
- `uri` — MongoDB server's connection URI
|
||||
|
||||
- `collection` — Remote collection name.
|
||||
|
||||
|
||||
## Types mappings
|
||||
|
||||
| MongoDB            | ClickHouse                                                             |
|--------------------|------------------------------------------------------------------------|
| bool, int32, int64 | *any numeric type*, String                                             |
| double             | Float64, String                                                        |
| date               | Date, Date32, DateTime, DateTime64, String                             |
| string             | String, UUID                                                           |
| document           | String(as JSON)                                                        |
| array              | Array, String(as JSON)                                                 |
| oid                | String                                                                 |
| binary             | String if in column, base64 encoded string if in an array or document  |
| *any other*        | String                                                                 |
|
||||
|
||||
If a key is not found in the MongoDB document (for example, the column name doesn't match), the default value, or `NULL` if the column is nullable, will be inserted.
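For illustration, a minimal sketch (the table and connection values are hypothetical) of a nullable column that receives `NULL` whenever the key is absent from a document:

```sql
CREATE TABLE mongo_nullable
(
    _id String,
    optional_field Nullable(String) -- NULL for documents that lack this key
) ENGINE = MongoDB('mongodb://testuser:clickhouse@mongo1:27017/test', 'some_collection');
```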
|
||||
|
||||
## Supported clauses
|
||||
|
||||
Only queries with simple expressions are supported (for example, `WHERE field = <constant> ORDER BY field2 LIMIT <constant>`).
Such expressions are translated to the MongoDB query language and executed on the server side.
You can disable all these restrictions using [mongodb_throw_on_unsupported_query](../../../operations/settings/settings.md#mongodb_throw_on_unsupported_query).
In that case, ClickHouse tries to convert the query on a best-effort basis, but this can lead to a full table scan and processing on the ClickHouse side.
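As a sketch (assuming a table `mongo_table` with columns `field` and `field2`), a query of this shape is translated and executed entirely on the MongoDB side:

```sql
SELECT * FROM mongo_table
WHERE field = 1    -- becomes a MongoDB filter
ORDER BY field2    -- becomes a MongoDB sort
LIMIT 10;          -- becomes a MongoDB limit
```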
|
||||
|
||||
:::note
|
||||
It's always better to explicitly set the type of a literal because Mongo requires strictly typed filters.\
For example, if you want to filter by `Date`:
|
||||
|
||||
```sql
|
||||
SELECT * FROM mongo_table WHERE date = '2024-01-01'
|
||||
```
|
||||
|
||||
This will not work because Mongo will not cast the string to `Date`, so you need to cast it manually:
|
||||
|
||||
```sql
|
||||
SELECT * FROM mongo_table WHERE date = '2024-01-01'::Date OR date = toDate('2024-01-01')
|
||||
```
|
||||
|
||||
This applies to `Date`, `Date32`, `DateTime`, `Bool`, and `UUID`.
|
||||
|
||||
:::
|
||||
|
||||
|
||||
## Usage Example {#usage-example}
|
||||
|
||||
|
||||
Assuming MongoDB has the [sample_mflix](https://www.mongodb.com/docs/atlas/sample-data/sample-mflix) dataset loaded.
|
||||
|
||||
Create a table in ClickHouse to read data from the MongoDB collection:
|
||||
|
||||
```sql
CREATE TABLE sample_mflix_table
(
    _id String,
    title String,
    plot String,
    genres Array(String),
    directors Array(String),
    writers Array(String),
    released Date,
    imdb String,
    year String
) ENGINE = MongoDB('mongodb://<USERNAME>:<PASSWORD>@atlas-sql-6634be87cefd3876070caf96-98lxs.a.query.mongodb.net/sample_mflix?ssl=true&authSource=admin', 'movies');
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT count() FROM sample_mflix_table
|
||||
```
|
||||
|
||||
``` text
|
||||
   ┌─count()─┐
1. │   21349 │
   └─────────┘
|
||||
```
|
||||
|
||||
```SQL
-- JSONExtractString cannot be pushed down to MongoDB
SET mongodb_throw_on_unsupported_query = 0;

-- Find all 'Back to the Future' sequels with rating > 7.5
SELECT title, plot, genres, directors, released FROM sample_mflix_table
WHERE title IN ('Back to the Future', 'Back to the Future Part II', 'Back to the Future Part III')
    AND toFloat32(JSONExtractString(imdb, 'rating')) > 7.5
ORDER BY year
FORMAT Vertical;
```
|
||||
|
||||
```text
|
||||
Row 1:
|
||||
──────
|
||||
title: Back to the Future
|
||||
plot: A young man is accidentally sent 30 years into the past in a time-traveling DeLorean invented by his friend, Dr. Emmett Brown, and must make sure his high-school-age parents unite in order to save his own existence.
|
||||
genres: ['Adventure','Comedy','Sci-Fi']
|
||||
directors: ['Robert Zemeckis']
|
||||
released: 1985-07-03
|
||||
|
||||
Row 2:
|
||||
──────
|
||||
title: Back to the Future Part II
|
||||
plot: After visiting 2015, Marty McFly must repeat his visit to 1955 to prevent disastrous changes to 1985... without interfering with his first trip.
|
||||
genres: ['Action','Adventure','Comedy']
|
||||
directors: ['Robert Zemeckis']
|
||||
released: 1989-11-22
|
||||
```
|
||||
|
||||
```SQL
|
||||
-- Find top 3 movies based on Cormac McCarthy's books
|
||||
SELECT title, toFloat32(JSONExtractString(imdb, 'rating')) AS rating
|
||||
FROM sample_mflix_table
|
||||
WHERE arrayExists(x -> x like 'Cormac McCarthy%', writers)
|
||||
ORDER BY rating DESC
|
||||
LIMIT 3;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─title──────────────────┬─rating─┐
|
||||
1. │ No Country for Old Men │ 8.1 │
|
||||
2. │ The Sunset Limited │ 7.4 │
|
||||
3. │ The Road │ 7.3 │
|
||||
└────────────────────────┴────────┘
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
You can see the generated MongoDB query in DEBUG-level logs.
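For example, a minimal sketch that forwards the server's debug-level logs to the client session (`send_logs_level` is a regular ClickHouse setting; the table is from the example above):

```sql
SET send_logs_level = 'debug';
-- The debug output now includes the query that was sent to MongoDB.
SELECT count() FROM sample_mflix_table WHERE title = 'Back to the Future';
```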
|
||||
|
||||
Implementation details can be found in the [mongocxx](https://github.com/mongodb/mongo-cxx-driver) and [mongoc](https://github.com/mongodb/mongo-c-driver) documentation.
|
||||
|
@ -1,17 +1,20 @@
|
||||
---
|
||||
slug: /en/engines/table-engines/integrations/postgresql
|
||||
title: PostgreSQL Table Engine
|
||||
sidebar_position: 160
|
||||
sidebar_label: PostgreSQL
|
||||
---
|
||||
|
||||
# PostgreSQL
|
||||
|
||||
The PostgreSQL engine allows `SELECT` and `INSERT` queries on data stored on a remote PostgreSQL server.
|
||||
|
||||
:::note
|
||||
Currently, only PostgreSQL versions 12 and up are supported.
|
||||
:::
|
||||
|
||||
:::note Replicating or migrating Postgres data with PeerDB
|
||||
> In addition to the Postgres table engine, you can use [PeerDB](https://docs.peerdb.io/introduction) by ClickHouse to set up a continuous data pipeline from Postgres to ClickHouse. PeerDB is a tool designed specifically to replicate data from Postgres to ClickHouse using change data capture (CDC).
|
||||
:::
|
||||
|
||||
## Creating a Table {#creating-a-table}
|
||||
|
||||
``` sql
|
||||
|
@ -191,6 +191,15 @@ For 'Ordered' mode. Available since `24.6`. If there are several replicas of S3Q
|
||||
The engine supports all S3-related settings. For more information about S3 settings, see [here](../../../engines/table-engines/integrations/s3.md).
|
||||
|
||||
|
||||
## S3Queue Ordered mode {#ordered-mode}
|
||||
|
||||
The `ordered` processing mode of `S3Queue` allows storing less metadata in ZooKeeper, but it has the limitation that files added later in time must have alphanumerically larger names.
|
||||
|
||||
`S3Queue` `ordered` mode, like `unordered`, supports the `(s3queue_)processing_threads_num` setting (the `s3queue_` prefix is optional), which controls the number of threads processing `S3` files locally on the server.
In addition, `ordered` mode introduces the `(s3queue_)buckets` setting, which defines the number of "logical threads". In a distributed scenario with several servers holding `S3Queue` table replicas, this setting defines the number of processing units: each processing thread on each `S3Queue` replica tries to lock a certain `bucket` for processing, and each `bucket` is attributed to certain files by a hash of the file name. Therefore, in a distributed scenario it is highly recommended to set `(s3queue_)buckets` at least equal to, or greater than, the number of replicas; having more buckets than replicas is fine. The optimal choice is for `(s3queue_)buckets` to equal `number_of_replicas` multiplied by `(s3queue_)processing_threads_num`.
The setting `(s3queue_)processing_threads_num` is not recommended for use before version `24.6`.
The setting `(s3queue_)buckets` is available starting with version `24.6`.
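A minimal sketch following these recommendations (the bucket URL and table name are hypothetical; it assumes two replicas with four processing threads each):

```sql
CREATE TABLE s3queue_ordered (name String, value UInt32)
ENGINE = S3Queue('https://<your-bucket>.s3.amazonaws.com/data/*.csv', 'CSV')
SETTINGS
    mode = 'ordered',
    processing_threads_num = 4,
    buckets = 8; -- number_of_replicas (2) * processing_threads_num (4)
```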
|
||||
|
||||
## Description {#description}
|
||||
|
||||
`SELECT` is not particularly useful for streaming import (except for debugging), because each file can be imported only once. It is more practical to create real-time threads using [materialized views](../../../sql-reference/statements/create/view.md). To do this:
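A minimal sketch of that pattern (all names and the bucket URL are hypothetical): an `S3Queue` source table, a `MergeTree` target table, and a materialized view streaming rows between them.

```sql
CREATE TABLE s3queue_src (name String, value UInt32)
ENGINE = S3Queue('https://<your-bucket>.s3.amazonaws.com/data/*.csv', 'CSV')
SETTINGS mode = 'unordered';

CREATE TABLE dest (name String, value UInt32)
ENGINE = MergeTree ORDER BY name;

-- Each file picked up by s3queue_src is processed once and its rows land in dest.
CREATE MATERIALIZED VIEW s3queue_mv TO dest AS
SELECT * FROM s3queue_src;
```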
|
||||
|
@ -79,7 +79,7 @@ All of the parameters excepting `config_section` have the same meaning as in `Me
|
||||
|
||||
## Rollup Configuration {#rollup-configuration}
|
||||
|
||||
The settings for rollup are defined by the [graphite_rollup](../../../operations/server-configuration-parameters/settings.md#graphite) parameter in the server configuration. The parameter may have any name. You can create several configurations and use them for different tables.
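For example, a sketch of a table that references a section named `graphite_rollup` (the columns follow the engine's expected `Path`/`Time`/`Value`/`Version` layout):

```sql
CREATE TABLE graphite_data
(
    Path String,
    Time DateTime,
    Value Float64,
    Version UInt64
)
ENGINE = GraphiteMergeTree('graphite_rollup')
ORDER BY (Path, Time);
```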
|
||||
|
||||
Rollup configuration structure:
|
||||
|
||||
|
@ -710,7 +710,7 @@ Data part is the minimum movable unit for `MergeTree`-engine tables. The data be
|
||||
### Terms {#terms}
|
||||
|
||||
- Disk — Block device mounted to the filesystem.
|
||||
- Default disk — Disk that stores the path specified in the [path](/docs/en/operations/server-configuration-parameters/settings.md/#path) server setting.
|
||||
- Volume — Ordered set of equal disks (similar to [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)).
|
||||
- Storage policy — Set of volumes and the rules for moving data between them.
|
||||
|
||||
|
@ -127,7 +127,7 @@ By default, an INSERT query waits for confirmation of writing the data from only
|
||||
|
||||
Each block of data is written atomically. The INSERT query is divided into blocks of up to `max_insert_block_size = 1048576` rows. In other words, if the `INSERT` query has fewer than 1048576 rows, it is performed atomically.
|
||||
|
||||
Data blocks are deduplicated. For multiple writes of the same data block (data blocks of the same size containing the same rows in the same order), the block is only written once. This exists because, in case of network failures, the client application may not know whether the data was written to the DB, so the `INSERT` query can simply be repeated. It does not matter which replica the identical INSERTs were sent to: `INSERTs` are idempotent. Deduplication parameters are controlled by the [merge_tree](/docs/en/operations/server-configuration-parameters/settings.md/#merge_tree) server settings.
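As a sketch (assuming a hypothetical `ReplicatedMergeTree` table `events`), retrying an identical insert is harmless:

```sql
INSERT INTO events VALUES (1, 'click');
-- Retried after a network error: same rows, same order, hence the same block.
-- The block is deduplicated and the row is still stored only once.
INSERT INTO events VALUES (1, 'click');
```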
|
||||
|
||||
During replication, only the source data to insert is transferred over the network. Further data transformation (merging) is coordinated and performed on all the replicas in the same way. This minimizes network usage, which means that replication works well when replicas reside in different datacenters. (Note that duplicating data in different datacenters is the main goal of replication.)
|
||||
|
||||
|
@ -7,7 +7,7 @@ description: Analyzing Stack Overflow data with ClickHouse
|
||||
|
||||
# Analyzing Stack Overflow data with ClickHouse
|
||||
|
||||
This dataset contains all `Posts`, `Users`, `Votes`, `Comments`, `Badges`, `PostHistory`, and `PostLinks` records that have occurred on Stack Overflow.
|
||||
|
||||
Users can either download pre-prepared Parquet versions of the data, containing every post up to April 2024, or download the latest data in XML format and load it. Stack Overflow provides updates to this data periodically - historically every 3 months.
|
||||
|
||||
@ -159,7 +159,7 @@ INSERT INTO stackoverflow.badges SELECT * FROM s3('https://datasets-documentatio
|
||||
0 rows in set. Elapsed: 6.635 sec. Processed 51.29 million rows, 797.05 MB (7.73 million rows/s., 120.13 MB/s.)
|
||||
```
|
||||
|
||||
### PostLinks
|
||||
|
||||
```sql
|
||||
CREATE TABLE stackoverflow.postlinks
|
||||
@ -178,7 +178,7 @@ INSERT INTO stackoverflow.postlinks SELECT * FROM s3('https://datasets-documenta
|
||||
0 rows in set. Elapsed: 1.534 sec. Processed 6.55 million rows, 129.70 MB (4.27 million rows/s., 84.57 MB/s.)
|
||||
```
|
||||
|
||||
### PostHistory
|
||||
|
||||
```sql
|
||||
CREATE TABLE stackoverflow.posthistory
|
||||
|
@ -1,35 +1,38 @@
|
||||
---
|
||||
slug: /en/getting-started/example-datasets/star-schema
|
||||
sidebar_label: Star Schema Benchmark
|
||||
description: "Dataset based on the TPC-H dbgen source. The coding style and architecture
|
||||
follows the TPCH dbgen."
|
||||
description: "The Star Schema Benchmark (SSB) data set and queries"
|
||||
---
|
||||
|
||||
# Star Schema Benchmark (SSB, 2009)
|
||||
|
||||
The Star Schema Benchmark is roughly based on the [TPC-H](tpch.md) tables and queries, but unlike TPC-H, it uses a star schema layout.
The bulk of the data sits in a gigantic fact table which is surrounded by multiple small dimension tables.
The queries join the fact table with one or more dimension tables to apply filter criteria, e.g. `MONTH = 'JANUARY'`.
|
||||
|
||||
References:
- [Star Schema Benchmark](https://cs.umb.edu/~poneil/StarSchemaB.pdf) (O'Neil et al.), 2009
- [Variations of the Star Schema Benchmark to Test the Effects of Data Skew on Query Performance](https://doi.org/10.1145/2479871.2479927) (Rabl et al.), 2013

First, check out the star schema benchmark repository and compile the data generator:
|
||||
|
||||
``` bash
|
||||
git clone https://github.com/vadimtk/ssb-dbgen.git
cd ssb-dbgen
make
|
||||
```
|
||||
|
||||
Then, generate the data. Parameter `-s` specifies the scale factor. For example, with `-s 100`, 600 million rows are generated.
|
||||
|
||||
``` bash
|
||||
./dbgen -s 1000 -T c
./dbgen -s 1000 -T l
./dbgen -s 1000 -T p
./dbgen -s 1000 -T s
./dbgen -s 1000 -T d
|
||||
```
|
||||
|
||||
Now create tables in ClickHouse:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE customer
|
||||
@ -92,18 +95,42 @@ CREATE TABLE supplier
|
||||
S_PHONE String
|
||||
)
|
||||
ENGINE = MergeTree ORDER BY S_SUPPKEY;
|
||||
|
||||
CREATE TABLE date
|
||||
(
|
||||
D_DATEKEY Date,
|
||||
D_DATE FixedString(18),
|
||||
D_DAYOFWEEK LowCardinality(String),
|
||||
D_MONTH LowCardinality(String),
|
||||
D_YEAR UInt16,
|
||||
D_YEARMONTHNUM UInt32,
|
||||
D_YEARMONTH LowCardinality(FixedString(7)),
|
||||
D_DAYNUMINWEEK UInt8,
|
||||
D_DAYNUMINMONTH UInt8,
|
||||
D_DAYNUMINYEAR UInt16,
|
||||
D_MONTHNUMINYEAR UInt8,
|
||||
D_WEEKNUMINYEAR UInt8,
|
||||
D_SELLINGSEASON String,
|
||||
D_LASTDAYINWEEKFL UInt8,
|
||||
D_LASTDAYINMONTHFL UInt8,
|
||||
D_HOLIDAYFL UInt8,
|
||||
D_WEEKDAYFL UInt8
|
||||
)
|
||||
ENGINE = MergeTree ORDER BY D_DATEKEY;
|
||||
```
|
||||
|
||||
The data can be imported as follows:
|
||||
|
||||
``` bash
|
||||
clickhouse-client --query "INSERT INTO customer FORMAT CSV" < customer.tbl
clickhouse-client --query "INSERT INTO part FORMAT CSV" < part.tbl
clickhouse-client --query "INSERT INTO supplier FORMAT CSV" < supplier.tbl
clickhouse-client --query "INSERT INTO lineorder FORMAT CSV" < lineorder.tbl
clickhouse-client --query "INSERT INTO date FORMAT CSV" < date.tbl
|
||||
```
|
||||
|
||||
In many use cases of ClickHouse, multiple tables are converted into a single denormalized flat table.
This step is optional; below, the queries are listed both in their original form and rewritten for the denormalized table.
|
||||
|
||||
``` sql
|
||||
SET max_memory_usage = 20000000000;
|
||||
@ -155,42 +182,131 @@ INNER JOIN supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY
|
||||
INNER JOIN part AS p ON p.P_PARTKEY = l.LO_PARTKEY;
|
||||
```
|
||||
|
||||
The queries are generated by `./qgen -s <scaling_factor>`. Example queries for `s = 100`:
|
||||
|
||||
Q1.1
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS REVENUE
|
||||
FROM
|
||||
lineorder,
|
||||
date
|
||||
WHERE
|
||||
LO_ORDERDATE = D_DATEKEY
|
||||
AND D_YEAR = 1993
|
||||
AND LO_DISCOUNT BETWEEN 1 AND 3
|
||||
AND LO_QUANTITY < 25;
|
||||
```
|
||||
|
||||
Denormalized table:
|
||||
|
||||
``` sql
|
||||
|
||||
SELECT
|
||||
sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
|
||||
FROM
|
||||
lineorder_flat
|
||||
WHERE
|
||||
toYear(LO_ORDERDATE) = 1993
|
||||
AND LO_DISCOUNT BETWEEN 1 AND 3
|
||||
AND LO_QUANTITY < 25;
|
||||
```
|
||||
|
||||
Q1.2
|
||||
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS REVENUE
|
||||
FROM
|
||||
lineorder,
|
||||
date
|
||||
WHERE
|
||||
LO_ORDERDATE = D_DATEKEY
|
||||
AND D_YEARMONTHNUM = 199401
|
||||
AND LO_DISCOUNT BETWEEN 4 AND 6
|
||||
AND LO_QUANTITY BETWEEN 26 AND 35;
|
||||
```
|
||||
|
||||
Denormalized table:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
|
||||
FROM
|
||||
lineorder_flat
|
||||
WHERE
|
||||
toYYYYMM(LO_ORDERDATE) = 199401
|
||||
AND LO_DISCOUNT BETWEEN 4 AND 6
|
||||
AND LO_QUANTITY BETWEEN 26 AND 35;
|
||||
```
|
||||
|
||||
Q1.3
|
||||
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
sum(LO_EXTENDEDPRICE*LO_DISCOUNT) AS REVENUE
|
||||
FROM
|
||||
lineorder,
|
||||
date
|
||||
WHERE
|
||||
LO_ORDERDATE = D_DATEKEY
|
||||
AND D_WEEKNUMINYEAR = 6
|
||||
AND D_YEAR = 1994
|
||||
AND LO_DISCOUNT BETWEEN 5 AND 7
|
||||
AND LO_QUANTITY BETWEEN 26 AND 35;
|
||||
```
|
||||
|
||||
Denormalized table:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
sum(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue
|
||||
FROM
|
||||
lineorder_flat
|
||||
WHERE
|
||||
toISOWeek(LO_ORDERDATE) = 6
|
||||
AND toYear(LO_ORDERDATE) = 1994
|
||||
AND LO_DISCOUNT BETWEEN 5 AND 7
|
||||
AND LO_QUANTITY BETWEEN 26 AND 35;
|
||||
```
|
||||
|
||||
Q2.1
|
||||
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
sum(LO_REVENUE),
|
||||
D_YEAR,
|
||||
P_BRAND
|
||||
FROM
|
||||
lineorder,
|
||||
date,
|
||||
part,
|
||||
supplier
|
||||
WHERE
|
||||
LO_ORDERDATE = D_DATEKEY
|
||||
AND LO_PARTKEY = P_PARTKEY
|
||||
AND LO_SUPPKEY = S_SUPPKEY
|
||||
AND P_CATEGORY = 'MFGR#12'
|
||||
AND S_REGION = 'AMERICA'
|
||||
GROUP BY
|
||||
D_YEAR,
|
||||
P_BRAND
|
||||
ORDER BY
|
||||
D_YEAR,
|
||||
P_BRAND;
|
||||
```
|
||||
|
||||
Denormalized table:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
sum(LO_REVENUE),
|
||||
toYear(LO_ORDERDATE) AS year,
|
||||
P_BRAND
|
||||
FROM lineorder_flat
|
||||
|
||||
WHERE
|
||||
P_CATEGORY = 'MFGR#12'
|
||||
AND S_REGION = 'AMERICA'
|
||||
GROUP BY
|
||||
year,
|
||||
P_BRAND
|
||||
@ -201,7 +317,34 @@ ORDER BY
|
||||
|
||||
Q2.2
|
||||
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
sum(LO_REVENUE),
|
||||
D_YEAR,
|
||||
P_BRAND
|
||||
FROM
|
||||
lineorder,
|
||||
date,
|
||||
part,
|
||||
supplier
|
||||
WHERE
|
||||
LO_ORDERDATE = D_DATEKEY
|
||||
AND LO_PARTKEY = P_PARTKEY
|
||||
AND LO_SUPPKEY = S_SUPPKEY
|
||||
AND P_BRAND BETWEEN
|
||||
'MFGR#2221' AND 'MFGR#2228'
|
||||
AND S_REGION = 'ASIA'
|
||||
GROUP BY
|
||||
D_YEAR,
|
||||
P_BRAND
|
||||
ORDER BY
|
||||
D_YEAR,
|
||||
P_BRAND;
|
||||
```
|
||||
|
||||
Denormalized table:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
sum(LO_REVENUE),
|
||||
toYear(LO_ORDERDATE) AS year,
|
||||
@ -218,7 +361,33 @@ ORDER BY
|
||||
|
||||
Q2.3
|
||||
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
sum(LO_REVENUE),
|
||||
D_YEAR,
|
||||
P_BRAND
|
||||
FROM
|
||||
lineorder,
|
||||
date,
|
||||
part,
|
||||
supplier
|
||||
WHERE
|
||||
LO_ORDERDATE = D_DATEKEY
|
||||
AND LO_PARTKEY = P_PARTKEY
|
||||
AND LO_SUPPKEY = S_SUPPKEY
|
||||
AND P_BRAND = 'MFGR#2221'
|
||||
AND S_REGION = 'EUROPE'
|
||||
GROUP BY
|
||||
D_YEAR,
|
||||
P_BRAND
|
||||
ORDER BY
|
||||
D_YEAR,
|
||||
P_BRAND;
|
||||
```
|
||||
|
||||
Denormalized table:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
sum(LO_REVENUE),
|
||||
toYear(LO_ORDERDATE) AS year,
|
||||
@ -235,14 +404,46 @@ ORDER BY
|
||||
|
||||
Q3.1
|
||||
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
C_NATION,
|
||||
S_NATION,
|
||||
D_YEAR,
|
||||
sum(LO_REVENUE) AS REVENUE
|
||||
FROM
|
||||
customer,
|
||||
lineorder,
|
||||
supplier,
|
||||
date
|
||||
WHERE
|
||||
LO_CUSTKEY = C_CUSTKEY
|
||||
AND LO_SUPPKEY = S_SUPPKEY
|
||||
AND LO_ORDERDATE = D_DATEKEY
|
||||
AND C_REGION = 'ASIA' AND S_REGION = 'ASIA'
|
||||
AND D_YEAR >= 1992 AND D_YEAR <= 1997
|
||||
GROUP BY
|
||||
C_NATION,
|
||||
S_NATION,
|
||||
D_YEAR
|
||||
ORDER BY
|
||||
D_YEAR ASC,
|
||||
REVENUE DESC;
|
||||
```
|
||||
|
||||
Denormalized table:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
C_NATION,
|
||||
S_NATION,
|
||||
toYear(LO_ORDERDATE) AS year,
|
||||
sum(LO_REVENUE) AS revenue
|
||||
FROM lineorder_flat
|
||||
|
||||
WHERE
|
||||
C_REGION = 'ASIA'
|
||||
AND S_REGION = 'ASIA'
|
||||
AND year >= 1992
|
||||
AND year <= 1997
|
||||
GROUP BY
|
||||
C_NATION,
|
||||
S_NATION,
|
||||
@ -254,14 +455,47 @@ ORDER BY
|
||||
|
||||
Q3.2
|
||||
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
C_CITY,
|
||||
S_CITY,
|
||||
D_YEAR,
|
||||
sum(LO_REVENUE) AS REVENUE
|
||||
FROM
|
||||
customer,
|
||||
lineorder,
|
||||
supplier,
|
||||
date
|
||||
WHERE
|
||||
LO_CUSTKEY = C_CUSTKEY
|
||||
AND LO_SUPPKEY = S_SUPPKEY
|
||||
AND LO_ORDERDATE = D_DATEKEY
|
||||
AND C_NATION = 'UNITED STATES'
|
||||
AND S_NATION = 'UNITED STATES'
|
||||
AND D_YEAR >= 1992 AND D_YEAR <= 1997
|
||||
GROUP BY
|
||||
C_CITY,
|
||||
S_CITY,
|
||||
D_YEAR
|
||||
ORDER BY
|
||||
D_YEAR ASC,
|
||||
REVENUE DESC;
|
||||
```
|
||||
|
||||
Denormalized table:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
C_CITY,
|
||||
S_CITY,
|
||||
toYear(LO_ORDERDATE) AS year,
|
||||
sum(LO_REVENUE) AS revenue
|
||||
FROM lineorder_flat
|
||||
|
||||
WHERE
|
||||
C_NATION = 'UNITED STATES'
|
||||
AND S_NATION = 'UNITED STATES'
|
||||
AND year >= 1992
|
||||
AND year <= 1997
|
||||
GROUP BY
|
||||
C_CITY,
|
||||
S_CITY,
|
||||
@ -273,14 +507,48 @@ ORDER BY
|
||||
|
||||
Q3.3
|
||||
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
C_CITY,
|
||||
S_CITY,
|
||||
D_YEAR,
|
||||
sum(LO_REVENUE) AS revenue
|
||||
FROM
|
||||
customer,
|
||||
lineorder,
|
||||
supplier,
|
||||
date
|
||||
WHERE
|
||||
LO_CUSTKEY = C_CUSTKEY
|
||||
AND LO_SUPPKEY = S_SUPPKEY
|
||||
AND LO_ORDERDATE = D_DATEKEY
|
||||
AND (C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5')
|
||||
AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5')
|
||||
AND D_YEAR >= 1992
|
||||
AND D_YEAR <= 1997
|
||||
GROUP BY
|
||||
C_CITY,
|
||||
S_CITY,
|
||||
D_YEAR
|
||||
ORDER BY
|
||||
D_YEAR ASC,
|
||||
revenue DESC;
|
||||
```
|
||||
|
||||
Denormalized table:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
C_CITY,
|
||||
S_CITY,
|
||||
toYear(LO_ORDERDATE) AS year,
|
||||
sum(LO_REVENUE) AS revenue
|
||||
FROM lineorder_flat
|
||||
|
||||
WHERE
|
||||
(C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5')
|
||||
AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5')
|
||||
AND year >= 1992
|
||||
AND year <= 1997
|
||||
GROUP BY
|
||||
C_CITY,
|
||||
S_CITY,
|
||||
@ -292,14 +560,46 @@ ORDER BY
|
||||
|
||||
Q3.4
|
||||
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
C_CITY,
|
||||
S_CITY,
|
||||
D_YEAR,
|
||||
sum(LO_REVENUE) AS revenue
|
||||
FROM
|
||||
customer,
|
||||
lineorder,
|
||||
supplier,
|
||||
date
|
||||
WHERE
|
||||
LO_CUSTKEY = C_CUSTKEY
|
||||
AND LO_SUPPKEY = S_SUPPKEY
|
||||
AND LO_ORDERDATE = D_DATEKEY
|
||||
AND (C_CITY='UNITED KI1' OR C_CITY='UNITED KI5')
|
||||
AND (S_CITY='UNITED KI1' OR S_CITY='UNITED KI5')
|
||||
AND D_YEARMONTH = 'Dec1997'
|
||||
GROUP BY
|
||||
C_CITY,
|
||||
S_CITY,
|
||||
D_YEAR
|
||||
ORDER BY
|
||||
D_YEAR ASC,
|
||||
revenue DESC;
|
||||
```
|
||||
|
||||
Denormalized table:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
C_CITY,
|
||||
S_CITY,
|
||||
toYear(LO_ORDERDATE) AS year,
|
||||
sum(LO_REVENUE) AS revenue
|
||||
FROM lineorder_flat
|
||||
|
||||
WHERE
|
||||
(C_CITY = 'UNITED KI1' OR C_CITY = 'UNITED KI5')
|
||||
AND (S_CITY = 'UNITED KI1' OR S_CITY = 'UNITED KI5')
|
||||
AND toYYYYMM(LO_ORDERDATE) = 199712
|
||||
GROUP BY
|
||||
C_CITY,
|
||||
S_CITY,
|
||||
@ -311,7 +611,36 @@ ORDER BY
|
||||
|
||||
Q4.1
|
||||
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
D_YEAR,
|
||||
C_NATION,
|
||||
sum(LO_REVENUE - LO_SUPPLYCOST) AS PROFIT
|
||||
FROM
|
||||
date,
|
||||
customer,
|
||||
supplier,
|
||||
part,
|
||||
lineorder
|
||||
WHERE
|
||||
LO_CUSTKEY = C_CUSTKEY
|
||||
AND LO_SUPPKEY = S_SUPPKEY
|
||||
AND LO_PARTKEY = P_PARTKEY
|
||||
AND LO_ORDERDATE = D_DATEKEY
|
||||
AND C_REGION = 'AMERICA'
|
||||
AND S_REGION = 'AMERICA'
|
||||
AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2')
|
||||
GROUP BY
|
||||
D_YEAR,
|
||||
C_NATION
|
||||
ORDER BY
|
||||
D_YEAR,
|
||||
C_NATION
|
||||
```
|
||||
|
||||
Denormalized table:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toYear(LO_ORDERDATE) AS year,
|
||||
C_NATION,
|
||||
@ -328,14 +657,51 @@ ORDER BY
|
||||
|
||||
Q4.2
|
||||
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
D_YEAR,
|
||||
S_NATION,
|
||||
P_CATEGORY,
|
||||
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
|
||||
FROM
|
||||
date,
|
||||
customer,
|
||||
supplier,
|
||||
part,
|
||||
lineorder
|
||||
WHERE
|
||||
LO_CUSTKEY = C_CUSTKEY
|
||||
AND LO_SUPPKEY = S_SUPPKEY
|
||||
AND LO_PARTKEY = P_PARTKEY
|
||||
AND LO_ORDERDATE = D_DATEKEY
|
||||
AND C_REGION = 'AMERICA'
|
||||
AND S_REGION = 'AMERICA'
|
||||
AND (D_YEAR = 1997 OR D_YEAR = 1998)
|
||||
AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2')
|
||||
GROUP BY
|
||||
D_YEAR,
|
||||
S_NATION,
|
||||
P_CATEGORY
|
||||
ORDER BY
|
||||
D_YEAR,
|
||||
S_NATION,
|
||||
P_CATEGORY
|
||||
```
|
||||
|
||||
Denormalized table:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toYear(LO_ORDERDATE) AS year,
|
||||
S_NATION,
|
||||
P_CATEGORY,
|
||||
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
|
||||
FROM lineorder_flat
|
||||
|
||||
WHERE
|
||||
C_REGION = 'AMERICA'
|
||||
AND S_REGION = 'AMERICA'
|
||||
AND (year = 1997 OR year = 1998)
|
||||
AND (P_MFGR = 'MFGR#1' OR P_MFGR = 'MFGR#2')
|
||||
GROUP BY
|
||||
year,
|
||||
S_NATION,
|
||||
@ -348,14 +714,51 @@ ORDER BY
|
||||
|
||||
Q4.3
|
||||
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
D_YEAR,
|
||||
S_CITY,
|
||||
P_BRAND,
|
||||
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
|
||||
FROM
|
||||
date,
|
||||
customer,
|
||||
supplier,
|
||||
part,
|
||||
lineorder
|
||||
WHERE
|
||||
LO_CUSTKEY = C_CUSTKEY
|
||||
AND LO_SUPPKEY = S_SUPPKEY
|
||||
AND LO_PARTKEY = P_PARTKEY
|
||||
AND LO_ORDERDATE = D_DATEKEY
|
||||
AND C_REGION = 'AMERICA'
|
||||
AND S_NATION = 'UNITED STATES'
|
||||
AND (D_YEAR = 1997 OR D_YEAR = 1998)
|
||||
AND P_CATEGORY = 'MFGR#14'
|
||||
GROUP BY
|
||||
D_YEAR,
|
||||
S_CITY,
|
||||
P_BRAND
|
||||
ORDER BY
|
||||
D_YEAR,
|
||||
S_CITY,
|
||||
P_BRAND
|
||||
```
|
||||
|
||||
Denormalized table:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toYear(LO_ORDERDATE) AS year,
|
||||
S_CITY,
|
||||
P_BRAND,
|
||||
sum(LO_REVENUE - LO_SUPPLYCOST) AS profit
|
||||
|
||||
FROM
|
||||
lineorder_flat
|
||||
WHERE
|
||||
S_NATION = 'UNITED STATES'
|
||||
AND (year = 1997 OR year = 1998)
|
||||
AND P_CATEGORY = 'MFGR#14'
|
||||
GROUP BY
|
||||
year,
|
||||
S_CITY,
|
||||
@ -365,3 +768,4 @@ ORDER BY
|
||||
S_CITY ASC,
|
||||
P_BRAND ASC;
|
||||
```
|
||||
|
||||
|
596
docs/en/getting-started/example-datasets/tpcds.md
Normal file
596
docs/en/getting-started/example-datasets/tpcds.md
Normal file
@ -0,0 +1,596 @@
|
||||
---
|
||||
slug: /en/getting-started/example-datasets/tpcds
|
||||
sidebar_label: TPC-DS
|
||||
description: "The TPC-DS benchmark data set and queries."
|
||||
---
|
||||
|
||||
# TPC-DS (2012)
|
||||
|
||||
Similar to the [Star Schema Benchmark (SSB)](star-schema.md), TPC-DS is based on [TPC-H](tpch.md), but it took the opposite route, i.e. it expanded the number of joins needed by storing the data in a complex snowflake schema (24 instead of 8 tables).
|
||||
The data distribution is skewed (e.g. normal and Poisson distributions).
|
||||
It includes 99 reporting and ad-hoc queries with random substitutions.
|
||||
|
||||
References:
|
||||
- [The Making of TPC-DS](https://dl.acm.org/doi/10.5555/1182635.1164217) (Nambiar), 2006
|
||||
|
||||
First, check out the TPC-DS repository and compile the data generator:
|
||||
|
||||
``` bash
|
||||
git clone https://github.com/gregrahn/tpcds-kit.git
|
||||
cd tpcds-kit/tools
|
||||
make
|
||||
```
|
||||
|
||||
Then, generate the data. Parameter `-scale` specifies the scale factor.
|
||||
|
||||
``` bash
|
||||
./dsdgen -scale 1
|
||||
```
|
||||
|
||||
Then, generate the queries (use the same scale factor):
|
||||
|
||||
```bash
|
||||
./dsqgen -DIRECTORY ../query_templates/ -INPUT ../query_templates/templates.lst -SCALE 1 # generates 99 queries in out/query_0.sql
|
||||
```
|
||||
|
||||
Now create tables in ClickHouse.
|
||||
You can either use the original table definitions in `tools/tpcds.sql` or the "tuned" table definitions below, which have properly defined primary key indexes and `LowCardinality` column types where they make sense.
|
||||
|
||||
```sql
|
||||
CREATE TABLE call_center(
|
||||
cc_call_center_sk Int64,
|
||||
cc_call_center_id LowCardinality(String),
|
||||
cc_rec_start_date Nullable(Date),
|
||||
cc_rec_end_date Nullable(Date),
|
||||
cc_closed_date_sk Nullable(UInt32),
|
||||
cc_open_date_sk Nullable(UInt32),
|
||||
cc_name LowCardinality(String),
|
||||
cc_class LowCardinality(String),
|
||||
cc_employees Int32,
|
||||
cc_sq_ft Int32,
|
||||
cc_hours LowCardinality(String),
|
||||
cc_manager LowCardinality(String),
|
||||
cc_mkt_id Int32,
|
||||
cc_mkt_class LowCardinality(String),
|
||||
cc_mkt_desc LowCardinality(String),
|
||||
cc_market_manager LowCardinality(String),
|
||||
cc_division Int32,
|
||||
cc_division_name LowCardinality(String),
|
||||
cc_company Int32,
|
||||
cc_company_name LowCardinality(String),
|
||||
cc_street_number LowCardinality(String),
|
||||
cc_street_name LowCardinality(String),
|
||||
cc_street_type LowCardinality(String),
|
||||
cc_suite_number LowCardinality(String),
|
||||
cc_city LowCardinality(String),
|
||||
cc_county LowCardinality(String),
|
||||
cc_state LowCardinality(String),
|
||||
cc_zip LowCardinality(String),
|
||||
cc_country LowCardinality(String),
|
||||
cc_gmt_offset Decimal(7,2),
|
||||
cc_tax_percentage Decimal(7,2),
|
||||
PRIMARY KEY (cc_call_center_sk)
|
||||
);
|
||||
|
||||
CREATE TABLE catalog_page(
|
||||
cp_catalog_page_sk Int64,
|
||||
cp_catalog_page_id LowCardinality(String),
|
||||
cp_start_date_sk Nullable(UInt32),
|
||||
cp_end_date_sk Nullable(UInt32),
|
||||
cp_department LowCardinality(Nullable(String)),
|
||||
cp_catalog_number Nullable(Int32),
|
||||
cp_catalog_page_number Nullable(Int32),
|
||||
cp_description LowCardinality(Nullable(String)),
|
||||
cp_type LowCardinality(Nullable(String)),
|
||||
PRIMARY KEY (cp_catalog_page_sk)
|
||||
);
|
||||
|
||||
CREATE TABLE catalog_returns(
|
||||
cr_returned_date_sk Int32,
|
||||
cr_returned_time_sk Int64,
|
||||
cr_item_sk Int64,
|
||||
cr_refunded_customer_sk Nullable(Int64),
|
||||
cr_refunded_cdemo_sk Nullable(Int64),
|
||||
cr_refunded_hdemo_sk Nullable(Int64),
|
||||
cr_refunded_addr_sk Nullable(Int64),
|
||||
cr_returning_customer_sk Nullable(Int64),
|
||||
cr_returning_cdemo_sk Nullable(Int64),
|
||||
cr_returning_hdemo_sk Nullable(Int64),
|
||||
cr_returning_addr_sk Nullable(Int64),
|
||||
cr_call_center_sk Nullable(Int64),
|
||||
cr_catalog_page_sk Nullable(Int64),
|
||||
cr_ship_mode_sk Nullable(Int64),
|
||||
cr_warehouse_sk Nullable(Int64),
|
||||
cr_reason_sk Nullable(Int64),
|
||||
cr_order_number Int64,
|
||||
cr_return_quantity Nullable(Int32),
|
||||
cr_return_amount Nullable(Decimal(7,2)),
|
||||
cr_return_tax Nullable(Decimal(7,2)),
|
||||
cr_return_amt_inc_tax Nullable(Decimal(7,2)),
|
||||
cr_fee Nullable(Decimal(7,2)),
|
||||
cr_return_ship_cost Nullable(Decimal(7,2)),
|
||||
cr_refunded_cash Nullable(Decimal(7,2)),
|
||||
cr_reversed_charge Nullable(Decimal(7,2)),
|
||||
cr_store_credit Nullable(Decimal(7,2)),
|
||||
cr_net_loss Nullable(Decimal(7,2)),
|
||||
PRIMARY KEY (cr_item_sk, cr_order_number)
|
||||
);
|
||||
|
||||
CREATE TABLE catalog_sales (
|
||||
cs_sold_date_sk Nullable(UInt32),
|
||||
cs_sold_time_sk Nullable(Int64),
|
||||
cs_ship_date_sk Nullable(UInt32),
|
||||
cs_bill_customer_sk Nullable(Int64),
|
||||
cs_bill_cdemo_sk Nullable(Int64),
|
||||
cs_bill_hdemo_sk Nullable(Int64),
|
||||
cs_bill_addr_sk Nullable(Int64),
|
||||
cs_ship_customer_sk Nullable(Int64),
|
||||
cs_ship_cdemo_sk Nullable(Int64),
|
||||
cs_ship_hdemo_sk Nullable(Int64),
|
||||
cs_ship_addr_sk Nullable(Int64),
|
||||
cs_call_center_sk Nullable(Int64),
|
||||
cs_catalog_page_sk Nullable(Int64),
|
||||
cs_ship_mode_sk Nullable(Int64),
|
||||
cs_warehouse_sk Nullable(Int64),
|
||||
cs_item_sk Int64,
|
||||
cs_promo_sk Nullable(Int64),
|
||||
cs_order_number Int64,
|
||||
cs_quantity Nullable(Int32),
|
||||
cs_wholesale_cost Nullable(Decimal(7,2)),
|
||||
cs_list_price Nullable(Decimal(7,2)),
|
||||
    cs_sales_price Nullable(Decimal(7,2)),
    cs_ext_discount_amt Nullable(Decimal(7,2)),
    cs_ext_sales_price Nullable(Decimal(7,2)),
    cs_ext_wholesale_cost Nullable(Decimal(7,2)),
    cs_ext_list_price Nullable(Decimal(7,2)),
    cs_ext_tax Nullable(Decimal(7,2)),
    cs_coupon_amt Nullable(Decimal(7,2)),
    cs_ext_ship_cost Nullable(Decimal(7,2)),
    cs_net_paid Nullable(Decimal(7,2)),
    cs_net_paid_inc_tax Nullable(Decimal(7,2)),
    cs_net_paid_inc_ship Nullable(Decimal(7,2)),
    cs_net_paid_inc_ship_tax Nullable(Decimal(7,2)),
    cs_net_profit Decimal(7,2),
    PRIMARY KEY (cs_item_sk, cs_order_number)
);

CREATE TABLE customer_address (
    ca_address_sk Int64,
    ca_address_id LowCardinality(String),
    ca_street_number LowCardinality(Nullable(String)),
    ca_street_name LowCardinality(Nullable(String)),
    ca_street_type LowCardinality(Nullable(String)),
    ca_suite_number LowCardinality(Nullable(String)),
    ca_city LowCardinality(Nullable(String)),
    ca_county LowCardinality(Nullable(String)),
    ca_state LowCardinality(Nullable(String)),
    ca_zip LowCardinality(Nullable(String)),
    ca_country LowCardinality(Nullable(String)),
    ca_gmt_offset Nullable(Decimal(7,2)),
    ca_location_type LowCardinality(Nullable(String)),
    PRIMARY KEY (ca_address_sk)
);

CREATE TABLE customer_demographics (
    cd_demo_sk Int64,
    cd_gender LowCardinality(String),
    cd_marital_status LowCardinality(String),
    cd_education_status LowCardinality(String),
    cd_purchase_estimate Int32,
    cd_credit_rating LowCardinality(String),
    cd_dep_count Int32,
    cd_dep_employed_count Int32,
    cd_dep_college_count Int32,
    PRIMARY KEY (cd_demo_sk)
);

CREATE TABLE customer (
    c_customer_sk Int64,
    c_customer_id LowCardinality(String),
    c_current_cdemo_sk Nullable(Int64),
    c_current_hdemo_sk Nullable(Int64),
    c_current_addr_sk Nullable(Int64),
    c_first_shipto_date_sk Nullable(UInt32),
    c_first_sales_date_sk Nullable(UInt32),
    c_salutation LowCardinality(Nullable(String)),
    c_first_name LowCardinality(Nullable(String)),
    c_last_name LowCardinality(Nullable(String)),
    c_preferred_cust_flag LowCardinality(Nullable(String)),
    c_birth_day Nullable(Int32),
    c_birth_month Nullable(Int32),
    c_birth_year Nullable(Int32),
    c_birth_country LowCardinality(Nullable(String)),
    c_login LowCardinality(Nullable(String)),
    c_email_address LowCardinality(Nullable(String)),
    c_last_review_date LowCardinality(Nullable(String)),
    PRIMARY KEY (c_customer_sk)
);

CREATE TABLE date_dim (
    d_date_sk UInt32,
    d_date_id LowCardinality(String),
    d_date Date,
    d_month_seq UInt16,
    d_week_seq UInt16,
    d_quarter_seq UInt16,
    d_year UInt16,
    d_dow UInt16,
    d_moy UInt16,
    d_dom UInt16,
    d_qoy UInt16,
    d_fy_year UInt16,
    d_fy_quarter_seq UInt16,
    d_fy_week_seq UInt16,
    d_day_name LowCardinality(String),
    d_quarter_name LowCardinality(String),
    d_holiday LowCardinality(String),
    d_weekend LowCardinality(String),
    d_following_holiday LowCardinality(String),
    d_first_dom Int32,
    d_last_dom Int32,
    d_same_day_ly Int32,
    d_same_day_lq Int32,
    d_current_day LowCardinality(String),
    d_current_week LowCardinality(String),
    d_current_month LowCardinality(String),
    d_current_quarter LowCardinality(String),
    d_current_year LowCardinality(String),
    PRIMARY KEY (d_date_sk)
);

CREATE TABLE household_demographics (
    hd_demo_sk Int64,
    hd_income_band_sk Int64,
    hd_buy_potential LowCardinality(String),
    hd_dep_count Int32,
    hd_vehicle_count Int32,
    PRIMARY KEY (hd_demo_sk)
);

CREATE TABLE income_band (
    ib_income_band_sk Int64,
    ib_lower_bound Int32,
    ib_upper_bound Int32,
    PRIMARY KEY (ib_income_band_sk)
);

CREATE TABLE inventory (
    inv_date_sk UInt32,
    inv_item_sk Int64,
    inv_warehouse_sk Int64,
    inv_quantity_on_hand Nullable(Int32),
    PRIMARY KEY (inv_date_sk, inv_item_sk, inv_warehouse_sk)
);

CREATE TABLE item (
    i_item_sk Int64,
    i_item_id LowCardinality(String),
    i_rec_start_date LowCardinality(Nullable(String)),
    i_rec_end_date LowCardinality(Nullable(String)),
    i_item_desc LowCardinality(Nullable(String)),
    i_current_price Nullable(Decimal(7,2)),
    i_wholesale_cost Nullable(Decimal(7,2)),
    i_brand_id Nullable(Int32),
    i_brand LowCardinality(Nullable(String)),
    i_class_id Nullable(Int32),
    i_class LowCardinality(Nullable(String)),
    i_category_id Nullable(Int32),
    i_category LowCardinality(Nullable(String)),
    i_manufact_id Nullable(Int32),
    i_manufact LowCardinality(Nullable(String)),
    i_size LowCardinality(Nullable(String)),
    i_formulation LowCardinality(Nullable(String)),
    i_color LowCardinality(Nullable(String)),
    i_units LowCardinality(Nullable(String)),
    i_container LowCardinality(Nullable(String)),
    i_manager_id Nullable(Int32),
    i_product_name LowCardinality(Nullable(String)),
    PRIMARY KEY (i_item_sk)
);

CREATE TABLE promotion (
    p_promo_sk Int64,
    p_promo_id LowCardinality(String),
    p_start_date_sk Nullable(UInt32),
    p_end_date_sk Nullable(UInt32),
    p_item_sk Nullable(Int64),
    p_cost Nullable(Decimal(15,2)),
    p_response_target Nullable(Int32),
    p_promo_name LowCardinality(Nullable(String)),
    p_channel_dmail LowCardinality(Nullable(String)),
    p_channel_email LowCardinality(Nullable(String)),
    p_channel_catalog LowCardinality(Nullable(String)),
    p_channel_tv LowCardinality(Nullable(String)),
    p_channel_radio LowCardinality(Nullable(String)),
    p_channel_press LowCardinality(Nullable(String)),
    p_channel_event LowCardinality(Nullable(String)),
    p_channel_demo LowCardinality(Nullable(String)),
    p_channel_details LowCardinality(Nullable(String)),
    p_purpose LowCardinality(Nullable(String)),
    p_discount_active LowCardinality(Nullable(String)),
    PRIMARY KEY (p_promo_sk)
);

CREATE TABLE reason (
    r_reason_sk Int64,
    r_reason_id LowCardinality(String),
    r_reason_desc LowCardinality(String),
    PRIMARY KEY (r_reason_sk)
);

CREATE TABLE ship_mode (
    sm_ship_mode_sk Int64,
    sm_ship_mode_id LowCardinality(String),
    sm_type LowCardinality(String),
    sm_code LowCardinality(String),
    sm_carrier LowCardinality(String),
    sm_contract LowCardinality(String),
    PRIMARY KEY (sm_ship_mode_sk)
);

CREATE TABLE store_returns (
    sr_returned_date_sk Nullable(UInt32),
    sr_return_time_sk Nullable(Int64),
    sr_item_sk Int64,
    sr_customer_sk Nullable(Int64),
    sr_cdemo_sk Nullable(Int64),
    sr_hdemo_sk Nullable(Int64),
    sr_addr_sk Nullable(Int64),
    sr_store_sk Nullable(Int64),
    sr_reason_sk Nullable(Int64),
    sr_ticket_number Int64,
    sr_return_quantity Nullable(Int32),
    sr_return_amt Nullable(Decimal(7,2)),
    sr_return_tax Nullable(Decimal(7,2)),
    sr_return_amt_inc_tax Nullable(Decimal(7,2)),
    sr_fee Nullable(Decimal(7,2)),
    sr_return_ship_cost Nullable(Decimal(7,2)),
    sr_refunded_cash Nullable(Decimal(7,2)),
    sr_reversed_charge Nullable(Decimal(7,2)),
    sr_store_credit Nullable(Decimal(7,2)),
    sr_net_loss Nullable(Decimal(7,2)),
    PRIMARY KEY (sr_item_sk, sr_ticket_number)
);

CREATE TABLE store_sales (
    ss_sold_date_sk Nullable(UInt32),
    ss_sold_time_sk Nullable(Int64),
    ss_item_sk Int64,
    ss_customer_sk Nullable(Int64),
    ss_cdemo_sk Nullable(Int64),
    ss_hdemo_sk Nullable(Int64),
    ss_addr_sk Nullable(Int64),
    ss_store_sk Nullable(Int64),
    ss_promo_sk Nullable(Int64),
    ss_ticket_number Int64,
    ss_quantity Nullable(Int32),
    ss_wholesale_cost Nullable(Decimal(7,2)),
    ss_list_price Nullable(Decimal(7,2)),
    ss_sales_price Nullable(Decimal(7,2)),
    ss_ext_discount_amt Nullable(Decimal(7,2)),
    ss_ext_sales_price Nullable(Decimal(7,2)),
    ss_ext_wholesale_cost Nullable(Decimal(7,2)),
    ss_ext_list_price Nullable(Decimal(7,2)),
    ss_ext_tax Nullable(Decimal(7,2)),
    ss_coupon_amt Nullable(Decimal(7,2)),
    ss_net_paid Nullable(Decimal(7,2)),
    ss_net_paid_inc_tax Nullable(Decimal(7,2)),
    ss_net_profit Nullable(Decimal(7,2)),
    PRIMARY KEY (ss_item_sk, ss_ticket_number)
);

CREATE TABLE store (
    s_store_sk Int64,
    s_store_id LowCardinality(String),
    s_rec_start_date LowCardinality(Nullable(String)),
    s_rec_end_date LowCardinality(Nullable(String)),
    s_closed_date_sk Nullable(UInt32),
    s_store_name LowCardinality(Nullable(String)),
    s_number_employees Nullable(Int32),
    s_floor_space Nullable(Int32),
    s_hours LowCardinality(Nullable(String)),
    s_manager LowCardinality(Nullable(String)),
    s_market_id Nullable(Int32),
    s_geography_class LowCardinality(Nullable(String)),
    s_market_desc LowCardinality(Nullable(String)),
    s_market_manager LowCardinality(Nullable(String)),
    s_division_id Nullable(Int32),
    s_division_name LowCardinality(Nullable(String)),
    s_company_id Nullable(Int32),
    s_company_name LowCardinality(Nullable(String)),
    s_street_number LowCardinality(Nullable(String)),
    s_street_name LowCardinality(Nullable(String)),
    s_street_type LowCardinality(Nullable(String)),
    s_suite_number LowCardinality(Nullable(String)),
    s_city LowCardinality(Nullable(String)),
    s_county LowCardinality(Nullable(String)),
    s_state LowCardinality(Nullable(String)),
    s_zip LowCardinality(Nullable(String)),
    s_country LowCardinality(Nullable(String)),
    s_gmt_offset Nullable(Decimal(7,2)),
    s_tax_precentage Nullable(Decimal(7,2)),
    PRIMARY KEY (s_store_sk)
);

CREATE TABLE time_dim (
    t_time_sk UInt32,
    t_time_id LowCardinality(String),
    t_time UInt32,
    t_hour UInt8,
    t_minute UInt8,
    t_second UInt8,
    t_am_pm LowCardinality(String),
    t_shift LowCardinality(String),
    t_sub_shift LowCardinality(String),
    t_meal_time LowCardinality(Nullable(String)),
    PRIMARY KEY (t_time_sk)
);

CREATE TABLE warehouse (
    w_warehouse_sk Int64,
    w_warehouse_id LowCardinality(String),
    w_warehouse_name LowCardinality(Nullable(String)),
    w_warehouse_sq_ft Nullable(Int32),
    w_street_number LowCardinality(Nullable(String)),
    w_street_name LowCardinality(Nullable(String)),
    w_street_type LowCardinality(Nullable(String)),
    w_suite_number LowCardinality(Nullable(String)),
    w_city LowCardinality(Nullable(String)),
    w_county LowCardinality(Nullable(String)),
    w_state LowCardinality(Nullable(String)),
    w_zip LowCardinality(Nullable(String)),
    w_country LowCardinality(Nullable(String)),
    w_gmt_offset Decimal(7,2),
    PRIMARY KEY (w_warehouse_sk)
);

CREATE TABLE web_page (
    wp_web_page_sk Int64,
    wp_web_page_id LowCardinality(String),
    wp_rec_start_date LowCardinality(Nullable(String)),
    wp_rec_end_date LowCardinality(Nullable(String)),
    wp_creation_date_sk Nullable(UInt32),
    wp_access_date_sk Nullable(UInt32),
    wp_autogen_flag LowCardinality(Nullable(String)),
    wp_customer_sk Nullable(Int64),
    wp_url LowCardinality(Nullable(String)),
    wp_type LowCardinality(Nullable(String)),
    wp_char_count Nullable(Int32),
    wp_link_count Nullable(Int32),
    wp_image_count Nullable(Int32),
    wp_max_ad_count Nullable(Int32),
    PRIMARY KEY (wp_web_page_sk)
);

CREATE TABLE web_returns (
    wr_returned_date_sk Nullable(UInt32),
    wr_returned_time_sk Nullable(Int64),
    wr_item_sk Int64,
    wr_refunded_customer_sk Nullable(Int64),
    wr_refunded_cdemo_sk Nullable(Int64),
    wr_refunded_hdemo_sk Nullable(Int64),
    wr_refunded_addr_sk Nullable(Int64),
    wr_returning_customer_sk Nullable(Int64),
    wr_returning_cdemo_sk Nullable(Int64),
    wr_returning_hdemo_sk Nullable(Int64),
    wr_returning_addr_sk Nullable(Int64),
    wr_web_page_sk Nullable(Int64),
    wr_reason_sk Nullable(Int64),
    wr_order_number Int64,
    wr_return_quantity Nullable(Int32),
    wr_return_amt Nullable(Decimal(7,2)),
    wr_return_tax Nullable(Decimal(7,2)),
    wr_return_amt_inc_tax Nullable(Decimal(7,2)),
    wr_fee Nullable(Decimal(7,2)),
    wr_return_ship_cost Nullable(Decimal(7,2)),
    wr_refunded_cash Nullable(Decimal(7,2)),
    wr_reversed_charge Nullable(Decimal(7,2)),
    wr_account_credit Nullable(Decimal(7,2)),
    wr_net_loss Nullable(Decimal(7,2)),
    PRIMARY KEY (wr_item_sk, wr_order_number)
);

CREATE TABLE web_sales (
    ws_sold_date_sk Nullable(UInt32),
    ws_sold_time_sk Nullable(Int64),
    ws_ship_date_sk Nullable(UInt32),
    ws_item_sk Int64,
    ws_bill_customer_sk Nullable(Int64),
    ws_bill_cdemo_sk Nullable(Int64),
    ws_bill_hdemo_sk Nullable(Int64),
    ws_bill_addr_sk Nullable(Int64),
    ws_ship_customer_sk Nullable(Int64),
    ws_ship_cdemo_sk Nullable(Int64),
    ws_ship_hdemo_sk Nullable(Int64),
    ws_ship_addr_sk Nullable(Int64),
    ws_web_page_sk Nullable(Int64),
    ws_web_site_sk Nullable(Int64),
    ws_ship_mode_sk Nullable(Int64),
    ws_warehouse_sk Nullable(Int64),
    ws_promo_sk Nullable(Int64),
    ws_order_number Int64,
    ws_quantity Nullable(Int32),
    ws_wholesale_cost Nullable(Decimal(7,2)),
    ws_list_price Nullable(Decimal(7,2)),
    ws_sales_price Nullable(Decimal(7,2)),
    ws_ext_discount_amt Nullable(Decimal(7,2)),
    ws_ext_sales_price Nullable(Decimal(7,2)),
    ws_ext_wholesale_cost Nullable(Decimal(7,2)),
    ws_ext_list_price Nullable(Decimal(7,2)),
    ws_ext_tax Nullable(Decimal(7,2)),
    ws_coupon_amt Nullable(Decimal(7,2)),
    ws_ext_ship_cost Nullable(Decimal(7,2)),
    ws_net_paid Nullable(Decimal(7,2)),
    ws_net_paid_inc_tax Nullable(Decimal(7,2)),
    ws_net_paid_inc_ship Decimal(7,2),
    ws_net_paid_inc_ship_tax Decimal(7,2),
    ws_net_profit Decimal(7,2),
    PRIMARY KEY (ws_item_sk, ws_order_number)
);

CREATE TABLE web_site (
    web_site_sk Int64,
    web_site_id LowCardinality(String),
    web_rec_start_date LowCardinality(String),
    web_rec_end_date LowCardinality(Nullable(String)),
    web_name LowCardinality(String),
    web_open_date_sk UInt32,
    web_close_date_sk Nullable(UInt32),
    web_class LowCardinality(String),
    web_manager LowCardinality(String),
    web_mkt_id Int32,
    web_mkt_class LowCardinality(String),
    web_mkt_desc LowCardinality(String),
    web_market_manager LowCardinality(String),
    web_company_id Int32,
    web_company_name LowCardinality(String),
    web_street_number LowCardinality(String),
    web_street_name LowCardinality(String),
    web_street_type LowCardinality(String),
    web_suite_number LowCardinality(String),
    web_city LowCardinality(String),
    web_county LowCardinality(String),
    web_state LowCardinality(String),
    web_zip LowCardinality(String),
    web_country LowCardinality(String),
    web_gmt_offset Decimal(7,2),
    web_tax_percentage Decimal(7,2),
    PRIMARY KEY (web_site_sk)
);
```

The data can be imported as follows:

``` bash
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO call_center FORMAT CSV" < call_center.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO catalog_page FORMAT CSV" < catalog_page.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO catalog_returns FORMAT CSV" < catalog_returns.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO catalog_sales FORMAT CSV" < catalog_sales.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO customer FORMAT CSV" < customer.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO customer_address FORMAT CSV" < customer_address.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO customer_demographics FORMAT CSV" < customer_demographics.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO date_dim FORMAT CSV" < date_dim.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO household_demographics FORMAT CSV" < household_demographics.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO income_band FORMAT CSV" < income_band.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO inventory FORMAT CSV" < inventory.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO item FORMAT CSV" < item.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO promotion FORMAT CSV" < promotion.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO reason FORMAT CSV" < reason.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO ship_mode FORMAT CSV" < ship_mode.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO store FORMAT CSV" < store.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO store_returns FORMAT CSV" < store_returns.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO store_sales FORMAT CSV" < store_sales.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO time_dim FORMAT CSV" < time_dim.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO warehouse FORMAT CSV" < warehouse.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO web_page FORMAT CSV" < web_page.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO web_returns FORMAT CSV" < web_returns.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO web_sales FORMAT CSV" < web_sales.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO web_site FORMAT CSV" < web_site.tbl
```

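Before running queries, it can be worth sanity-checking the load. A minimal sketch (assuming the tables above were created and imported into the current database):

```sql
-- total_rows is maintained for MergeTree tables in system.tables,
-- so this check does not require scanning the data
SELECT table, total_rows
FROM system.tables
WHERE database = currentDatabase()
    AND table IN ('store_sales', 'catalog_sales', 'web_sales', 'inventory')
ORDER BY table;
```
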
Then run the generated queries.

::::warning
TPC-DS makes heavy use of correlated subqueries, which are not supported by ClickHouse at the time of writing (September 2024) ([issue #6697](https://github.com/ClickHouse/ClickHouse/issues/6697)).
As a result, many of the above benchmark queries will fail with errors.
::::
882 docs/en/getting-started/example-datasets/tpch.md Normal file
@ -0,0 +1,882 @@
---
slug: /en/getting-started/example-datasets/tpch
sidebar_label: TPC-H
description: "The TPC-H benchmark data set and queries."
---

# TPC-H (1999)

A popular benchmark that models the internal data warehouse of a wholesale supplier.
The data is stored in a 3rd-normal-form representation, which requires many joins at query runtime.
Despite its age and the unrealistic assumption that the data is uniformly and independently distributed, TPC-H remains the most popular OLAP benchmark to date.

References
- [TPC-H](https://www.tpc.org/tpc_documents_current_versions/current_specifications5.asp)
- [New TPC Benchmarks for Decision Support and Web Commerce](https://doi.org/10.1145/369275.369291) (Poess et al., 2000)
- [TPC-H Analyzed: Hidden Messages and Lessons Learned from an Influential Benchmark](https://doi.org/10.1007/978-3-319-04936-6_5) (Boncz et al., 2013)
- [Quantifying TPC-H Choke Points and Their Optimizations](https://doi.org/10.14778/3389133.3389138) (Dreseler et al., 2020)

First, check out the TPC-H repository and compile the data generator:

``` bash
git clone https://github.com/gregrahn/tpch-kit.git
cd tpch-kit/dbgen
make
```

Then, generate the data. The `-s` parameter specifies the scale factor. For example, with `-s 100`, 600 million rows are generated for table `lineitem` (the row counts scale roughly linearly, i.e. about 6 million `lineitem` rows per unit of scale factor).

``` bash
./dbgen -s 100
```

Now create tables in ClickHouse:

```sql
CREATE TABLE nation (
    n_nationkey Int32,
    n_name String,
    n_regionkey Int32,
    n_comment String)
ORDER BY (n_regionkey, n_name);

CREATE TABLE region (
    r_regionkey Int32,
    r_name String,
    r_comment String)
ORDER BY (r_name);

CREATE TABLE part (
    p_partkey Int32,
    p_name String,
    p_mfgr String,
    p_brand String,
    p_type String,
    p_size Int32,
    p_container String,
    p_retailprice Decimal(15,2),
    p_comment String)
ORDER BY (p_mfgr, p_brand, p_type, p_name);

CREATE TABLE supplier (
    s_suppkey Int32,
    s_name String,
    s_address String,
    s_nationkey Int32,
    s_phone String,
    s_acctbal Decimal(15,2),
    s_comment String)
ORDER BY (s_nationkey, s_address, s_name);

CREATE TABLE partsupp (
    ps_partkey Int32,
    ps_suppkey Int32,
    ps_availqty Int32,
    ps_supplycost Decimal(15,2),
    ps_comment String)
ORDER BY (ps_suppkey, ps_availqty, ps_supplycost, ps_partkey);

CREATE TABLE customer (
    c_custkey Int32,
    c_name String,
    c_address String,
    c_nationkey Int32,
    c_phone String,
    c_acctbal Decimal(15,2),
    c_mktsegment String,
    c_comment String)
ORDER BY (c_nationkey, c_mktsegment, c_address, c_name, c_custkey);

CREATE TABLE orders (
    o_orderkey Int32,
    o_custkey Int32,
    o_orderstatus String,
    o_totalprice Decimal(15,2),
    o_orderdate Date,
    o_orderpriority String,
    o_clerk String,
    o_shippriority Int32,
    o_comment String)
ORDER BY (o_orderdate, o_orderstatus, o_custkey);

CREATE TABLE lineitem (
    l_orderkey Int32,
    l_partkey Int32,
    l_suppkey Int32,
    l_linenumber Int32,
    l_quantity Decimal(15,2),
    l_extendedprice Decimal(15,2),
    l_discount Decimal(15,2),
    l_tax Decimal(15,2),
    l_returnflag String,
    l_linestatus String,
    l_shipdate Date,
    l_commitdate Date,
    l_receiptdate Date,
    l_shipinstruct String,
    l_shipmode String,
    l_comment String)
ORDER BY (l_suppkey, l_partkey, l_shipdate, l_commitdate, l_receiptdate);
```

The data can be imported as follows:

``` bash
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO nation FORMAT CSV" < nation.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO region FORMAT CSV" < region.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO part FORMAT CSV" < part.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO supplier FORMAT CSV" < supplier.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO partsupp FORMAT CSV" < partsupp.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO customer FORMAT CSV" < customer.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO orders FORMAT CSV" < orders.tbl
clickhouse-client --format_csv_delimiter '|' --query "INSERT INTO lineitem FORMAT CSV" < lineitem.tbl
```

The queries are generated by `./qgen -s <scaling_factor>`. Example queries for `s = 100`:

::::warning
TPC-H makes heavy use of correlated subqueries, which are not supported by ClickHouse at the time of writing (September 2024) ([issue #6697](https://github.com/ClickHouse/ClickHouse/issues/6697)).
As a result, many of the benchmark queries below will fail with errors. Some of them can be decorrelated by hand; illustrative sketches are included after Q4, Q17, and Q22 below.
::::

Q1

```sql
SELECT
    l_returnflag,
    l_linestatus,
    sum(l_quantity) AS sum_qty,
    sum(l_extendedprice) AS sum_base_price,
    sum(l_extendedprice * (1 - l_discount)) AS sum_disc_price,
    sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge,
    avg(l_quantity) AS avg_qty,
    avg(l_extendedprice) AS avg_price,
    avg(l_discount) AS avg_disc,
    count(*) AS count_order
FROM
    lineitem
WHERE
    l_shipdate <= date '1998-12-01' - interval '100' day
GROUP BY
    l_returnflag,
    l_linestatus
ORDER BY
    l_returnflag,
    l_linestatus;
```

Q2

```sql
SELECT
    s_acctbal,
    s_name,
    n_name,
    p_partkey,
    p_mfgr,
    s_address,
    s_phone,
    s_comment
FROM
    part,
    supplier,
    partsupp,
    nation,
    region
WHERE
    p_partkey = ps_partkey
    AND s_suppkey = ps_suppkey
    AND p_size = 21
    AND p_type LIKE '%COPPER'
    AND s_nationkey = n_nationkey
    AND n_regionkey = r_regionkey
    AND r_name = 'AMERICA'
    AND ps_supplycost = (
        SELECT
            min(ps_supplycost)
        FROM
            partsupp,
            supplier,
            nation,
            region
        WHERE
            p_partkey = ps_partkey
            AND s_suppkey = ps_suppkey
            AND s_nationkey = n_nationkey
            AND n_regionkey = r_regionkey
            AND r_name = 'AMERICA'
    )
ORDER BY
    s_acctbal desc,
    n_name,
    s_name,
    p_partkey
LIMIT 100;
```

Q3

```sql
SELECT
    l_orderkey,
    sum(l_extendedprice * (1 - l_discount)) AS revenue,
    o_orderdate,
    o_shippriority
FROM
    customer,
    orders,
    lineitem
WHERE
    c_mktsegment = 'BUILDING'
    AND c_custkey = o_custkey
    AND l_orderkey = o_orderkey
    AND o_orderdate < date '1995-03-10'
    AND l_shipdate > date '1995-03-10'
GROUP BY
    l_orderkey,
    o_orderdate,
    o_shippriority
ORDER BY
    revenue desc,
    o_orderdate
LIMIT 10;
```

Q4

```sql
SELECT
    o_orderpriority,
    count(*) AS order_count
FROM
    orders
WHERE
    o_orderdate >= date '1994-07-01'
    AND o_orderdate < date '1994-07-01' + interval '3' month
    AND EXISTS (
        SELECT
            *
        FROM
            lineitem
        WHERE
            l_orderkey = o_orderkey
            AND l_commitdate < l_receiptdate
    )
GROUP BY
    o_orderpriority
ORDER BY
    o_orderpriority;
```

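ClickHouse rejects Q4 because of the correlated `EXISTS`, but the correlation runs only through the join key `l_orderkey = o_orderkey`, so the same result can be obtained with an uncorrelated `IN` subquery. A minimal sketch of such a rewrite, using ClickHouse-native date syntax rather than official `qgen` output:

```sql
SELECT
    o_orderpriority,
    count(*) AS order_count
FROM orders
WHERE o_orderdate >= toDate('1994-07-01')
    AND o_orderdate < toDate('1994-07-01') + INTERVAL 3 MONTH
    -- the EXISTS above only checked for a matching lineitem row,
    -- which an uncorrelated IN over the join key expresses as well
    AND o_orderkey IN (
        SELECT l_orderkey
        FROM lineitem
        WHERE l_commitdate < l_receiptdate
    )
GROUP BY o_orderpriority
ORDER BY o_orderpriority;
```
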
Q5

```sql
SELECT
    n_name,
    sum(l_extendedprice * (1 - l_discount)) AS revenue
FROM
    customer,
    orders,
    lineitem,
    supplier,
    nation,
    region
WHERE
    c_custkey = o_custkey
    AND l_orderkey = o_orderkey
    AND l_suppkey = s_suppkey
    AND c_nationkey = s_nationkey
    AND s_nationkey = n_nationkey
    AND n_regionkey = r_regionkey
    AND r_name = 'MIDDLE EAST'
    AND o_orderdate >= date '1994-01-01'
    AND o_orderdate < date '1994-01-01' + interval '1' year
GROUP BY
    n_name
ORDER BY
    revenue desc;
```

Q6

```sql
SELECT
    sum(l_extendedprice * l_discount) AS revenue
FROM
    lineitem
WHERE
    l_shipdate >= date '1994-01-01'
    AND l_shipdate < date '1994-01-01' + interval '1' year
    AND l_discount between 0.09 - 0.01 AND 0.09 + 0.01
    AND l_quantity < 24;
```

Q7

```sql
SELECT
    supp_nation,
    cust_nation,
    l_year,
    sum(volume) AS revenue
FROM
    (
        SELECT
            n1.n_name AS supp_nation,
            n2.n_name AS cust_nation,
            extract(year FROM l_shipdate) AS l_year,
            l_extendedprice * (1 - l_discount) AS volume
        FROM
            supplier,
            lineitem,
            orders,
            customer,
            nation n1,
            nation n2
        WHERE
            s_suppkey = l_suppkey
            AND o_orderkey = l_orderkey
            AND c_custkey = o_custkey
            AND s_nationkey = n1.n_nationkey
            AND c_nationkey = n2.n_nationkey
            AND (
                (n1.n_name = 'UNITED KINGDOM' AND n2.n_name = 'ETHIOPIA')
                OR (n1.n_name = 'ETHIOPIA' AND n2.n_name = 'UNITED KINGDOM')
            )
            AND l_shipdate between date '1995-01-01' AND date '1996-12-31'
    ) AS shipping
GROUP BY
    supp_nation,
    cust_nation,
    l_year
ORDER BY
    supp_nation,
    cust_nation,
    l_year;
```

Q8

```sql
SELECT
    o_year,
    sum(CASE
        WHEN nation = 'ETHIOPIA' THEN volume
        ELSE 0
    END) / sum(volume) AS mkt_share
FROM
    (
        SELECT
            extract(year FROM o_orderdate) AS o_year,
            l_extendedprice * (1 - l_discount) AS volume,
            n2.n_name AS nation
        FROM
            part,
            supplier,
            lineitem,
            orders,
            customer,
            nation n1,
            nation n2,
            region
        WHERE
            p_partkey = l_partkey
            AND s_suppkey = l_suppkey
            AND l_orderkey = o_orderkey
            AND o_custkey = c_custkey
            AND c_nationkey = n1.n_nationkey
            AND n1.n_regionkey = r_regionkey
            AND r_name = 'AFRICA'
            AND s_nationkey = n2.n_nationkey
            AND o_orderdate between date '1995-01-01' AND date '1996-12-31'
            AND p_type = 'SMALL POLISHED TIN'
    ) AS all_nations
GROUP BY
    o_year
ORDER BY
    o_year;
```

Q9

```sql
SELECT
    nation,
    o_year,
    sum(amount) AS sum_profit
FROM
    (
        SELECT
            n_name AS nation,
            extract(year FROM o_orderdate) AS o_year,
            l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity AS amount
        FROM
            part,
            supplier,
            lineitem,
            partsupp,
            orders,
            nation
        WHERE
            s_suppkey = l_suppkey
            AND ps_suppkey = l_suppkey
            AND ps_partkey = l_partkey
            AND p_partkey = l_partkey
            AND o_orderkey = l_orderkey
            AND s_nationkey = n_nationkey
            AND p_name LIKE '%drab%'
    ) AS profit
GROUP BY
    nation,
    o_year
ORDER BY
    nation,
    o_year desc;
```

Q10

```sql
SELECT
    c_custkey,
    c_name,
    sum(l_extendedprice * (1 - l_discount)) AS revenue,
    c_acctbal,
    n_name,
    c_address,
    c_phone,
    c_comment
FROM
    customer,
    orders,
    lineitem,
    nation
WHERE
    c_custkey = o_custkey
    AND l_orderkey = o_orderkey
    AND o_orderdate >= date '1993-06-01'
    AND o_orderdate < date '1993-06-01' + interval '3' month
    AND l_returnflag = 'R'
    AND c_nationkey = n_nationkey
GROUP BY
    c_custkey,
    c_name,
    c_acctbal,
    c_phone,
    n_name,
    c_address,
    c_comment
ORDER BY
    revenue desc
LIMIT 20;
```

Q11

```sql
SELECT
    ps_partkey,
    sum(ps_supplycost * ps_availqty) AS value
FROM
    partsupp,
    supplier,
    nation
WHERE
    ps_suppkey = s_suppkey
    AND s_nationkey = n_nationkey
    AND n_name = 'MOZAMBIQUE'
GROUP BY
    ps_partkey having
        sum(ps_supplycost * ps_availqty) > (
            SELECT
                sum(ps_supplycost * ps_availqty) * 0.0000010000
            FROM
                partsupp,
                supplier,
                nation
            WHERE
                ps_suppkey = s_suppkey
                AND s_nationkey = n_nationkey
                AND n_name = 'MOZAMBIQUE'
        )
ORDER BY
    value desc;
```

Q12

```sql
SELECT
    l_shipmode,
    sum(CASE
        WHEN o_orderpriority = '1-URGENT'
            OR o_orderpriority = '2-HIGH'
            THEN 1
        ELSE 0
    END) AS high_line_count,
    sum(CASE
        WHEN o_orderpriority <> '1-URGENT'
            AND o_orderpriority <> '2-HIGH'
            THEN 1
        ELSE 0
    END) AS low_line_count
FROM
    orders,
    lineitem
WHERE
    o_orderkey = l_orderkey
    AND l_shipmode in ('MAIL', 'AIR')
    AND l_commitdate < l_receiptdate
    AND l_shipdate < l_commitdate
    AND l_receiptdate >= date '1996-01-01'
    AND l_receiptdate < date '1996-01-01' + interval '1' year
GROUP BY
    l_shipmode
ORDER BY
    l_shipmode;
```

Q13

```sql
SELECT
    c_count,
    count(*) AS custdist
FROM
    (
        SELECT
            c_custkey,
            count(o_orderkey) AS c_count
        FROM
            customer LEFT OUTER JOIN orders ON
                c_custkey = o_custkey
                AND o_comment NOT LIKE '%special%deposits%'
        GROUP BY
            c_custkey
    ) AS c_orders
GROUP BY
    c_count
ORDER BY
    custdist desc,
    c_count desc;
```

Q14

```sql
SELECT
    100.00 * sum(CASE
        WHEN p_type LIKE 'PROMO%'
            THEN l_extendedprice * (1 - l_discount)
        ELSE 0
    END) / sum(l_extendedprice * (1 - l_discount)) AS promo_revenue
FROM
    lineitem,
    part
WHERE
    l_partkey = p_partkey
    AND l_shipdate >= date '1996-10-01'
    AND l_shipdate < date '1996-10-01' + interval '1' month;
```

Q15

```sql
CREATE VIEW revenue0 (supplier_no, total_revenue) AS
    SELECT
        l_suppkey,
        sum(l_extendedprice * (1 - l_discount))
    FROM
        lineitem
    WHERE
        l_shipdate >= date '1997-06-01'
        AND l_shipdate < date '1997-06-01' + interval '3' month
    GROUP BY
        l_suppkey;

SELECT
    s_suppkey,
    s_name,
    s_address,
    s_phone,
    total_revenue
FROM
    supplier,
    revenue0
WHERE
    s_suppkey = supplier_no
    AND total_revenue = (
        SELECT
            max(total_revenue)
        FROM
            revenue0
    )
ORDER BY
    s_suppkey;

DROP VIEW revenue0;
```

Q16

```sql
SELECT
    p_brand,
    p_type,
    p_size,
    count(distinct ps_suppkey) AS supplier_cnt
FROM
    partsupp,
    part
WHERE
    p_partkey = ps_partkey
    AND p_brand <> 'Brand#15'
    AND p_type NOT LIKE 'SMALL POLISHED%'
    AND p_size in (21, 9, 46, 34, 50, 33, 17, 36)
    AND ps_suppkey NOT in (
        SELECT
            s_suppkey
        FROM
            supplier
        WHERE
            s_comment LIKE '%Customer%Complaints%'
    )
GROUP BY
    p_brand,
    p_type,
    p_size
ORDER BY
    supplier_cnt desc,
    p_brand,
    p_type,
    p_size;
```

Q17

```sql
SELECT
    sum(l_extendedprice) / 7.0 AS avg_yearly
FROM
    lineitem,
    part
WHERE
    p_partkey = l_partkey
    AND p_brand = 'Brand#52'
    AND p_container = 'MED CASE'
    AND l_quantity < (
        SELECT
            0.2 * avg(l_quantity)
        FROM
            lineitem
        WHERE
            l_partkey = p_partkey
    );
```

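Q17 fails for the same reason: the scalar subquery is correlated on `l_partkey = p_partkey`. Since it only computes a per-part average, it can be pre-aggregated once and joined. A minimal sketch of such a decorrelated variant (`part_avg` and `quantity_threshold` are illustrative names, not part of the benchmark):

```sql
SELECT sum(l_extendedprice) / 7.0 AS avg_yearly
FROM lineitem
INNER JOIN part ON p_partkey = l_partkey
INNER JOIN
(
    -- compute the per-part average once instead of per outer row
    SELECT
        l_partkey,
        0.2 * avg(l_quantity) AS quantity_threshold
    FROM lineitem
    GROUP BY l_partkey
) AS part_avg ON part_avg.l_partkey = lineitem.l_partkey
WHERE p_brand = 'Brand#52'
    AND p_container = 'MED CASE'
    AND l_quantity < quantity_threshold;
```
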
Q18

```sql
SELECT
    c_name,
    c_custkey,
    o_orderkey,
    o_orderdate,
    o_totalprice,
    sum(l_quantity)
FROM
    customer,
    orders,
    lineitem
WHERE
    o_orderkey in (
        SELECT
            l_orderkey
        FROM
            lineitem
        GROUP BY
            l_orderkey having
                sum(l_quantity) > 313
    )
    AND c_custkey = o_custkey
    AND o_orderkey = l_orderkey
GROUP BY
    c_name,
    c_custkey,
    o_orderkey,
    o_orderdate,
    o_totalprice
ORDER BY
    o_totalprice desc,
    o_orderdate
LIMIT 100;
```

Q19

```sql
SELECT
    sum(l_extendedprice * (1 - l_discount)) AS revenue
FROM
    lineitem,
    part
WHERE
    (
        p_partkey = l_partkey
        AND p_brand = 'Brand#31'
        AND p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
        AND l_quantity >= 3 AND l_quantity <= 3 + 10
        AND p_size between 1 AND 5
        AND l_shipmode in ('AIR', 'AIR REG')
        AND l_shipinstruct = 'DELIVER IN PERSON'
    )
    OR
    (
        p_partkey = l_partkey
        AND p_brand = 'Brand#54'
        AND p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
        AND l_quantity >= 17 AND l_quantity <= 17 + 10
        AND p_size between 1 AND 10
        AND l_shipmode in ('AIR', 'AIR REG')
        AND l_shipinstruct = 'DELIVER IN PERSON'
    )
    OR
    (
        p_partkey = l_partkey
        AND p_brand = 'Brand#54'
        AND p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
        AND l_quantity >= 26 AND l_quantity <= 26 + 10
        AND p_size between 1 AND 15
        AND l_shipmode in ('AIR', 'AIR REG')
        AND l_shipinstruct = 'DELIVER IN PERSON'
    );
```

Q20

```sql
SELECT
    s_name,
    s_address
FROM
    supplier,
    nation
WHERE
    s_suppkey in (
        SELECT
            ps_suppkey
        FROM
            partsupp
        WHERE
            ps_partkey in (
                SELECT
                    p_partkey
                FROM
                    part
                WHERE
                    p_name LIKE 'chiffon%'
            )
            AND ps_availqty > (
                SELECT
                    0.5 * sum(l_quantity)
                FROM
                    lineitem
                WHERE
                    l_partkey = ps_partkey
                    AND l_suppkey = ps_suppkey
                    AND l_shipdate >= date '1997-01-01'
                    AND l_shipdate < date '1997-01-01' + interval '1' year
            )
    )
    AND s_nationkey = n_nationkey
    AND n_name = 'MOZAMBIQUE'
ORDER BY
    s_name;
```

Q21

```sql
SELECT
    s_name,
    count(*) AS numwait
FROM
    supplier,
    lineitem l1,
    orders,
    nation
WHERE
    s_suppkey = l1.l_suppkey
    AND o_orderkey = l1.l_orderkey
    AND o_orderstatus = 'F'
    AND l1.l_receiptdate > l1.l_commitdate
    AND EXISTS (
        SELECT
            *
        FROM
            lineitem l2
        WHERE
            l2.l_orderkey = l1.l_orderkey
            AND l2.l_suppkey <> l1.l_suppkey
    )
    AND NOT EXISTS (
        SELECT
            *
        FROM
            lineitem l3
        WHERE
            l3.l_orderkey = l1.l_orderkey
            AND l3.l_suppkey <> l1.l_suppkey
            AND l3.l_receiptdate > l3.l_commitdate
    )
    AND s_nationkey = n_nationkey
    AND n_name = 'RUSSIA'
GROUP BY
    s_name
ORDER BY
    numwait desc,
    s_name
LIMIT 100;
```

Q22

```sql
SELECT
    cntrycode,
    count(*) AS numcust,
    sum(c_acctbal) AS totacctbal
FROM
    (
        SELECT
            substring(c_phone FROM 1 for 2) AS cntrycode,
            c_acctbal
        FROM
            customer
        WHERE
            substring(c_phone FROM 1 for 2) in
                ('26', '34', '10', '18', '27', '12', '11')
            AND c_acctbal > (
                SELECT
                    avg(c_acctbal)
                FROM
                    customer
                WHERE
                    c_acctbal > 0.00
                    AND substring(c_phone FROM 1 for 2) in
                        ('26', '34', '10', '18', '27', '12', '11')
            )
            AND NOT EXISTS (
                SELECT
                    *
                FROM
                    orders
                WHERE
                    o_custkey = c_custkey
            )
    ) AS custsale
GROUP BY
    cntrycode
ORDER BY
    cntrycode;
```
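
Q22's `NOT EXISTS` correlates only on the customer key and can be expressed as a `LEFT ANTI JOIN`; the `avg(c_acctbal)` subquery is uncorrelated and works as-is. A minimal sketch of such a rewrite (not official `qgen` output):

```sql
SELECT
    cntrycode,
    count(*) AS numcust,
    sum(c_acctbal) AS totacctbal
FROM
(
    SELECT
        substring(c_phone, 1, 2) AS cntrycode,
        c_acctbal
    FROM customer
    -- the anti join keeps only customers without any order,
    -- replacing NOT EXISTS (... WHERE o_custkey = c_custkey)
    LEFT ANTI JOIN orders ON o_custkey = c_custkey
    WHERE substring(c_phone, 1, 2) IN ('26', '34', '10', '18', '27', '12', '11')
        AND c_acctbal > (
            SELECT avg(c_acctbal)
            FROM customer
            WHERE c_acctbal > 0.00
                AND substring(c_phone, 1, 2) IN ('26', '34', '10', '18', '27', '12', '11')
        )
) AS custsale
GROUP BY cntrycode
ORDER BY cntrycode;
```
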
@ -155,7 +155,7 @@ Format a query as usual, then place the values that you want to pass from the ap
|
||||
|
||||
``` bash
|
||||
$ clickhouse-client --param_tuple_in_tuple="(10, ('dt', 10))" -q "SELECT * FROM table WHERE val = {tuple_in_tuple:Tuple(UInt8, Tuple(String, UInt8))}"
|
||||
$ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="number" --query "SELECT {col:Identifier} FROM {db:Identifier}.{tbl:Identifier} LIMIT 10"
|
||||
$ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="number" --param_alias="top_ten" --query "SELECT {col:Identifier} as {alias:Identifier} FROM {db:Identifier}.{tbl:Identifier} LIMIT 10"
|
||||
```
|
||||
|
||||
## Configuring {#interfaces_cli_configuration}
|
||||
@ -188,7 +188,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
|
||||
- `--memory-usage` – If specified, print memory usage to ‘stderr’ in non-interactive mode]. Possible values: 'none' - do not print memory usage, 'default' - print number of bytes, 'readable' - print memory usage in human-readable format.
|
||||
- `--stacktrace` – If specified, also print the stack trace if an exception occurs.
|
||||
- `--config-file` – The name of the configuration file.
|
||||
- `--secure` – If specified, will connect to server over secure connection (TLS). You might need to configure your CA certificates in the [configuration file](#configuration_files). The available configuration settings are the same as for [server-side TLS configuration](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl).
|
||||
- `--secure` – If specified, will connect to server over secure connection (TLS). You might need to configure your CA certificates in the [configuration file](#configuration_files). The available configuration settings are the same as for [server-side TLS configuration](../operations/server-configuration-parameters/settings.md#openssl).
|
||||
- `--history_file` — Path to a file containing command history.
|
||||
- `--param_<name>` — Value for a [query with parameters](#cli-queries-with-parameters).
|
||||
- `--hardware-utilization` — Print hardware utilization information in progress bar.
|
||||
|
@ -39,7 +39,6 @@ The supported formats are:
|
||||
| [JSONCompact](#jsoncompact) | ✔ | ✔ |
|
||||
| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ |
|
||||
| [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ |
|
||||
| [JSONCompactWithProgress](#jsoncompactwithprogress) | ✗ | ✔ |
|
||||
| [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
|
||||
| [PrettyJSONEachRow](#prettyjsoneachrow) | ✗ | ✔ |
|
||||
| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ |
|
||||
@ -988,59 +987,6 @@ Example:
|
||||
|
||||
Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings-formats.md/#input_format_defaults_for_omitted_fields) setting here)
|
||||
|
||||
## JSONCompactWithProgress (#jsoncompactwithprogress)
|
||||
|
||||
In this format, ClickHouse outputs each row as a separated, newline-delimited JSON Object.
|
||||
|
||||
Each row is either a metadata object, data object, progress information or statistics object:
|
||||
|
||||
1. **Metadata Object (`meta`)**
|
||||
- Describes the structure of the data rows.
|
||||
- Fields: `name` (column name), `type` (data type, e.g., `UInt32`, `String`, etc.).
|
||||
- Example: `{"meta": [{"name":"id", "type":"UInt32"}, {"name":"name", "type":"String"}]}`
|
||||
- Appears before any data objects.
|
||||
|
||||
2. **Data Object (`data`)**
|
||||
- Represents a row of query results.
|
||||
- Fields: An array with values corresponding to the columns defined in the metadata.
|
||||
- Example: `{"data":["1", "John Doe"]}`
|
||||
- Appears after the metadata object, one per row.
|
||||
|
||||
3. **Progress Information Object (`progress`)**
|
||||
- Provides real-time progress feedback during query execution.
|
||||
- Fields: `read_rows`, `read_bytes`, `written_rows`, `written_bytes`, `total_rows_to_read`, `result_rows`, `result_bytes`, `elapsed_ns`.
|
||||
- Example: `{"progress":{"read_rows":"8","read_bytes":"168"}}`
|
||||
- May appear intermittently.
|
||||
|
||||
4. **Statistics Object (`statistics`)**
|
||||
- Summarizes query execution statistics.
|
||||
- Fields: `rows`, `rows_before_limit_at_least`, `elapsed`, `rows_read`, `bytes_read`.
|
||||
- Example: `{"statistics": {"rows":2, "elapsed":0.001995, "rows_read":8}}`
|
||||
- Appears at the end.
|
||||
|
||||
5. **Exception Object (`exception`)**
|
||||
- Represents an error that occurred during query execution.
|
||||
- Fields: A single text field containing the error message.
|
||||
- Example: `{"exception": "Code: 395. DB::Exception: Value passed to 'throwIf' function is non-zero..."}`
|
||||
- Appears when an error is encountered.
|
||||
|
||||
6. **Totals Object (`totals`)**
|
||||
- Provides the totals for each numeric column in the result set.
|
||||
- Fields: An array with total values corresponding to the columns defined in the metadata.
|
||||
- Example: `{"totals": ["", "3"]}`
|
||||
- Appears at the end of the data rows, if applicable.
|
||||
|
||||
Example:
|
||||
|
||||
```json
|
||||
{"meta": [{"name":"id", "type":"UInt32"}, {"name":"name", "type":"String"}]}
|
||||
{"progress":{"read_rows":"8","read_bytes":"168","written_rows":"0","written_bytes":"0","total_rows_to_read":"2","result_rows":"0","result_bytes":"0","elapsed_ns":"0"}}
|
||||
{"data":["1", "John Doe"]}
|
||||
{"data":["2", "Joe Doe"]}
|
||||
{"statistics": {"rows":2, "rows_before_limit_at_least":8, "elapsed":0.001995, "rows_read":8, "bytes_read":168}}
|
||||
```
|
||||
|
||||
|
||||
## JSONEachRow {#jsoneachrow}
|
||||
|
||||
In this format, ClickHouse outputs each row as a separated, newline-delimited JSON Object.
|
||||
@ -2059,7 +2005,7 @@ In this case autogenerated Protobuf schema will be saved in file `path/to/schema
|
||||
|
||||
### Drop Protobuf cache
|
||||
|
||||
To reload Protobuf schema loaded from [format_schema_path](../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-format_schema_path) use [SYSTEM DROP ... FORMAT CACHE](../sql-reference/statements/system.md/#system-drop-schema-format) statement.
|
||||
To reload Protobuf schema loaded from [format_schema_path](../operations/server-configuration-parameters/settings.md/#format_schema_path) use [SYSTEM DROP ... FORMAT CACHE](../sql-reference/statements/system.md/#system-drop-schema-format) statement.
|
||||
|
||||
```sql
|
||||
SYSTEM DROP FORMAT SCHEMA CACHE FOR Protobuf
|
||||
@ -2622,7 +2568,7 @@ Result:
|
||||
|
||||
## Npy {#data-format-npy}
|
||||
|
||||
This function is designed to load a NumPy array from a .npy file into ClickHouse. The NumPy file format is a binary format used for efficiently storing arrays of numerical data. During import, ClickHouse treats top level dimension as an array of rows with single column. Supported Npy data types and their corresponding type in ClickHouse:
|
||||
This function is designed to load a NumPy array from a .npy file into ClickHouse. The NumPy file format is a binary format used for efficiently storing arrays of numerical data. During import, ClickHouse treats top level dimension as an array of rows with single column. Supported Npy data types and their corresponding type in ClickHouse:
|
||||
|
||||
| Npy data type (`INSERT`) | ClickHouse data type | Npy data type (`SELECT`) |
|
||||
|--------------------------|-----------------------------------------------------------------|--------------------------|
|
||||
@ -2774,7 +2720,7 @@ can contain an absolute path or a path relative to the current directory on the
|
||||
If you use the client in the [batch mode](/docs/en/interfaces/cli.md/#cli_usage), the path to the schema must be relative due to security reasons.
|
||||
|
||||
If you input or output data via the [HTTP interface](/docs/en/interfaces/http.md) the file name specified in the format schema
|
||||
should be located in the directory specified in [format_schema_path](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-format_schema_path)
|
||||
should be located in the directory specified in [format_schema_path](/docs/en/operations/server-configuration-parameters/settings.md/#format_schema_path)
|
||||
in the server configuration.
|
||||
|
||||
## Skipping Errors {#skippingerrors}
|
||||
|
@ -11,7 +11,7 @@ The HTTP interface lets you use ClickHouse on any platform from any programming
|
||||
By default, `clickhouse-server` listens for HTTP on port 8123 (this can be changed in the config).
|
||||
HTTPS can be enabled as well with port 8443 by default.
|
||||
|
||||
If you make a `GET /` request without parameters, it returns 200 response code and the string which defined in [http_server_default_response](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-http_server_default_response) default value “Ok.” (with a line feed at the end)
|
||||
If you make a `GET /` request without parameters, it returns 200 response code and the string which defined in [http_server_default_response](../operations/server-configuration-parameters/settings.md#http_server_default_response) default value “Ok.” (with a line feed at the end)
|
||||
|
||||
``` bash
|
||||
$ curl 'http://localhost:8123/'
|
||||
@ -58,7 +58,7 @@ Connection: Close
|
||||
Content-Type: text/tab-separated-values; charset=UTF-8
|
||||
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
|
||||
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
|
||||
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds": "0"}
|
||||
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
|
||||
1
|
||||
```
|
||||
@ -472,7 +472,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
|
||||
< X-ClickHouse-Format: Template
|
||||
< X-ClickHouse-Timezone: Asia/Shanghai
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
<
|
||||
# HELP "Query" "Number of executing queries"
|
||||
# TYPE "Query" counter
|
||||
@ -668,7 +668,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
|
||||
< Content-Type: text/html; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
<
|
||||
* Connection #0 to host localhost left intact
|
||||
Say Hi!%
|
||||
@ -708,7 +708,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
|
||||
< Content-Type: text/plain; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
<
|
||||
* Connection #0 to host localhost left intact
|
||||
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
|
||||
@ -766,7 +766,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
|
||||
< Content-Type: text/html; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
<
|
||||
<html><body>Absolute Path File</body></html>
|
||||
* Connection #0 to host localhost left intact
|
||||
@ -785,13 +785,13 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
|
||||
< Content-Type: text/html; charset=UTF-8
|
||||
< Transfer-Encoding: chunked
|
||||
< Keep-Alive: timeout=10
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334", "real_time_microseconds":"0"}
|
||||
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
|
||||
<
|
||||
<html><body>Relative Path File</body></html>
|
||||
* Connection #0 to host localhost left intact
|
||||
```
|
||||
|
||||
## Valid JSON/XML response on exception during HTTP streaming {valid-output-on-exception-http-streaming}
|
||||
## Valid JSON/XML response on exception during HTTP streaming {valid-output-on-exception-http-streaming}
|
||||
|
||||
While query execution over HTTP an exception can happen when part of the data has already been sent. Usually an exception is sent to the client in plain text
|
||||
even if some specific data format was used to output data and the output may become invalid in terms of specified data format.
|
||||
|
@ -21,6 +21,11 @@ If there is a native driver available (e.g., [DBeaver](../integrations/dbeaver))
|
||||
|
||||
If your use case involves a particular tool that does not have a native ClickHouse driver, and you would like to use it via the MySQL interface and you found certain incompatibilities - please [create an issue](https://github.com/ClickHouse/ClickHouse/issues) in the ClickHouse repository.
|
||||
|
||||
::::note
|
||||
To support the SQL dialect of above BI tools better, ClickHouse's MySQL interface implicitly runs SELECT queries with setting [prefer_column_name_to_alias = 1](../operations/settings/settings.md#prefer-column-name-to-alias).
|
||||
This cannot be turned off and it can lead in rare edge cases to different behavior between queries sent to ClickHouse's normal and MySQL query interfaces.
|
||||
::::
|
||||
|
||||
## Enabling the MySQL Interface On ClickHouse Cloud
|
||||
|
||||
1. After creating your ClickHouse Cloud Service, on the credentials screen, select the MySQL tab
|
||||
@ -96,7 +101,7 @@ In this case, ensure that the username follows the `mysql4<subdomain>_<username>
|
||||
|
||||
## Enabling the MySQL Interface On Self-managed ClickHouse
|
||||
|
||||
Add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d/` [folder](../operations/configuration-files):
|
||||
Add the [mysql_port](../operations/server-configuration-parameters/settings.md#mysql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d/` [folder](../operations/configuration-files):
|
||||
|
||||
``` xml
|
||||
<clickhouse>
|
||||
|
@ -8,7 +8,7 @@ sidebar_label: PostgreSQL Interface
|
||||
|
||||
ClickHouse supports the PostgreSQL wire protocol, which allows you to use Postgres clients to connect to ClickHouse. In a sense, ClickHouse can pretend to be a PostgreSQL instance - allowing you to connect a PostgreSQL client application to ClickHouse that is not already directly supported by ClickHouse (for example, Amazon Redshift).
|
||||
|
||||
To enable the PostgreSQL wire protocol, add the [postgresql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-postgresql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder:
|
||||
To enable the PostgreSQL wire protocol, add the [postgresql_port](../operations/server-configuration-parameters/settings.md#postgresql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder:
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
|
@ -151,7 +151,7 @@ Check:
|
||||
|
||||
- Endpoint settings.
|
||||
|
||||
Check [listen_host](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-listen_host) and [tcp_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) settings.
|
||||
Check [listen_host](../operations/server-configuration-parameters/settings.md#listen_host) and [tcp_port](../operations/server-configuration-parameters/settings.md#tcp_port) settings.
|
||||
|
||||
ClickHouse server accepts localhost connections only by default.
|
||||
|
||||
@ -163,8 +163,8 @@ Check:
|
||||
|
||||
Check:
|
||||
|
||||
- The [tcp_port_secure](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) setting.
|
||||
- Settings for [SSL certificates](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl).
|
||||
- The [tcp_port_secure](../operations/server-configuration-parameters/settings.md#tcp_port_secure) setting.
|
||||
- Settings for [SSL certificates](../operations/server-configuration-parameters/settings.md#openssl).
|
||||
|
||||
Use proper parameters while connecting. For example, use the `port_secure` parameter with `clickhouse_client`.
|
||||
|
||||
|
@ -7,7 +7,7 @@ sidebar_label: Configuration Files

# Configuration Files

The ClickHouse server can be configured with configuration files in XML or YAML syntax. In most installation types, the ClickHouse server runs with `/etc/clickhouse-server/config.xml` as its default configuration file, but it is also possible to specify the location of the configuration file manually at server startup using the command-line option `--config-file=` or `-C`. Additional configuration files may be placed in the `config.d/` directory relative to the main configuration file, for example `/etc/clickhouse-server/config.d/`. Files in this directory and the main configuration are merged in a preprocessing step before the configuration is applied in the ClickHouse server. Configuration files are merged in alphabetical order. To simplify updates and improve modularization, it is best practice to keep the default `config.xml` file unmodified and place additional customization into `config.d/`.
(The ClickHouse Keeper configuration lives in `/etc/clickhouse-keeper/keeper_config.xml`, and thus the additional files need to be placed in `/etc/clickhouse-keeper/keeper_config.d/`.)

It is possible to mix XML and YAML configuration files, for example, you could have a main configuration file `config.xml` and additional configuration files `config.d/network.xml`, `config.d/timezone.yaml` and `config.d/keeper.yaml`. Mixing XML and YAML within a single configuration file is not supported. XML configuration files should use `<clickhouse>...</clickhouse>` as the top-level tag. In YAML configuration files, `clickhouse:` is optional; the parser inserts it implicitly if absent.
@ -154,7 +154,7 @@ will take the default value

The config can define substitutions. There are two types of substitutions:

- If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/clickhouse/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md#macros)).
- If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#include_from) element in the server config. The substitution values are specified in `/clickhouse/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md#macros)).

- If you want to replace an entire element with a substitution, use `include` as the element name. Substitutions can also be performed from ZooKeeper by specifying the attribute `from_zk = "/path/to/node"`. In this case, the element value is replaced with the contents of the ZooKeeper node at `/path/to/node`. This also works if you store an entire XML subtree as a ZooKeeper node; it will be fully inserted into the source element.
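As an illustration of the `incl` mechanism, a minimal sketch (the `macros` substitution and its values are hypothetical):

```xml
<!-- In the main server config: pull <macros> from the substitutions file -->
<clickhouse>
    <macros incl="macros" optional="true"/>
</clickhouse>

<!-- In /etc/metrika.xml: the substitution itself, under /clickhouse/macros -->
<clickhouse>
    <macros>
        <shard>01</shard>
        <replica>replica-01</replica>
    </macros>
</clickhouse>
```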
@ -6,7 +6,7 @@ import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.m

<SelfManaged />

[SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation makes it possible to uniquely authenticate an incoming connection. The `Common Name` or `subjectAltName extension` field of the certificate is used to identify the connected user. `subjectAltName extension` supports the usage of one wildcard '*' in the server configuration. This makes it possible to associate multiple certificates with the same user. Additionally, reissuing and revoking certificates does not affect the ClickHouse configuration.
[SSL 'strict' option](../server-configuration-parameters/settings.md#openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation makes it possible to uniquely authenticate an incoming connection. The `Common Name` or `subjectAltName extension` field of the certificate is used to identify the connected user. `subjectAltName extension` supports the usage of one wildcard '*' in the server configuration. This makes it possible to associate multiple certificates with the same user. Additionally, reissuing and revoking certificates does not affect the ClickHouse configuration.

To enable SSL certificate authentication, a list of `Common Name`'s or `Subject Alt Name`'s for each ClickHouse user must be specified in the settings file `users.xml`:
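What such a `users.xml` entry can look like, as a rough sketch (the user and certificate names are hypothetical; the tail of the authoritative example follows in the next hunk):

```xml
<clickhouse>
    <users>
        <example_user>
            <ssl_certificates>
                <!-- Each common_name maps a trusted certificate to this user -->
                <common_name>host.domain.com:example_user</common_name>
            </ssl_certificates>
        </example_user>
    </users>
</clickhouse>
```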
@ -40,4 +40,4 @@ To enable SSL certificate authentication, a list of `Common Name`'s or `Subject

</clickhouse>
```

For the SSL [`chain of trust`](https://en.wikipedia.org/wiki/Chain_of_trust) to work correctly, it is also important to make sure that the [`caConfig`](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) parameter is configured properly.
For the SSL [`chain of trust`](https://en.wikipedia.org/wiki/Chain_of_trust) to work correctly, it is also important to make sure that the [`caConfig`](../server-configuration-parameters/settings.md#openssl) parameter is configured properly.
@ -50,7 +50,7 @@ This data is collected in the `system.asynchronous_metric_log` table.

ClickHouse server has embedded instruments for self-state monitoring.

To track server events use server logs. See the [logger](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-logger) section of the configuration file.
To track server events use server logs. See the [logger](../operations/server-configuration-parameters/settings.md#logger) section of the configuration file.

ClickHouse collects:

@ -59,9 +59,9 @@ ClickHouse collects:

You can find metrics in the [system.metrics](../operations/system-tables/metrics.md#system_tables-metrics), [system.events](../operations/system-tables/events.md#system_tables-events), and [system.asynchronous_metrics](../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) tables.

You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Graphite by following their official [guide](https://graphite.readthedocs.io/en/latest/install.html).
You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](../operations/server-configuration-parameters/settings.md#graphite) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Graphite by following their official [guide](https://graphite.readthedocs.io/en/latest/install.html).

You can configure ClickHouse to export metrics to [Prometheus](https://prometheus.io). See the [Prometheus section](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-prometheus) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Prometheus by following their official [guide](https://prometheus.io/docs/prometheus/latest/installation/).
You can configure ClickHouse to export metrics to [Prometheus](https://prometheus.io). See the [Prometheus section](../operations/server-configuration-parameters/settings.md#prometheus) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Prometheus by following their official [guide](https://prometheus.io/docs/prometheus/latest/installation/).
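For instance, a minimal sketch of the Prometheus section (the port and flags are illustrative defaults, not a recommendation):

```xml
<clickhouse>
    <prometheus>
        <endpoint>/metrics</endpoint>
        <port>9363</port>
        <metrics>true</metrics>
        <events>true</events>
        <asynchronous_metrics>true</asynchronous_metrics>
    </prometheus>
</clickhouse>
```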
Additionally, you can monitor server availability through the HTTP API. Send the `HTTP GET` request to `/ping`. If the server is available, it responds with `200 OK`.
@ -28,7 +28,7 @@ SETTINGS allow_introspection_functions = 1

In self-managed deployments, to use query profiler:

- Set up the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) section of the server configuration.
- Set up the [trace_log](../../operations/server-configuration-parameters/settings.md#trace_log) section of the server configuration.

This section configures the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table containing the results of the profiler functioning. It is configured by default. Remember that data in this table is valid only for a running server. After a server restart, ClickHouse does not clean up the table, and all the stored virtual memory addresses may become invalid.
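As an illustration, a minimal sketch of that section (the values shown are the usual defaults, not a recommendation):

```xml
<clickhouse>
    <trace_log>
        <database>system</database>
        <table>trace_log</table>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </trace_log>
</clickhouse>
```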
@ -3162,3 +3162,11 @@ Type: UInt64

Default value: 100

Zero means unlimited.

## use_legacy_mongodb_integration

Use the legacy MongoDB integration implementation. Deprecated.

Type: Bool

Default value: `true`.
@ -216,6 +216,19 @@ Possible values:

Default value: 1.

## use_skip_indexes_if_final {#use_skip_indexes_if_final}

Controls whether skipping indexes are used when executing a query with the FINAL modifier.

By default, this setting is disabled because skip indexes may exclude rows (granules) containing the latest data, which could lead to incorrect results. When enabled, skipping indexes are applied even with the FINAL modifier, potentially improving performance but with the risk of missing recent updates.

Possible values:

- 0 — Disabled.
- 1 — Enabled.

Default value: 0.
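A sketch of applying the setting per query (the table and column names are hypothetical):

```sql
-- Allow skip indexes to prune granules even under FINAL,
-- accepting that rows carrying the latest updates may be skipped.
SELECT key, value
FROM replacing_table FINAL
WHERE value = 'target'
SETTINGS use_skip_indexes_if_final = 1;
```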
## force_data_skipping_indices {#force_data_skipping_indices}

Disables query execution if the passed data skipping indices weren't used.
@ -933,7 +946,7 @@ Default value: `1`.

Setting up query logging.

Queries sent to ClickHouse with this setup are logged according to the rules in the [query_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query-log) server configuration parameter.
Queries sent to ClickHouse with this setup are logged according to the rules in the [query_log](../../operations/server-configuration-parameters/settings.md/#query-log) server configuration parameter.

Example:
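Presumably the minimal form is toggling the setting per session, as a sketch:

```sql
SET log_queries = 1; -- subsequent queries in this session are written to system.query_log
```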
@ -978,7 +991,7 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING'

Setting up query threads logging.

Query threads log into the [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting has effect only when [log_queries](#log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_thread_log) server configuration parameter.
Query threads log into the [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting has effect only when [log_queries](#log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#query_thread_log) server configuration parameter.

Possible values:
@ -997,7 +1010,7 @@ log_query_threads=1

Setting up query views logging.

When a query run by ClickHouse with this setting enabled has associated views (materialized or live views), they are logged in the [query_views_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_views_log) server configuration parameter.
When a query run by ClickHouse with this setting enabled has associated views (materialized or live views), they are logged in the [query_views_log](../../operations/server-configuration-parameters/settings.md/#query_views_log) server configuration parameter.

Example:
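Analogously to query logging, a minimal sketch:

```sql
SET log_query_views = 1; -- views touched by a query get logged to system.query_views_log
```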
@ -1588,25 +1601,12 @@ This setting is useful for any replicated table.

An arbitrary integer expression that can be used to split work between replicas for a specific table.
The value can be any integer expression.
A query may be processed faster if it is executed on several servers in parallel, but it depends on the used [parallel_replicas_custom_key](#parallel_replicas_custom_key)
and [parallel_replicas_custom_key_filter_type](#parallel_replicas_custom_key_filter_type).

Simple expressions using primary keys are preferred.

If the setting is used on a cluster that consists of a single shard with multiple replicas, those replicas will be converted into virtual shards.
Otherwise, it will behave the same as for the `SAMPLE` key: it will use multiple replicas of each shard.
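A sketch of the intended usage (the distributed table and key expression are hypothetical):

```sql
-- Split the work for one query across up to 3 replicas using a custom key.
SELECT count()
FROM events_distributed
SETTINGS
    max_parallel_replicas = 3,
    parallel_replicas_custom_key = 'cityHash64(user_id)',
    parallel_replicas_custom_key_filter_type = 'default';
```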
## parallel_replicas_custom_key_filter_type {#parallel_replicas_custom_key_filter_type}

How to use `parallel_replicas_custom_key` expression for splitting work between replicas.

Possible values:

- `default` — Use the default implementation using modulo operation on the `parallel_replicas_custom_key`.
- `range` — Split the entire value space of the expression into ranges. This type of filtering is useful if values of `parallel_replicas_custom_key` are uniformly spread across the entire integer space, e.g. hash values.

Default value: `default`.
## parallel_replicas_custom_key_range_lower {#parallel_replicas_custom_key_range_lower}

Allows the filter type `range` to split the work evenly between replicas based on the custom range `[parallel_replicas_custom_key_range_lower, INT_MAX]`.

@ -1621,9 +1621,9 @@ Allows the filter type `range` to split the work evenly between replicas based o

When used in conjunction with [parallel_replicas_custom_key_range_lower](#parallel_replicas_custom_key_range_lower), it lets the filter evenly split the work over replicas for the range `[parallel_replicas_custom_key_range_lower, parallel_replicas_custom_key_range_upper]`.

Note: This setting will not cause any additional data to be filtered during query processing, rather it changes the points at which the range filter breaks up the range `[0, INT_MAX]` for parallel processing.
## allow_experimental_parallel_reading_from_replicas
## enable_parallel_replicas

Enables or disables sending SELECT queries to all replicas of a table (up to `max_parallel_replicas`). Reading is parallelized and coordinated dynamically. It will work for any kind of MergeTree table.
@ -4773,7 +4773,7 @@ Use this setting only for backward compatibility if your use cases depend on old

Sets the implicit time zone of the current session or query.
The implicit time zone is the time zone applied to values of type DateTime/DateTime64 which have no explicitly specified time zone.
The setting takes precedence over the globally configured (server-level) implicit time zone.
A value of '' (empty string) means that the implicit time zone of the current session or query is equal to the [server time zone](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone).
A value of '' (empty string) means that the implicit time zone of the current session or query is equal to the [server time zone](../server-configuration-parameters/settings.md#timezone).

You can use functions `timeZone()` and `serverTimeZone()` to get the session time zone and server time zone.
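A short sketch of the interplay (the zone name is only an example):

```sql
SET session_timezone = 'Europe/Berlin';
SELECT timeZone(), serverTimeZone(); -- session zone vs. the server-wide zone
```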
@ -4829,7 +4829,7 @@ This happens due to different parsing pipelines:

**See also**

- [timezone](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
- [timezone](../server-configuration-parameters/settings.md#timezone)

## final {#final}
@ -5682,3 +5682,29 @@ Default value: `0`.

Enable `IF NOT EXISTS` for `CREATE` statements by default. If either this setting or `IF NOT EXISTS` is specified and a table with the provided name already exists, no exception will be thrown.

Default value: `false`.
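Assuming this describes the `create_if_not_exists` setting (an assumption, since the setting's heading is outside this hunk), a sketch:

```sql
SET create_if_not_exists = 1;            -- hypothetical setting name, see note above
CREATE TABLE t (x UInt8) ENGINE = Memory;
CREATE TABLE t (x UInt8) ENGINE = Memory; -- no exception with the setting enabled
```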
## show_create_query_identifier_quoting_rule

Define identifier quoting behavior of the `SHOW CREATE` query result:
- `when_necessary`: When the identifier is one of `{"distinct", "all", "table"}`, or when it can cause ambiguity: column names, dictionary attribute names.
- `always`: Always quote identifiers.
- `user_display`: When the identifier is a keyword.

Default value: `when_necessary`.
## show_create_query_identifier_quoting_style

Define identifier quoting style of the `SHOW CREATE` query result:
- `Backticks`: \`clickhouse\` style.
- `DoubleQuotes`: "postgres" style.
- `BackticksMySQL`: \`mysql\` style, mostly the same as `Backticks`, but a backtick is escaped by doubling it.

Default value: `Backticks`.
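A sketch of switching the style per session (the table name is hypothetical):

```sql
SET show_create_query_identifier_quoting_style = 'DoubleQuotes';
SHOW CREATE TABLE t; -- identifiers are now rendered "postgres"-style
```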
## mongodb_throw_on_unsupported_query

If enabled, MongoDB tables will return an error when a MongoDB query can't be built.

Not applied for the legacy implementation, or when `allow_experimental_analyzer = 0`.

Default value: `true`.
@ -5,9 +5,9 @@ slug: /en/operations/system-tables/asynchronous_insert_log

Contains information about async inserts. Each entry represents an insert query buffered into an async insert query.

To start logging, configure parameters in the [asynchronous_insert_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-asynchronous_insert_log) section.
To start logging, configure parameters in the [asynchronous_insert_log](../../operations/server-configuration-parameters/settings.md#asynchronous_insert_log) section.

The flushing period of data is set in `flush_interval_milliseconds` parameter of the [asynchronous_insert_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-asynchronous_insert_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.
The flushing period of data is set in `flush_interval_milliseconds` parameter of the [asynchronous_insert_log](../../operations/server-configuration-parameters/settings.md#asynchronous_insert_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.

ClickHouse does not delete data from the table automatically. See [Introduction](../../operations/system-tables/index.md#system-tables-introduction) for more details.
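For instance, a sketch of forcing a flush and inspecting recent entries:

```sql
SYSTEM FLUSH LOGS;
SELECT *
FROM system.asynchronous_insert_log
ORDER BY event_time DESC
LIMIT 5;
```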
@ -3,7 +3,7 @@ slug: /en/operations/system-tables/graphite_retentions

---
# graphite_retentions

Contains information about parameters [graphite_rollup](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) which are used in tables with [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md) engines.
Contains information about parameters [graphite_rollup](../../operations/server-configuration-parameters/settings.md#graphite) which are used in tables with [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md) engines.

Columns:
@ -3,7 +3,7 @@ slug: /en/operations/system-tables/part_log

---
# part_log

The `system.part_log` table is created only if the [part_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-part-log) server setting is specified.
The `system.part_log` table is created only if the [part_log](../../operations/server-configuration-parameters/settings.md#part-log) server setting is specified.

This table contains information about events that occurred with [data parts](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) in the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family tables, such as adding or merging data.
@ -9,11 +9,11 @@ Contains information about executed queries, for example, start time, duration o

This table does not contain the ingested data for `INSERT` queries.
:::

You can change the settings of query logging in the [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) section of the server configuration.
You can change the settings of query logging in the [query_log](../../operations/server-configuration-parameters/settings.md#query-log) section of the server configuration.

You can disable query logging by setting [log_queries = 0](../../operations/settings/settings.md#log-queries). We do not recommend turning off logging because information in this table is important for solving issues.

The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.
The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_log](../../operations/server-configuration-parameters/settings.md#query-log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.

ClickHouse does not delete data from the table automatically. See [Introduction](../../operations/system-tables/index.md#system-tables-introduction) for more details.
@ -7,10 +7,10 @@ Contains information about threads that execute queries, for example, thread nam

To start logging:

1. Configure parameters in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) section.
1. Configure parameters in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#query_thread_log) section.
2. Set [log_query_threads](../../operations/settings/settings.md#log-query-threads) to 1.

The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.
The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_thread_log](../../operations/server-configuration-parameters/settings.md#query_thread_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.

ClickHouse does not delete data from the table automatically. See [Introduction](../../operations/system-tables/index.md#system-tables-introduction) for more details.
@ -7,10 +7,10 @@ Contains information about the dependent views executed when running a query, fo

To start logging:

1. Configure parameters in the [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) section.
1. Configure parameters in the [query_views_log](../../operations/server-configuration-parameters/settings.md#query_views_log) section.
2. Set [log_query_views](../../operations/settings/settings.md#log-query-views) to 1.

The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.
The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_views_log](../../operations/server-configuration-parameters/settings.md#query_views_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.

ClickHouse does not delete data from the table automatically. See [Introduction](../../operations/system-tables/index.md#system-tables-introduction) for more details.
@ -4,7 +4,7 @@ slug: /en/operations/system-tables/server_settings

# server_settings

Contains information about global settings for the server, which were specified in `config.xml`.
Currently, the table shows only settings from the first layer of `config.xml` and doesn't support nested configs (e.g. [logger](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-logger)).
Currently, the table shows only settings from the first layer of `config.xml` and doesn't support nested configs (e.g. [logger](../../operations/server-configuration-parameters/settings.md#logger)).

Columns:

@ -50,7 +50,7 @@ WHERE name LIKE '%thread_pool%'

```

Using `WHERE changed` can be useful, for example, when you want to check
whether settings in configuration files are loaded correctly and are in use.
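For example, as a sketch:

```sql
SELECT name, value, changed
FROM system.server_settings
WHERE changed;
```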
<!-- -->
@ -5,7 +5,7 @@ slug: /en/operations/system-tables/trace_log

Contains stack traces collected by the [sampling query profiler](../../operations/optimizing-performance/sampling-query-profiler.md).

ClickHouse creates this table when the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set. Also see settings: [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns), [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns), [memory_profiler_step](../../operations/settings/settings.md#memory_profiler_step),
ClickHouse creates this table when the [trace_log](../../operations/server-configuration-parameters/settings.md#trace_log) server configuration section is set. Also see settings: [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns), [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns), [memory_profiler_step](../../operations/settings/settings.md#memory_profiler_step),
[memory_profiler_sample_probability](../../operations/settings/settings.md#memory_profiler_sample_probability), [trace_profile_events](../../operations/settings/settings.md#trace_profile_events).

To analyze logs, use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` introspection functions.
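As a sketch (introspection functions must be explicitly allowed):

```sql
SELECT demangle(addressToSymbol(arrayJoin(trace))) AS frame
FROM system.trace_log
LIMIT 10
SETTINGS allow_introspection_functions = 1;
```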
@ -12,7 +12,7 @@ A utility providing filesystem-like operations for ClickHouse disks. It can work

* `--config-file, -C` -- path to ClickHouse config, defaults to `/etc/clickhouse-server/config.xml`.
* `--save-logs` -- Log progress of invoked commands to `/var/log/clickhouse-server/clickhouse-disks.log`.
* `--log-level` -- What [type](../server-configuration-parameters/settings#server_configuration_parameters-logger) of events to log, defaults to `none`.
* `--log-level` -- What [type](../server-configuration-parameters/settings#logger) of events to log, defaults to `none`.
* `--disk` -- what disk to use for `mkdir, move, read, write, remove` commands. Defaults to `default`.
* `--query, -q` -- single query that can be executed without launching interactive mode
* `--help, -h` -- print all the options and commands with description
@ -23,7 +23,7 @@ After the launch two disks are initialized. The first one is a disk `local` that

## Clickhouse-disks state

For each disk that was added, the utility stores the current directory (as in a usual filesystem). The user can change the current directory and switch between disks.

State is reflected in a prompt "`disk_name`:`path_name`"

## Commands
@ -35,7 +35,7 @@ In these documentation file all mandatory positional arguments are referred as `

Recursively copy data from `path-from` at disk `disk_1` (default value is a current disk (parameter `disk` in a non-interactive mode))
to `path-to` at disk `disk_2` (default value is a current disk (parameter `disk` in a non-interactive mode)).
* `current_disk_with_path (current, current_disk, current_path)`
  Print current state in format:
  `Disk: "current_disk" Path: "current path on current disk"`
* `help [<command>]`
  Print help message about command `command`. If `command` is not specified, print information about all commands.
@ -54,6 +54,6 @@ In these documentation file all mandatory positional arguments are referred as `

* `read (r) <path-from> [--path-to path]`
  Read a file from `path-from` to `path` (`stdout` if not supplied).
* `switch-disk [--path path] <disk>`
  Switch to disk `disk` on path `path` (if `path` is not specified, the default value is the previous path on disk `disk`).
* `write (w) [--path-from path] <path-to>`.
  Write a file from `path` (`stdin` if `path` is not supplied, input must finish by Ctrl+D) to `path-to`.
@ -32,7 +32,7 @@ Timezone agnostic Unix timestamp is stored in tables, and the timezone is used t

A list of supported time zones can be found in the [IANA Time Zone Database](https://www.iana.org/time-zones) and also can be queried by `SELECT * FROM system.time_zones`. [The list](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) is also available at Wikipedia.

You can explicitly set a time zone for `DateTime`-type columns when creating a table. Example: `DateTime('UTC')`. If the time zone isn’t set, ClickHouse uses the value of the [timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) parameter in the server settings or the operating system settings at the moment of the ClickHouse server start.
You can explicitly set a time zone for `DateTime`-type columns when creating a table. Example: `DateTime('UTC')`. If the time zone isn’t set, ClickHouse uses the value of the [timezone](../../operations/server-configuration-parameters/settings.md#timezone) parameter in the server settings or the operating system settings at the moment of the ClickHouse server start.
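A sketch of pinning a column to UTC at table creation (hypothetical table):

```sql
CREATE TABLE events
(
    ts DateTime('UTC') -- stored timestamps are rendered in UTC regardless of server zone
)
ENGINE = MergeTree
ORDER BY ts;
```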
The [clickhouse-client](../../interfaces/cli.md) applies the server time zone by default if a time zone isn’t explicitly set when initializing the data type. To use the client time zone, run `clickhouse-client` with the `--use_client_time_zone` parameter.
@ -149,7 +149,7 @@ Time shifts for multiple days. Some pacific islands changed their timezone offse

- [Functions for working with arrays](../../sql-reference/functions/array-functions.md)
- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#date_time_input_format)
- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#date_time_output_format)
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#timezone)
- [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone)
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)
- [The `Date` data type](../../sql-reference/data-types/date.md)
@ -119,7 +119,7 @@ FROM dt64;

- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#date_time_input_format)
- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#date_time_output_format)
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#timezone)
- [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone)
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-for-working-with-dates-and-times)
- [`Date` data type](../../sql-reference/data-types/date.md)
@ -31,9 +31,9 @@ ClickHouse:

- Periodically updates dictionaries and dynamically loads missing values. In other words, dictionaries can be loaded dynamically.
- Allows creating dictionaries with XML files or [DDL queries](../../sql-reference/statements/create/dictionary.md).

The configuration of dictionaries can be located in one or more xml-files. The path to the configuration is specified in the [dictionaries_config](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config) parameter.
The configuration of dictionaries can be located in one or more xml-files. The path to the configuration is specified in the [dictionaries_config](../../operations/server-configuration-parameters/settings.md#dictionaries_config) parameter.

Dictionaries can be loaded at server startup or at first use, depending on the [dictionaries_lazy_load](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load) setting.
Dictionaries can be loaded at server startup or at first use, depending on the [dictionaries_lazy_load](../../operations/server-configuration-parameters/settings.md#dictionaries_lazy_load) setting.
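For reference, a minimal sketch of pointing the server at dictionary definitions (the glob pattern is illustrative):

```xml
<clickhouse>
    <!-- Glob pattern resolved relative to the server config directory -->
    <dictionaries_config>*_dictionary.xml</dictionaries_config>
    <dictionaries_lazy_load>true</dictionaries_lazy_load>
</clickhouse>
```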
The [dictionaries](../../operations/system-tables/dictionaries.md#system_tables-dictionaries) system table contains information about dictionaries configured on the server. For each dictionary you can find there:
@ -1156,7 +1156,7 @@ Setting fields:

- `command_read_timeout` - Timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - Timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using a whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `0`. Optional parameter.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#user_scripts_path). Additional script arguments can be specified using a whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `0`. Optional parameter.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.

That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node.
@ -1191,7 +1191,7 @@ Setting fields:

- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false. Optional parameter.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.

That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node.
@ -1232,7 +1232,7 @@ SOURCE(HTTP(

))
```

In order for ClickHouse to access an HTTPS resource, you must [configure openSSL](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl) in the server configuration.
In order for ClickHouse to access an HTTPS resource, you must [configure openSSL](../../operations/server-configuration-parameters/settings.md#openssl) in the server configuration.

Setting fields:
@ -1680,7 +1680,7 @@ Setting fields:

The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared.
:::

#### Mongodb
#### MongoDB

Example of settings:
@ -1700,6 +1700,17 @@ Example of settings:

or

``` xml
<source>
    <mongodb>
        <uri>mongodb://localhost:27017/test?ssl=true</uri>
        <collection>dictionary_source</collection>
    </mongodb>
</source>
```

or

``` sql
SOURCE(MONGODB(
    host 'localhost'
@ -1722,6 +1733,22 @@ Setting fields:

- `collection` – Name of the collection.
- `options` - MongoDB connection string options (optional parameter).

or

``` sql
SOURCE(MONGODB(
    uri 'mongodb://localhost:27017/clickhouse'
    collection 'dictionary_source'
))
```

Setting fields:

- `uri` - URI for establishing the connection.
- `collection` – Name of the collection.

[More information about the engine](../../engines/table-engines/integrations/mongodb.md)
#### Redis

@ -2038,7 +2065,7 @@ Configuration fields:

| `expression` | [Expression](../../sql-reference/syntax.md#expressions) that ClickHouse executes on the value.<br/>The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.<br/><br/>Default value: no expression. | No |
| <a name="hierarchical-dict-attr"></a> `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](#hierarchical-dictionaries).<br/><br/>Default value: `false`. | No |
| `injective` | Flag that shows whether the `id -> attribute` image is [injective](https://en.wikipedia.org/wiki/Injective_function).<br/>If `true`, ClickHouse can automatically place after the `GROUP BY` clause the requests to dictionaries with injection. Usually it significantly reduces the amount of such requests.<br/><br/>Default value: `false`. | No |
| `is_object_id` | Flag that shows whether the query is executed for a MongoDB document by `ObjectID`.<br/><br/>Default value: `false`. | No |

## Hierarchical Dictionaries
@ -595,6 +595,7 @@ SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res

Get the element with the index `n` from the array `arr`. `n` can be any integer type.
Indexes in an array begin from one.

Negative indexes are supported. In this case, it selects the corresponding element numbered from the end. For example, `arr[-1]` is the last item in the array.

If the index falls outside of the bounds of an array, it returns some default value (0 for numbers, an empty string for strings, etc.), except for the case with a non-constant array and a constant index 0 (in this case there will be an error `Array indices are 1-based`).

@ -616,6 +617,27 @@ SELECT has([1, 2, NULL], NULL)

└─────────────────────────┘
```
## arrayElementOrNull(arr, n)

Get the element with the index `n` from the array `arr`. `n` can be any integer type.
Indexes in an array begin from one.

Negative indexes are supported. In this case, it selects the corresponding element numbered from the end. For example, `arr[-1]` is the last item in the array.

If the index falls outside of the bounds of an array, it returns `NULL` instead of a default value.

### Examples

``` sql
SELECT arrayElementOrNull([1, 2, 3], 2), arrayElementOrNull([1, 2, 3], 4)
```

``` text
┌─arrayElementOrNull([1, 2, 3], 2)─┬─arrayElementOrNull([1, 2, 3], 4)─┐
│                                2 │                             ᴺᵁᴸᴸ │
└──────────────────────────────────┴──────────────────────────────────┘
```

## hasAll {#hasall}

Checks whether one array is a subset of another.
@ -1717,6 +1739,24 @@ Result:

[[1,1,2,3],[1,2,3,4]]
```

## arrayUnion(arr)

Takes multiple arrays, returns an array that contains all elements that are present in any of the source arrays.

Example:
```sql
SELECT
    arrayUnion([-2, 1], [10, 1], [-2], []) as num_example,
    arrayUnion(['hi'], [], ['hello', 'hi']) as str_example,
    arrayUnion([1, 3, NULL], [2, 3, NULL]) as null_example
```

```text
┌─num_example─┬─str_example────┬─null_example─┐
│ [10,-2,1]   │ ['hello','hi'] │ [3,2,1,NULL] │
└─────────────┴────────────────┴──────────────┘
```

## arrayIntersect(arr)

Takes multiple arrays, returns an array with elements that are present in all source arrays.
@ -83,7 +83,7 @@ Result:

```
## makeDate32

Creates a date of type [Date32](../../sql-reference/data-types/date32.md) from a year, month, day (or optionally a year and a day).

**Syntax**
@ -153,7 +153,7 @@ makeDateTime(year, month, day, hour, minute, second[, timezone])

- `hour` — Hour. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md).
- `minute` — Minute. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md).
- `second` — Second. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md).
- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional).
- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#timezone) for the returned value (optional).

**Returned value**
@ -186,11 +186,11 @@ makeDateTime64(year, month, day, hour, minute, second[, precision])

**Arguments**

- `year` — Year (0-9999). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `month` — Month (1-12). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `day` — Day (1-31). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `hour` — Hour (0-23). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `minute` — Minute (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `second` — Second (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `precision` — Optional precision of the sub-second component (0-9). [Integer](../../sql-reference/data-types/int-uint.md).

**Returned value**
@ -294,7 +294,7 @@ Result:

## serverTimeZone

Returns the timezone of the server, i.e. the value of setting [timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone).
Returns the timezone of the server, i.e. the value of setting [timezone](../../operations/server-configuration-parameters/settings.md#timezone).
If the function is executed in the context of a distributed table, then it generates a normal column with values relevant to each shard. Otherwise, it produces a constant value.

**Syntax**
@ -1269,7 +1269,7 @@ toStartOfSecond(value, [timezone])

**Arguments**

- `value` — Date and time. [DateTime64](../data-types/datetime64.md).
- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../data-types/string.md).
- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../data-types/string.md).

**Returned value**

@ -1309,7 +1309,7 @@ Result:

**See also**

- [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) server configuration parameter.
- [Timezone](../../operations/server-configuration-parameters/settings.md#timezone) server configuration parameter.

## toStartOfMillisecond
@ -1324,7 +1324,7 @@ toStartOfMillisecond(value, [timezone])

**Arguments**

- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md).
- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md).

**Returned value**

@ -1376,7 +1376,7 @@ toStartOfMicrosecond(value, [timezone])

**Arguments**

- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md).
- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md).

**Returned value**

@ -1416,7 +1416,7 @@ Result:

**See also**

- [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) server configuration parameter.
- [Timezone](../../operations/server-configuration-parameters/settings.md#timezone) server configuration parameter.

## toStartOfNanosecond
@ -1431,7 +1431,7 @@ toStartOfNanosecond(value, [timezone])

**Arguments**

- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md).
- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md).

**Returned value**

@ -1471,7 +1471,7 @@ Result:

**See also**

- [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) server configuration parameter.
- [Timezone](../../operations/server-configuration-parameters/settings.md#timezone) server configuration parameter.

## toStartOfFiveMinutes
@ -1629,6 +1629,17 @@ The second overload emulates TimescaleDB's `time_bucket()` function, respectivel

``` SQL
SELECT toStartOfInterval(toDateTime('2023-01-01 14:45:00'), INTERVAL 1 MINUTE, toDateTime('2023-01-01 14:35:30'));
```

Result:

``` reference
┌───toStartOfInterval(...)─┐
│      2023-01-01 14:44:30 │
└──────────────────────────┘
```

Aliases: `time_bucket`, `date_bin`.

**See Also**
- [date_trunc](#date_trunc)
|
||||
|
||||
## toRelativeYearNum
|
||||
|
||||
Converts a date, or date with time, to the number of years elapsed since a certain fixed point in the past.
|
||||
Converts a date, or date with time, to the number of years elapsed since a certain fixed point in the past.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -2178,7 +2189,7 @@ age('unit', startdate, enddate, [timezone])
|
||||
|
||||
- `enddate` — The second time value to subtract from (the minuend). [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../data-types/string.md).
|
||||
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -2254,7 +2265,7 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_
|
||||
|
||||
- `enddate` — The second time value to subtract from (the minuend). [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../data-types/string.md).
|
||||
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -2323,7 +2334,7 @@ Alias: `dateTrunc`.
|
||||
`unit` argument is case-insensitive.
|
||||
|
||||
- `value` — Date and time. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md).
|
||||
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../data-types/string.md).
|
||||
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -2703,7 +2714,7 @@ now([timezone])

**Arguments**

- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#timezone) for the returned value (optional). [String](../data-types/string.md).

**Returned value**
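A small sketch of the optional timezone argument (the timezone name is illustrative):

```sql
-- Current time in the server timezone and in an explicit timezone
SELECT now(), now('Europe/Amsterdam');
```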
@ -2752,7 +2763,7 @@ now64([scale], [timezone])

**Arguments**

- `scale` - Tick size (precision): 10<sup>-precision</sup> seconds. Valid range: [ 0 : 9 ]. Typical values are 3 (milliseconds, the default), 6 (microseconds) and 9 (nanoseconds).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#timezone) for the returned value (optional). [String](../data-types/string.md).

**Returned value**
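A quick sketch of the `scale` argument:

```sql
-- Default millisecond precision versus explicit nanosecond precision
SELECT now64(), now64(9);
```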
@ -2786,7 +2797,7 @@ nowInBlock([timezone])

**Arguments**

- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#timezone) for the returned value (optional). [String](../data-types/string.md).

**Returned value**
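`nowInBlock` is re-evaluated for every processed block of data, whereas `now` is fixed at query start; one way to observe the difference is a hedged sketch with one-row blocks and a sleep:

```sql
-- nowInBlock() advances from block to block while now() stays constant
SELECT now(), nowInBlock(), sleep(1)
FROM numbers(3)
SETTINGS max_block_size = 1
FORMAT PrettyCompactMonoBlock;
```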
@ -2975,7 +2986,7 @@ YYYYMMDDhhmmssToDateTime(yyyymmddhhmmss[, timezone]);

**Arguments**

- `yyyymmddhhmmss` - A number representing the year, month, day, hour, minute and second. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md).
- `timezone` - [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional).
- `timezone` - [Timezone](../../operations/server-configuration-parameters/settings.md#timezone) for the returned value (optional).

**Returned value**
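A minimal sketch of the encoding:

```sql
-- The digits 2023-09-11 13:14:15 packed into one number
SELECT YYYYMMDDhhmmssToDateTime(20230911131415);
```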
@ -3631,7 +3642,7 @@ addInterval(interval_1, interval_2)

- Returns a tuple of intervals. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)).

:::note
Intervals of the same type will be combined into a single interval. For instance if `toIntervalDay(1)` and `toIntervalDay(2)` are passed then the result will be `(3)` rather than `(1,1)`.
:::

**Example**
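A hedged sketch of the combining behaviour described in the note above:

```sql
-- Same-type intervals collapse into one element; mixed types stay separate
SELECT addInterval(INTERVAL 1 DAY, INTERVAL 2 DAY) AS combined,
       addInterval(INTERVAL 1 DAY, INTERVAL 1 MONTH) AS mixed;
```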
@ -3682,7 +3693,7 @@ addTupleOfIntervals(interval_1, interval_2)

Query:

```sql
WITH toDate('2018-01-01') AS date
SELECT addTupleOfIntervals(date, (INTERVAL 1 DAY, INTERVAL 1 MONTH, INTERVAL 1 YEAR))
```

@ -4802,4 +4813,3 @@ timeDiff(toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-02'));

## Related content

- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse)
@ -18,7 +18,7 @@ file(path[, default])

**Arguments**

- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings.
- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#user_files_path). Supports wildcards `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings.
- `default` — The value returned if the file does not exist or cannot be accessed. Supported data types: [String](../data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal).

**Example**
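A minimal sketch of the fallback behaviour, assuming `missing.txt` does not exist under `user_files_path`:

```sql
-- The file cannot be read, so the second argument is returned
SELECT file('missing.txt', 'no such file') AS contents;
```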
@ -51,6 +51,40 @@ Calculates the MD5 from a string and returns the resulting set of bytes as Fixed

If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead.
If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).

## RIPEMD160

Produces a [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) hash value.

**Syntax**

```sql
RIPEMD160(input)
```

**Parameters**

- `input`: Input string. [String](../data-types/string.md)

**Returned value**

- A 160-bit `RIPEMD-160` hash value of type [FixedString(20)](../data-types/fixedstring.md).

**Example**

Use the [hex](../functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string.

Query:

```sql
SELECT HEX(RIPEMD160('The quick brown fox jumps over the lazy dog'));
```

```response
┌─HEX(RIPEMD160('The quick brown fox jumps over the lazy dog'))─┐
│ 37F332F68DB77BD9D7EDD4969571AD671CF9DD3B │
└───────────────────────────────────────────────────────────────┘
```

## sipHash64

Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.
@ -347,7 +381,7 @@ Result:

## intHash64

Calculates a 64-bit hash code from any type of integer.
This is a relatively fast non-cryptographic hash function of average quality for numbers.
It works faster than [intHash32](#inthash32).

**Syntax**
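A quick sketch; the same input always maps to the same `UInt64` code:

```sql
-- Hash an integer literal
SELECT intHash64(42);
```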
@ -651,7 +685,7 @@ For more information, see the link: [JumpConsistentHash](https://arxiv.org/pdf/1

## kostikConsistentHash

An O(1) time and space consistent hash algorithm by Konstantin 'kostik' Oblakov. Previously `yandexConsistentHash`.

**Syntax**

@ -672,7 +706,7 @@ Alias: `yandexConsistentHash` (left for backwards compatibility sake).

**Implementation details**

It is efficient only if n <= 32768.

**Example**

@ -688,40 +722,6 @@ SELECT kostikConsistentHash(16045690984833335023, 2);

└───────────────────────────────────────────────┘
```

## ripeMD160

Produces a [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) hash value.

**Syntax**

```sql
ripeMD160(input)
```

**Parameters**

- `input`: Input string. [String](../data-types/string.md)

**Returned value**

- A [UInt256](../data-types/int-uint.md) hash value where the 160-bit RIPEMD-160 hash is stored in the first 20 bytes. The remaining 12 bytes are zero-padded.

**Example**

Use the [hex](../functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string.

Query:

```sql
SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));
```

```response
┌─hex(ripeMD160('The quick brown fox jumps over the lazy dog'))─┐
│ 37F332F68DB77BD9D7EDD4969571AD671CF9DD3B │
└───────────────────────────────────────────────────────────────┘
```

## murmurHash2_32, murmurHash2_64

Produces a [MurmurHash2](https://github.com/aappleby/smhasher) hash value.
@ -937,7 +937,7 @@ SELECT xxHash64('')

**Returned value**

- Hash value. [UInt32/64](../data-types/int-uint.md).

:::note
The return type will be `UInt32` for `xxHash32` and `UInt64` for `xxHash64`.
@ -86,11 +86,11 @@ Returns the fully qualified domain name of the ClickHouse server.

fqdn();
```

Aliases: `fullHostName`, `FQDN`.

**Returned value**

- String with the fully qualified domain name. [String](../data-types/string.md).

**Example**
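A minimal sketch (the returned host name depends on the server):

```sql
-- Either alias returns the server's fully qualified domain name
SELECT FQDN();
```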
@ -245,7 +245,7 @@ Result:

3. │ 5 │
4. │ 5 │
5. │ 5 │
└─────────────┘
```

## byteSize
@ -347,7 +347,7 @@ Result:

Turns a constant into a full column containing a single value.
Full columns and constants are represented differently in memory.
Functions usually execute different code for normal and constant arguments, although the result should typically be the same.
This function can be used to debug this behavior.

**Syntax**

@ -366,8 +366,8 @@ materialize(x)

**Example**

In the example below the `countMatches` function expects a constant second argument.
This behaviour can be debugged by using the `materialize` function to turn a constant into a full column,
verifying that the function throws an error for a non-constant argument.

Query:
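A hedged sketch of that debugging pattern:

```sql
-- Works: the needle is a constant
SELECT countMatches('foobarfoo', 'foo');
-- Throws: materialize turns the constant into a full column
SELECT countMatches('foobarfoo', materialize('foo'));
```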
@ -1837,7 +1837,7 @@ Returns the default value for the given data type.

Does not include default values for custom columns set by the user.

**Syntax**

```sql
defaultValueOfArgumentType(expression)
@ -2164,7 +2164,7 @@ Result:

## filesystemCapacity

Returns the capacity of the filesystem in bytes. Needs the [path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-path) to the data directory to be configured.
Returns the capacity of the filesystem in bytes. Needs the [path](../../operations/server-configuration-parameters/settings.md#path) to the data directory to be configured.

**Syntax**
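A small usage sketch:

```sql
-- Human-readable capacity of the filesystem backing the data path
SELECT formatReadableSize(filesystemCapacity()) AS capacity;
```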
@ -2592,7 +2592,7 @@ joinGetOrNull(join_storage_table_name, `value_column`, join_keys)

**Arguments**

- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed.
- `value_column` — name of the column of the table that contains required data.
- `join_keys` — list of keys.
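A hedged, self-contained sketch against a small Join-engine table (table name and values are illustrative):

```sql
CREATE TABLE id_val (id UInt32, val UInt32) ENGINE = Join(ANY, LEFT, id);
INSERT INTO id_val VALUES (1, 11), (2, 12);
-- Key 3 is absent, so joinGetOrNull returns NULL instead of a type default
SELECT joinGetOrNull('id_val', 'val', toUInt32(3));
```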
@ -2789,6 +2789,45 @@ Result:

- [Custom Settings](../../operations/settings/index.md#custom_settings)

## getSettingOrDefault

Returns the current value of a [custom setting](../../operations/settings/index.md#custom_settings), or the default value specified in the second argument if the custom setting is not set in the current profile.

**Syntax**

```sql
getSettingOrDefault('custom_setting', default_value);
```

**Parameters**

- `custom_setting` — The setting name. [String](../data-types/string.md).
- `default_value` — Value to return if custom_setting is not set. Value may be of any data type or Null.

**Returned value**

- The setting's current value, or `default_value` if the setting is not set.

**Example**

```sql
SELECT getSettingOrDefault('custom_undef1', 'my_value');
SELECT getSettingOrDefault('custom_undef2', 100);
SELECT getSettingOrDefault('custom_undef3', NULL);
```

Result:

```
my_value
100
NULL
```

**See Also**

- [Custom Settings](../../operations/settings/index.md#custom_settings)

## isDecimalOverflow

Checks whether the [Decimal](../data-types/decimal.md) value is outside its precision or outside the specified precision.
@ -2917,7 +2956,7 @@ Result:

**See Also**

- [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port)
- [tcp_port](../../operations/server-configuration-parameters/settings.md#tcp_port)

## currentProfiles
@ -4031,7 +4070,7 @@ displayName()

**Example**

The `display_name` can be set in `config.xml`. Take, for example, a server with `display_name` configured to 'production':

```xml
<!-- It is the name that will be shown in the clickhouse-client.

@ -4279,4 +4318,3 @@ Result:

1. │ ['{ArraySizes}','{ArrayElements, TupleElement(keys), Regular}','{ArrayElements, TupleElement(values), Regular}'] │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
@ -288,7 +288,7 @@ roundToExp2(num)

**Parameters**

- `num`: A number representing an age in years. [UInt](../data-types/int-uint.md)/[Float](../data-types/float.md).
- `num`: A number to round. [UInt](../data-types/int-uint.md)/[Float](../data-types/float.md).

**Returned value**
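A hedged sketch of rounding down to the nearest power of two:

```sql
-- 31 rounds down to 16; 32 is already a power of two
SELECT roundToExp2(31), roundToExp2(32);
```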
@ -10,7 +10,7 @@ Time window functions return the inclusive lower and exclusive upper bound of th

## tumble

A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (`interval`).

**Syntax**

@ -21,7 +21,7 @@ tumble(time_attr, interval [, timezone])

**Arguments**

- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#timezone) (optional).

**Returned values**
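A minimal sketch; the result is the inclusive lower and exclusive upper bound of the window containing the timestamp:

```sql
-- The day-long tumbling window that contains the current moment
SELECT tumble(now(), toIntervalDay(1));
```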
@ -57,7 +57,7 @@ tumbleStart(time_attr, interval [, timezone]);

- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#timezone) (optional).

The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).

@ -95,7 +95,7 @@ tumbleEnd(time_attr, interval [, timezone]);

- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#timezone) (optional).

The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).
@ -132,7 +132,7 @@ hop(time_attr, hop_interval, window_interval [, timezone])

- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md).
- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#timezone) (optional).

**Returned values**
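A hedged sketch with overlapping windows (hop every day, each window two days long); a record belongs to several hop windows, and only one window's bounds are returned here:

```sql
SELECT hop(now(), toIntervalDay(1), toIntervalDay(2));
```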
@ -172,7 +172,7 @@ hopStart(time_attr, hop_interval, window_interval [, timezone]);

- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md).
- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#timezone) (optional).

The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).

@ -212,9 +212,9 @@ hopEnd(time_attr, hop_interval, window_interval [, timezone]);

**Arguments**

- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md).
- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#timezone) (optional).

The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).
@ -121,7 +121,7 @@ Unsupported arguments:

- String representations of binary and hexadecimal values, e.g. `SELECT toInt8('0xc0fe');`.

:::note
If the input value cannot be represented within the bounds of [Int8](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
For example: `SELECT toInt8(128) == -128;`.
:::
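The wrap-around is plain two's-complement overflow; a quick sketch:

```sql
-- 128 does not fit into Int8 and wraps around to -128
SELECT toInt8(128);
```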
@ -193,7 +193,7 @@ This is not considered an error.

- 8-bit integer value if successful, otherwise `0`. [Int8](../data-types/int-uint.md).

:::note
The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning it truncates fractional digits of numbers.
:::

**Example**

@ -492,7 +492,7 @@ Unsupported arguments (return `\N`)

- String representations of binary and hexadecimal values, e.g. `SELECT toInt16OrNull('0xc0fe');`.

:::note
If the input value cannot be represented within the bounds of [Int16](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
:::

@ -555,7 +555,7 @@ Arguments for which the default value is returned:

- String representations of binary and hexadecimal values, e.g. `SELECT toInt16OrDefault('0xc0fe', CAST('-1', 'Int16'));`.

:::note
If the input value cannot be represented within the bounds of [Int16](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
:::

@ -617,7 +617,7 @@ Unsupported arguments:

- String representations of binary and hexadecimal values, e.g. `SELECT toInt32('0xc0fe');`.

:::note
If the input value cannot be represented within the bounds of [Int32](../data-types/int-uint.md), the result overflows or underflows.
This is not considered an error.
For example: `SELECT toInt32(2147483648) == -2147483648;`
:::

@ -680,7 +680,7 @@ Unsupported arguments (return `0`):

- String representations of binary and hexadecimal values, e.g. `SELECT toInt32OrZero('0xc0fe');`.

:::note
If the input value cannot be represented within the bounds of [Int32](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
:::

@ -739,7 +739,7 @@ Unsupported arguments (return `\N`)

- String representations of binary and hexadecimal values, e.g. `SELECT toInt32OrNull('0xc0fe');`.

:::note
If the input value cannot be represented within the bounds of [Int32](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
:::

@ -802,7 +802,7 @@ Arguments for which the default value is returned:

- String representations of binary and hexadecimal values, e.g. `SELECT toInt32OrDefault('0xc0fe', CAST('-1', 'Int32'));`.

:::note
If the input value cannot be represented within the bounds of [Int32](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
:::

@ -864,7 +864,7 @@ Unsupported types:

- String representations of binary and hexadecimal values, e.g. `SELECT toInt64('0xc0fe');`.

:::note
If the input value cannot be represented within the bounds of [Int64](../data-types/int-uint.md), the result overflows or underflows.
This is not considered an error.
For example: `SELECT toInt64(9223372036854775808) == -9223372036854775808;`
:::

@ -927,7 +927,7 @@ Unsupported arguments (return `0`):

- String representations of binary and hexadecimal values, e.g. `SELECT toInt64OrZero('0xc0fe');`.

:::note
If the input value cannot be represented within the bounds of [Int64](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
:::

@ -987,7 +987,7 @@ Unsupported arguments (return `\N`)

- String representations of binary and hexadecimal values, e.g. `SELECT toInt64OrNull('0xc0fe');`.

:::note
If the input value cannot be represented within the bounds of [Int64](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
:::

@ -1112,7 +1112,7 @@ Unsupported arguments:

- String representations of binary and hexadecimal values, e.g. `SELECT toInt128('0xc0fe');`.

:::note
If the input value cannot be represented within the bounds of [Int128](../data-types/int-uint.md), the result overflows or underflows.
This is not considered an error.
:::

@ -1234,7 +1234,7 @@ Unsupported arguments (return `\N`)

- String representations of binary and hexadecimal values, e.g. `SELECT toInt128OrNull('0xc0fe');`.

:::note
If the input value cannot be represented within the bounds of [Int128](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
:::

@ -1298,7 +1298,7 @@ Arguments for which the default value is returned:

- String representations of binary and hexadecimal values, e.g. `SELECT toInt128OrDefault('0xc0fe', CAST('-1', 'Int128'));`.

:::note
If the input value cannot be represented within the bounds of [Int128](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
:::

@ -1360,7 +1360,7 @@ Unsupported arguments:

- String representations of binary and hexadecimal values, e.g. `SELECT toInt256('0xc0fe');`.

:::note
If the input value cannot be represented within the bounds of [Int256](../data-types/int-uint.md), the result overflows or underflows.
This is not considered an error.
:::

@ -1422,7 +1422,7 @@ Unsupported arguments (return `0`):

- String representations of binary and hexadecimal values, e.g. `SELECT toInt256OrZero('0xc0fe');`.

:::note
If the input value cannot be represented within the bounds of [Int256](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
:::

@ -1482,7 +1482,7 @@ Unsupported arguments (return `\N`)

- String representations of binary and hexadecimal values, e.g. `SELECT toInt256OrNull('0xc0fe');`.

:::note
If the input value cannot be represented within the bounds of [Int256](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
:::

@ -4063,7 +4063,7 @@ Result:

## toDateTime64OrDefault

Like [toDateTime64](#todatetime64), this function converts an input value to a value of type [DateTime64](../data-types/datetime64.md),
but returns either the default value of [DateTime64](../data-types/datetime64.md)
or the provided default if an invalid argument is received.

@ -4132,11 +4132,17 @@ Unsupported arguments:

- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal32('0xc0fe', 1);`.

:::note
An overflow can occur if the value of `expr` exceeds the bounds of `Decimal32`: `( -1 * 10^(9 - S), 1 * 10^(9 - S) )`.
Excessive digits in a fraction are discarded (not rounded).
Excessive digits in the integer part will lead to an exception.
:::

:::warning
Conversions drop extra digits and could operate in an unexpected way when working with Float32/Float64 inputs as the operations are performed using floating point instructions.
For example: `toDecimal32(1.15, 2)` is equal to `1.14` because 1.15 * 100 in floating point is 114.99.
You can use a String input so the operations use the underlying integer type: `toDecimal32('1.15', 2) = 1.15`
:::

**Returned value**

- Value of type `Decimal(9, S)`. [Decimal32(S)](../data-types/decimal.md).
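A hedged sketch of the Float pitfall called out in the warning above:

```sql
-- Float input loses the last digit; String input preserves the literal value
SELECT toDecimal32(1.15, 2) AS from_float, toDecimal32('1.15', 2) AS from_string;
```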
@ -4328,6 +4334,12 @@ Excessive digits in a fraction are discarded (not rounded).

Excessive digits in the integer part will lead to an error.
:::

:::warning
Conversions drop extra digits and could operate in an unexpected way when working with Float32/Float64 inputs as the operations are performed using floating point instructions.
For example: `toDecimal32OrDefault(1.15, 2)` is equal to `1.14` because 1.15 * 100 in floating point is 114.99.
You can use a String input so the operations use the underlying integer type: `toDecimal32OrDefault('1.15', 2) = 1.15`
:::

**Returned value**

- Value of type `Decimal(9, S)` if successful, otherwise returns the default value if passed or `0` if not. [Decimal32(S)](../data-types/decimal.md).

@ -4391,6 +4403,12 @@ Excessive digits in a fraction are discarded (not rounded).

Excessive digits in the integer part will lead to an exception.
:::

:::warning
Conversions drop extra digits and could operate in an unexpected way when working with Float32/Float64 inputs as the operations are performed using floating point instructions.
For example: `toDecimal64(1.15, 2)` is equal to `1.14` because 1.15 * 100 in floating point is 114.99.
You can use a String input so the operations use the underlying integer type: `toDecimal64('1.15', 2) = 1.15`
:::

**Returned value**

- Value of type `Decimal(18, S)`. [Decimal64(S)](../data-types/decimal.md).

@ -4582,6 +4600,12 @@ Excessive digits in a fraction are discarded (not rounded).

Excessive digits in the integer part will lead to an error.
:::

:::warning
Conversions drop extra digits and could operate in an unexpected way when working with Float32/Float64 inputs as the operations are performed using floating point instructions.
For example: `toDecimal64OrDefault(1.15, 2)` is equal to `1.14` because 1.15 * 100 in floating point is 114.99.
You can use a String input so the operations use the underlying integer type: `toDecimal64OrDefault('1.15', 2) = 1.15`
:::

**Returned value**

- Value of type `Decimal(18, S)` if successful, otherwise returns the default value if passed or `0` if not. [Decimal64(S)](../data-types/decimal.md).

@ -4645,6 +4669,12 @@ Excessive digits in a fraction are discarded (not rounded).

Excessive digits in the integer part will lead to an exception.
:::

:::warning
Conversions drop extra digits and could operate in an unexpected way when working with Float32/Float64 inputs as the operations are performed using floating point instructions.
For example: `toDecimal128(1.15, 2)` is equal to `1.14` because 1.15 * 100 in floating point is 114.99.
You can use a String input so the operations use the underlying integer type: `toDecimal128('1.15', 2) = 1.15`
:::

**Returned value**

- Value of type `Decimal(38, S)`. [Decimal128(S)](../data-types/decimal.md).

@ -4836,6 +4866,12 @@ Excessive digits in a fraction are discarded (not rounded).

Excessive digits in the integer part will lead to an error.
:::

:::warning
Conversions drop extra digits and could operate in an unexpected way when working with Float32/Float64 inputs as the operations are performed using floating point instructions.
For example: `toDecimal128OrDefault(1.15, 2)` is equal to `1.14` because 1.15 * 100 in floating point is 114.99.
You can use a String input so the operations use the underlying integer type: `toDecimal128OrDefault('1.15', 2) = 1.15`
:::

**Returned value**

- Value of type `Decimal(38, S)` if successful, otherwise returns the default value if passed or `0` if not. [Decimal128(S)](../data-types/decimal.md).

@ -4899,6 +4935,12 @@ Excessive digits in a fraction are discarded (not rounded).

Excessive digits in the integer part will lead to an exception.
:::

:::warning
Conversions drop extra digits and could operate in an unexpected way when working with Float32/Float64 inputs as the operations are performed using floating point instructions.
For example: `toDecimal256(1.15, 2)` is equal to `1.14` because 1.15 * 100 in floating point is 114.99.
You can use a String input so the operations use the underlying integer type: `toDecimal256('1.15', 2) = 1.15`
:::

**Returned value**

- Value of type `Decimal(76, S)`. [Decimal256(S)](../data-types/decimal.md).

@ -5090,6 +5132,12 @@ Excessive digits in a fraction are discarded (not rounded).

Excessive digits in the integer part will lead to an error.
:::

:::warning
Conversions drop extra digits and could operate in an unexpected way when working with Float32/Float64 inputs as the operations are performed using floating point instructions.
For example: `toDecimal256OrDefault(1.15, 2)` is equal to `1.14` because 1.15 * 100 in floating point is 114.99.
You can use a String input so the operations use the underlying integer type: `toDecimal256OrDefault('1.15', 2) = 1.15`
:::

**Returned value**

- Value of type `Decimal(76, S)` if successful, otherwise returns the default value if passed or `0` if not. [Decimal256(S)](../data-types/decimal.md).

@ -5261,7 +5309,7 @@ Result:

## reinterpretAsUInt8

Performs byte reinterpretation by treating the input value as a value of type UInt8. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.

**Syntax**
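A quick sketch of raw byte reinterpretation:

```sql
-- Int8 -1 is the byte 0xFF, which reads back as UInt8 255
SELECT reinterpretAsUInt8(toInt8(-1));
```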
@ -5299,7 +5347,7 @@ Result:

## reinterpretAsUInt16

Performs byte reinterpretation by treating the input value as a value of type UInt16. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.

**Syntax**

@ -5375,7 +5423,7 @@ Result:

## reinterpretAsUInt64

Performs byte reinterpretation by treating the input value as a value of type UInt64. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.

**Syntax**

@ -5413,7 +5461,7 @@ Result:

## reinterpretAsUInt128

Performs byte reinterpretation by treating the input value as a value of type UInt128. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.

**Syntax**

@ -5489,7 +5537,7 @@ Result:

## reinterpretAsInt8

Performs byte reinterpretation by treating the input value as a value of type Int8. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.

**Syntax**

@ -5565,7 +5613,7 @@ Result:

## reinterpretAsInt32

Performs byte reinterpretation by treating the input value as a value of type Int32. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.

**Syntax**

@ -5603,7 +5651,7 @@ Result:

## reinterpretAsInt64

Performs byte reinterpretation by treating the input value as a value of type Int64. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.

**Syntax**

@ -5641,7 +5689,7 @@ Result:

## reinterpretAsInt128

Performs byte reinterpretation by treating the input value as a value of type Int128. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.

**Syntax**

@ -5679,7 +5727,7 @@ Result:

## reinterpretAsInt256

Performs byte reinterpretation by treating the input value as a value of type Int256. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.

**Syntax**

@ -5717,7 +5765,7 @@ Result:

## reinterpretAsFloat32

Performs byte reinterpretation by treating the input value as a value of type Float32. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.

**Syntax**

@ -5751,7 +5799,7 @@ Result:

## reinterpretAsFloat64

Performs byte reinterpretation by treating the input value as a value of type Float64. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.

**Syntax**

@ -5804,7 +5852,7 @@ reinterpretAsDate(x)

**Implementation details**

:::note
If the provided string isn’t long enough, the function works as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored.
:::

**Example**

@ -5844,7 +5892,7 @@ reinterpretAsDateTime(x)

**Implementation details**

:::note
If the provided string isn’t long enough, the function works as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored.
:::

**Example**

@ -5886,8 +5934,8 @@ reinterpretAsString(x)

Query:

```sql
SELECT
    reinterpretAsString(toDateTime('1970-01-01 01:01:05')),
    reinterpretAsString(toDate('1970-03-07'));
```

@ -5922,8 +5970,8 @@ reinterpretAsFixedString(x)

Query:

```sql
SELECT
    reinterpretAsFixedString(toDateTime('1970-01-01 01:01:05')),
    reinterpretAsFixedString(toDate('1970-03-07'));
```

@ -6702,7 +6750,7 @@ parseDateTime(str[, format[, timezone]])

- `str` — The String to be parsed
- `format` — The format string. Optional. `%Y-%m-%d %H:%i:%s` if not specified.
- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional.
- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md#timezone). Optional.

**Returned value(s)**
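A minimal sketch using the MySQL-style specifiers mentioned above:

```sql
-- Returns the DateTime 2021-01-04 23:00:00
SELECT parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s');
```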
@ -6751,7 +6799,7 @@ parseDateTimeInJodaSyntax(str[, format[, timezone]])

- `str` — The String to be parsed
- `format` — The format string. Optional. `yyyy-MM-dd HH:mm:ss` if not specified.
- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional.
- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md#timezone). Optional.

**Returned value(s)**

@ -6958,7 +7006,7 @@ parseDateTime64BestEffort(time_string [, precision [, time_zone]])

- `time_string` — String containing a date or date with time to convert. [String](../data-types/string.md).
- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](../data-types/int-uint.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md#timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md).

**Returned value**

@ -7028,7 +7076,7 @@ toLowCardinality(expr)

**Returned values**

- Result of `expr`. [LowCardinality](../data-types/lowcardinality.md) of the type of `expr`.

**Example**

@ -7249,7 +7297,7 @@ Result:

## fromUnixTimestamp64Nano

Converts an `Int64` to a `DateTime64` value with fixed nanosecond precision and optional timezone. The input value is scaled up or down appropriately depending on its precision.

:::note
Please note that the input value is treated as a UTC timestamp, not a timestamp at the given (or implicit) timezone.
@ -10,7 +10,7 @@ sidebar_label: UDF

## Executable User Defined Functions

ClickHouse can call any external executable program or script to process data.

The configuration of executable user defined functions can be located in one or more xml-files. The path to the configuration is specified in the [user_defined_executable_functions_config](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_defined_executable_functions_config) parameter.
The configuration of executable user defined functions can be located in one or more xml-files. The path to the configuration is specified in the [user_defined_executable_functions_config](../../operations/server-configuration-parameters/settings.md#user_defined_executable_functions_config) parameter.

A function configuration contains the following settings:

@ -27,7 +27,7 @@ A function configuration contains the following settings:

- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `pool_size` - the size of a command pool. Optional. Default value is `16`.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter.
- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. Default value is `0`. Optional parameter.

The command must read arguments from `STDIN` and must output the result to `STDOUT`. The command must process arguments iteratively. That is, after processing a chunk of arguments it must wait for the next chunk.
@ -61,7 +61,7 @@ ULIDStringToDateTime(ulid[, timezone])

**Arguments**

- `ulid` — Input ULID. [String](../data-types/string.md) or [FixedString(26)](../data-types/fixedstring.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#timezone) for the returned value (optional). [String](../data-types/string.md).

**Returned value**
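A hedged round-trip sketch using `generateULID`:

```sql
-- Extract the timestamp embedded in a freshly generated ULID
SELECT generateULID() AS ulid, ULIDStringToDateTime(ulid) AS ts;
```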
@ -493,7 +493,7 @@ UUIDv7ToDateTime(uuid[, timezone])

**Arguments**

- `uuid` — [UUID](../data-types/uuid.md) of version 7.
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#timezone) for the returned value (optional). [String](../data-types/string.md).

**Returned value**
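A hedged round-trip sketch using `generateUUIDv7`:

```sql
-- Recover the millisecond timestamp encoded in a version-7 UUID
SELECT generateUUIDv7() AS uuid, UUIDv7ToDateTime(uuid) AS ts;
```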
@ -637,7 +637,7 @@ snowflakeToDateTime(value[, time_zone])

**Arguments**

- `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md#timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md).

**Returned value**

@ -678,7 +678,7 @@ snowflakeToDateTime64(value[, time_zone])

**Arguments**

- `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md#timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md).

**Returned value**

@ -793,7 +793,7 @@ snowflakeIDToDateTime(value[, epoch[, time_zone]])

- `value` — Snowflake ID. [UInt64](../data-types/int-uint.md).
- `epoch` - Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2010-11-04), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md#timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md).

**Returned value**
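A hedged round-trip sketch using `generateSnowflakeID`:

```sql
-- Generate a Snowflake ID and read its timestamp back
SELECT generateSnowflakeID() AS id, snowflakeIDToDateTime(id) AS ts;
```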
@ -829,7 +829,7 @@ snowflakeIDToDateTime64(value[, epoch[, time_zone]])

- `value` — Snowflake ID. [UInt64](../data-types/int-uint.md).
- `epoch` - Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2010-11-04), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md#timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md).

**Returned value**
@ -11,7 +11,7 @@ Changes roles.

Syntax:

``` sql
ALTER ROLE [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
    [, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
ALTER ROLE [IF EXISTS] name1 [RENAME TO new_name |, name2 [,...]]
    [ON CLUSTER cluster_name]
    [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```
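A hedged usage sketch of the consolidated `ON CLUSTER` form (role, cluster and setting values are illustrative):

``` sql
ALTER ROLE accountant ON CLUSTER my_cluster
    SETTINGS max_memory_usage = 10000000000 READONLY;
```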
@ -11,7 +11,8 @@ Changes settings profiles.
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
ALTER SETTINGS PROFILE [IF EXISTS] TO name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
|
||||
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
|
||||
ALTER SETTINGS PROFILE [IF EXISTS] name1 [RENAME TO new_name |, name2 [,...]]
|
||||
[ON CLUSTER cluster_name]
|
||||
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
|
||||
[TO {{role1 | user1 [, role2 | user2 ...]} | NONE | ALL | ALL EXCEPT {role1 | user1 [, role2 | user2 ...]}}]
|
||||
```
|
||||
|
@ -10,12 +10,12 @@ Changes ClickHouse user accounts.

Syntax:

``` sql
ALTER USER [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
    [, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
    [NOT IDENTIFIED | IDENTIFIED | ADD IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'}]
ALTER USER [IF EXISTS] name1 [RENAME TO new_name |, name2 [,...]]
    [ON CLUSTER cluster_name]
    [NOT IDENTIFIED | RESET AUTHENTICATION METHODS TO NEW | {IDENTIFIED | ADD IDENTIFIED} {[WITH {plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | WITH NO_PASSWORD | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}
    [, {[{plaintext_password | sha256_password | sha256_hash | ...}] BY {'password' | 'hash'}} | {ldap SERVER 'server_name'} | {...} | ... [,...]]]
    [[ADD | DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
    [VALID UNTIL datetime]
    [RESET AUTHENTICATION METHODS TO NEW]
    [DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ]
    [GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]]
    [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY | WRITABLE] | PROFILE 'profile_name'] [,...]
```

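A sketch of the `ADD IDENTIFIED` form from the grammar above (user name, password, and date are hypothetical):

``` sql
ALTER USER IF EXISTS john
    ADD IDENTIFIED WITH sha256_password BY 'new_secret'
    VALID UNTIL '2025-12-31 23:59:59';
```
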
@ -10,7 +10,7 @@ Creates new [roles](../../../guides/sre/user-management/index.md#role-management

Syntax:

``` sql
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...]
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [, name2 [,...]] [ON CLUSTER cluster_name]
    [IN access_storage_type]
    [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```

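For example, with a hypothetical role name:

``` sql
CREATE ROLE IF NOT EXISTS accountant
    SETTINGS max_memory_usage = 10000000000 READONLY;
```
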
@ -10,10 +10,11 @@ Creates [settings profiles](../../../guides/sre/user-management/index.md#setting

Syntax:

``` sql
CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
    [, name2 [ON CLUSTER cluster_name2] ...]
CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name1 [, name2 [,...]]
    [ON CLUSTER cluster_name]
    [IN access_storage_type]
    [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
    [TO {{role1 | user1 [, role2 | user2 ...]} | NONE | ALL | ALL EXCEPT {role1 | user1 [, role2 | user2 ...]}}]
```

`ON CLUSTER` clause allows creating settings profiles on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).

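A sketch under the new form (profile name and grantee are hypothetical):

``` sql
CREATE SETTINGS PROFILE IF NOT EXISTS max_memory_profile
    SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000
    TO robin;
```
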
@ -43,6 +43,19 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name AS [db2.]name2 [ENGINE = engine]

Creates a table with the same structure as another table. You can specify a different engine for the table. If the engine is not specified, the same engine will be used as for the `db2.name2` table.

### With a Schema and Data Cloned from Another Table

``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name CLONE AS [db2.]name2 [ENGINE = engine]
```

Creates a table with the same structure as another table. You can specify a different engine for the table. If the engine is not specified, the same engine will be used as for the `db2.name2` table. After the new table is created, all partitions from `db2.name2` are attached to it. In other words, the data of `db2.name2` is cloned into `db.table_name` upon creation. This query is equivalent to the following:

``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name AS [db2.]name2 [ENGINE = engine];
ALTER TABLE [db.]table_name ATTACH PARTITION ALL FROM [db2].name2;
```

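As a concrete sketch of the clone form (database and table names are hypothetical):

``` sql
-- Copies the schema of prod.events and attaches all of its partitions
CREATE TABLE IF NOT EXISTS backup.events CLONE AS prod.events;
```
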
### From a Table Function

``` sql
```

@ -10,12 +10,11 @@ Creates [user accounts](../../../guides/sre/user-management/index.md#user-accoun

Syntax:

``` sql
CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
    [, name2 [ON CLUSTER cluster_name2] ...]
    [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}]
CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [, name2 [,...]] [ON CLUSTER cluster_name]
    [NOT IDENTIFIED | IDENTIFIED {[WITH {plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | WITH NO_PASSWORD | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}
    [, {[{plaintext_password | sha256_password | sha256_hash | ...}] BY {'password' | 'hash'}} | {ldap SERVER 'server_name'} | {...} | ... [,...]]]
    [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
    [VALID UNTIL datetime]
    [RESET AUTHENTICATION METHODS TO NEW]
    [IN access_storage_type]
    [DEFAULT ROLE role [,...]]
    [DEFAULT DATABASE database | NONE]
```

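A sketch of the multi-method form introduced here (user name, passwords, and date are hypothetical):

``` sql
CREATE USER IF NOT EXISTS mira
    IDENTIFIED WITH sha256_password BY 'first_secret',
        plaintext_password BY 'second_secret'
    HOST LOCAL
    VALID UNTIL '2026-01-01 00:00:00';
```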