diff --git a/.github/ISSUE_TEMPLATE/documentation-issue.md b/.github/ISSUE_TEMPLATE/documentation-issue.md
index a8f31eadc56..557e5ea43c9 100644
--- a/.github/ISSUE_TEMPLATE/documentation-issue.md
+++ b/.github/ISSUE_TEMPLATE/documentation-issue.md
@@ -2,8 +2,7 @@
 name: Documentation issue
 about: Report something incorrect or missing in documentation
 title: ''
-labels: documentation
-assignees: BayoNet
+labels: comp-documentation
 ---
diff --git a/.gitignore b/.gitignore
index 52d58e68cb6..2cb20976632 100644
--- a/.gitignore
+++ b/.gitignore
@@ -124,3 +124,5 @@
 website/package-lock.json
 # Toolchains
 /cmake/toolchain/*
+
+*.iml
diff --git a/.gitmodules b/.gitmodules
index 2c1b15ffecb..bda2b470d9d 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -44,6 +44,7 @@
 [submodule "contrib/protobuf"]
 	path = contrib/protobuf
 	url = https://github.com/ClickHouse-Extras/protobuf.git
+	branch = v3.13.0.1
 [submodule "contrib/boost"]
 	path = contrib/boost
 	url = https://github.com/ClickHouse-Extras/boost.git
@@ -108,6 +109,7 @@
 [submodule "contrib/grpc"]
 	path = contrib/grpc
 	url = https://github.com/ClickHouse-Extras/grpc.git
+	branch = v1.33.2
 [submodule "contrib/aws"]
 	path = contrib/aws
 	url = https://github.com/ClickHouse-Extras/aws-sdk-cpp.git
@@ -191,3 +193,17 @@
 	path = contrib/croaring
 	url = https://github.com/RoaringBitmap/CRoaring
 	branch = v0.2.66
+[submodule "contrib/miniselect"]
+	path = contrib/miniselect
+	url = https://github.com/danlark1/miniselect
+[submodule "contrib/rocksdb"]
+	path = contrib/rocksdb
+	url = https://github.com/facebook/rocksdb
+	branch = v6.14.5
+[submodule "contrib/xz"]
+	path = contrib/xz
+	url = https://github.com/xz-mirror/xz
+[submodule "contrib/abseil-cpp"]
+	path = contrib/abseil-cpp
+	url = https://github.com/ClickHouse-Extras/abseil-cpp.git
+	branch = lts_2020_02_25
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 457346aff9a..355c664664d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,151 @@
+## ClickHouse release 20.11
+
+### ClickHouse release v20.11.3.3-stable, 2020-11-13
+
+#### Bug Fix
+
+* Fix rare silent crashes when the query profiler is on and ClickHouse is installed on an OS with a glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+
+
+### ClickHouse release v20.11.2.1, 2020-11-11
+
+#### Backward Incompatible Change
+
+* If some `profile` was specified in the `distributed_ddl` config section, this profile could overwrite settings of the `default` profile on server startup. It's fixed: now settings of distributed DDL queries do not affect global server settings. [#16635](https://github.com/ClickHouse/ClickHouse/pull/16635) ([tavplubix](https://github.com/tavplubix)).
+* Restrict the use of non-comparable data types (like `AggregateFunction`) in keys (sorting key, primary key, partition key, and so on). [#16601](https://github.com/ClickHouse/ClickHouse/pull/16601) ([alesapin](https://github.com/alesapin)).
+* Remove `ANALYZE` and `AST` queries, and make the setting `enable_debug_queries` obsolete, since this functionality is now part of the full-featured `EXPLAIN` query. [#16536](https://github.com/ClickHouse/ClickHouse/pull/16536) ([Ivan](https://github.com/abyss7)).
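+
+    For illustration, a rough mapping from the removed queries onto `EXPLAIN` (a sketch, not part of the PR; exact output depends on the server version):
+
+    ```sql
+    EXPLAIN AST SELECT 1;     -- inspect the query AST (roughly the old AST query)
+    EXPLAIN SYNTAX SELECT 1;  -- show the query after syntax rewrites (roughly the old ANALYZE query)
+    ```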
+* Aggregate functions `boundingRatio`, `rankCorr`, `retention`, `timeSeriesGroupSum`, `timeSeriesGroupRateSum`, `windowFunnel` were erroneously made case-insensitive. Now their names are case-sensitive, as designed. Only functions that are specified in the SQL standard, made for compatibility with other DBMS, or functions similar to those should be case-insensitive. [#16407](https://github.com/ClickHouse/ClickHouse/pull/16407) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Make the `rankCorr` function return NaN on insufficient data. This fixes [#16124](https://github.com/ClickHouse/ClickHouse/issues/16124). [#16135](https://github.com/ClickHouse/ClickHouse/pull/16135) ([hexiaoting](https://github.com/hexiaoting)).
+
+#### New Feature
+
+* Added support for LDAP as a user directory for locally non-existent users. [#12736](https://github.com/ClickHouse/ClickHouse/pull/12736) ([Denis Glazachev](https://github.com/traceon)).
+* Add the `system.replicated_fetches` table which shows currently running background fetches. [#16428](https://github.com/ClickHouse/ClickHouse/pull/16428) ([alesapin](https://github.com/alesapin)).
+* Added the setting `date_time_output_format`. [#15845](https://github.com/ClickHouse/ClickHouse/pull/15845) ([Maksim Kita](https://github.com/kitaisreal)).
+* Added a minimal web UI to ClickHouse. [#16158](https://github.com/ClickHouse/ClickHouse/pull/16158) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Allow reading/writing a single protobuf message at once (without length-delimiters). [#15199](https://github.com/ClickHouse/ClickHouse/pull/15199) ([filimonov](https://github.com/filimonov)).
+* Added initial OpenTelemetry support. ClickHouse now accepts OpenTelemetry traceparent headers over Native and HTTP protocols, and passes them downstream in some cases. The trace spans for executed queries are saved into the `system.opentelemetry_span_log` table. [#14195](https://github.com/ClickHouse/ClickHouse/pull/14195) ([Alexander Kuzmenkov](https://github.com/akuzm)).
+* Allow specifying a primary key in the column list of a `CREATE TABLE` query. This is needed for compatibility with other SQL dialects. [#15823](https://github.com/ClickHouse/ClickHouse/pull/15823) ([Maksim Kita](https://github.com/kitaisreal)).
+* Implement `OFFSET offset_row_count {ROW | ROWS} FETCH {FIRST | NEXT} fetch_row_count {ROW | ROWS} {ONLY | WITH TIES}` in SELECT queries with ORDER BY. This is the SQL-standard way to specify `LIMIT`. [#15855](https://github.com/ClickHouse/ClickHouse/pull/15855) ([hexiaoting](https://github.com/hexiaoting)).
+* Added the `errorCodeToName` function that returns the variable name of an error (useful for analyzing query_log and similar), and the `system.errors` table that shows how many times each error has occurred (respects `system_events_show_zero_values`). [#16438](https://github.com/ClickHouse/ClickHouse/pull/16438) ([Azat Khuzhin](https://github.com/azat)).
+* Added the special function `untuple`, which can introduce new columns to the SELECT list by expanding a named tuple. [#16242](https://github.com/ClickHouse/ClickHouse/pull/16242) ([Nikolai Kochetov](https://github.com/KochetovNicolai), [Amos Bird](https://github.com/amosbird)).
+* Now identifiers can be provided via query parameters, and these parameters can be used as table objects or columns. [#16594](https://github.com/ClickHouse/ClickHouse/pull/16594) ([Amos Bird](https://github.com/amosbird)).
+* Added big integer (UInt256, Int128, Int256) and UUID data type support for the MergeTree BloomFilter index. Big integers are an experimental feature. [#16642](https://github.com/ClickHouse/ClickHouse/pull/16642) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add the `farmFingerprint64` function (non-cryptographic string hashing). [#16570](https://github.com/ClickHouse/ClickHouse/pull/16570) ([Jacob Hayes](https://github.com/JacobHayes)).
+* Add `log_queries_min_query_duration_ms`; only queries slower than the value of this setting will go to `query_log`/`query_thread_log` (i.e. something like `slow_query_log` in MySQL). [#16529](https://github.com/ClickHouse/ClickHouse/pull/16529) ([Azat Khuzhin](https://github.com/azat)).
+* Ability to create a Docker image on top of `Alpine`. Uses a precompiled binary and glibc components from Ubuntu 20.04. [#16479](https://github.com/ClickHouse/ClickHouse/pull/16479) ([filimonov](https://github.com/filimonov)).
+* Added `toUUIDOrNull`, `toUUIDOrZero` cast functions. [#16337](https://github.com/ClickHouse/ClickHouse/pull/16337) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add `max_concurrent_queries_for_all_users` setting, see [#6636](https://github.com/ClickHouse/ClickHouse/issues/6636) for use cases. [#16154](https://github.com/ClickHouse/ClickHouse/pull/16154) ([nvartolomei](https://github.com/nvartolomei)).
+* Add a new option `print_query_id` to clickhouse-client. It helps generate arbitrary strings with the current query id generated by the client. Also, the query id is now printed in clickhouse-client by default. [#15809](https://github.com/ClickHouse/ClickHouse/pull/15809) ([Amos Bird](https://github.com/amosbird)).
+* Add `tid` and `logTrace` functions. This closes [#9434](https://github.com/ClickHouse/ClickHouse/issues/9434). [#15803](https://github.com/ClickHouse/ClickHouse/pull/15803) ([flynn](https://github.com/ucasFL)).
+* Add the function `formatReadableTimeDelta` that formats a time delta as a human-readable string. [#15497](https://github.com/ClickHouse/ClickHouse/pull/15497) ([Filipe Caixeta](https://github.com/filipecaixeta)).
+* Added the `disable_merges` option for volumes in multi-disk configuration. [#13956](https://github.com/ClickHouse/ClickHouse/pull/13956) ([Vladimir Chebotarev](https://github.com/excitoon)).
+
+#### Experimental Feature
+
+* New functions `encrypt`, `aes_encrypt_mysql`, `decrypt`, `aes_decrypt_mysql`. These functions work slowly, so we consider them an experimental feature. [#11844](https://github.com/ClickHouse/ClickHouse/pull/11844) ([Vasily Nemkov](https://github.com/Enmk)).
+
+#### Bug Fix
+
+* Mask the password in data_path in the `system.distribution_queue` table. [#16727](https://github.com/ClickHouse/ClickHouse/pull/16727) ([Azat Khuzhin](https://github.com/azat)).
+* Fix the `IN` operator over several columns and tuples with the `transform_null_in` setting enabled. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)).
+* The setting `max_parallel_replicas` worked incorrectly if the queried table had no sampling. This fixes [#5733](https://github.com/ClickHouse/ClickHouse/issues/5733). [#16675](https://github.com/ClickHouse/ClickHouse/pull/16675) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix optimize_read_in_order/optimize_aggregation_in_order with max_threads > 0 and an expression in ORDER BY. [#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)).
+* Calculation of `DEFAULT` expressions could involve name collisions (which were very unlikely to encounter). This fixes [#9359](https://github.com/ClickHouse/ClickHouse/issues/9359). [#16612](https://github.com/ClickHouse/ClickHouse/pull/16612) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix `query_thread_log.query_duration_ms` unit. [#16563](https://github.com/ClickHouse/ClickHouse/pull/16563) ([Azat Khuzhin](https://github.com/azat)).
+* Fix a bug when using MySQL Master -> MySQL Slave -> ClickHouse MaterializeMySQL Engine. `MaterializeMySQL` is an experimental feature. [#16504](https://github.com/ClickHouse/ClickHouse/pull/16504) ([TCeason](https://github.com/TCeason)).
+* A specifically crafted argument of the `round` function with `Decimal` could lead to integer division by zero. This fixes [#13338](https://github.com/ClickHouse/ClickHouse/issues/13338). [#16451](https://github.com/ClickHouse/ClickHouse/pull/16451) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix DROP TABLE for Distributed (racy with INSERT). [#16409](https://github.com/ClickHouse/ClickHouse/pull/16409) ([Azat Khuzhin](https://github.com/azat)).
+* Fix processing of very large entries in the replication queue. Very large entries may appear in ALTER queries if the table structure is extremely large (near 1 MB). This fixes [#16307](https://github.com/ClickHouse/ClickHouse/issues/16307). [#16332](https://github.com/ClickHouse/ClickHouse/pull/16332) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed inconsistent behaviour where a part of the returned data could be dropped because the set for its filtration wasn't created. [#16308](https://github.com/ClickHouse/ClickHouse/pull/16308) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix dictGet in sharding_key (and similar places, i.e. when the function context is stored permanently). [#16205](https://github.com/ClickHouse/ClickHouse/pull/16205) ([Azat Khuzhin](https://github.com/azat)).
+* Fix the exception thrown in `clickhouse-local` when trying to execute the `OPTIMIZE` command. Fixes [#16076](https://github.com/ClickHouse/ClickHouse/issues/16076). [#16192](https://github.com/ClickHouse/ClickHouse/pull/16192) ([filimonov](https://github.com/filimonov)).
+* Fixes the [#15780](https://github.com/ClickHouse/ClickHouse/issues/15780) regression: e.g. `indexOf([1, 2, 3], toLowCardinality(1))` was prohibited, but it should not be. [#16038](https://github.com/ClickHouse/ClickHouse/pull/16038) ([Mike](https://github.com/myrrc)).
+* Fix a bug with the MySQL database engine. When a MySQL server used as a database engine is down, some queries raised an exception because they tried to get tables from the disabled server, while it's unnecessary. For example, the query `SELECT ... FROM system.parts` should work only with MergeTree tables and not touch the MySQL database at all. [#16032](https://github.com/ClickHouse/ClickHouse/pull/16032) ([Kruglov Pavel](https://github.com/Avogar)).
+* Now an exception will be thrown when `ALTER MODIFY COLUMN ... DEFAULT ...` has a default incompatible with the column type. Fixes [#15854](https://github.com/ClickHouse/ClickHouse/issues/15854). [#15858](https://github.com/ClickHouse/ClickHouse/pull/15858) ([alesapin](https://github.com/alesapin)).
+* Fixed IPv4CIDRToRange/IPv6CIDRToRange functions to accept const IP-column values. [#15856](https://github.com/ClickHouse/ClickHouse/pull/15856) ([vladimir-golovchenko](https://github.com/vladimir-golovchenko)).
+
+#### Improvement
+
+* Treat `INTERVAL '1 hour'` as equivalent to `INTERVAL 1 HOUR`, to be compatible with Postgres and similar. This fixes [#15637](https://github.com/ClickHouse/ClickHouse/issues/15637). [#15978](https://github.com/ClickHouse/ClickHouse/pull/15978) ([flynn](https://github.com/ucasFL)).
+* Enable parsing enum values by their numeric IDs for CSV, TSV and JSON input formats. [#15685](https://github.com/ClickHouse/ClickHouse/pull/15685) ([vivarum](https://github.com/vivarum)).
+* Better read task scheduling for JBOD architecture and `MergeTree` storage. New setting `read_backoff_min_concurrency`, which serves as the lower limit on the number of reading threads. [#16423](https://github.com/ClickHouse/ClickHouse/pull/16423) ([Amos Bird](https://github.com/amosbird)).
+* Add missing support for `LowCardinality` in the `Avro` format. [#16521](https://github.com/ClickHouse/ClickHouse/pull/16521) ([Mike](https://github.com/myrrc)).
+* Workaround for using `S3` with an nginx server as a proxy. Nginx currently does not accept URLs with an empty path like `http://domain.com?delete`, but vanilla aws-sdk-cpp produces this kind of URL. This commit uses a patched aws-sdk-cpp version, which produces URLs with "/" as the path in such cases, like `http://domain.com/?delete`. [#16814](https://github.com/ClickHouse/ClickHouse/pull/16814) ([ianton-ru](https://github.com/ianton-ru)).
+* Better diagnostics on parse errors in input data. Provide the row number on `Cannot read all data` errors. [#16644](https://github.com/ClickHouse/ClickHouse/pull/16644) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Make the behaviour of `minMap` and `maxMap` more desirable. They will not skip zero values in the result. Fixes [#16087](https://github.com/ClickHouse/ClickHouse/issues/16087). [#16631](https://github.com/ClickHouse/ClickHouse/pull/16631) ([Ildus Kurbangaliev](https://github.com/ildus)).
+* Better update of ZooKeeper configuration at runtime. [#16630](https://github.com/ClickHouse/ClickHouse/pull/16630) ([sundyli](https://github.com/sundy-li)).
+* Apply the SETTINGS clause as early as possible. It allows modifying more settings in the query. This closes [#3178](https://github.com/ClickHouse/ClickHouse/issues/3178). [#16619](https://github.com/ClickHouse/ClickHouse/pull/16619) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Now the `event_time_microseconds` field is stored as Decimal64, not UInt64. [#16617](https://github.com/ClickHouse/ClickHouse/pull/16617) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Now parameterized functions can be used in the `APPLY` column transformer. [#16589](https://github.com/ClickHouse/ClickHouse/pull/16589) ([Amos Bird](https://github.com/amosbird)).
+* Improve scheduling of the background task which removes data of dropped tables in `Atomic` databases. `Atomic` databases do not create a broken symlink to the table data directory if the table actually has no data directory. [#16584](https://github.com/ClickHouse/ClickHouse/pull/16584) ([tavplubix](https://github.com/tavplubix)).
+* Subqueries in the `WITH` section (CTE) can reference previous subqueries in the `WITH` section by their name. [#16575](https://github.com/ClickHouse/ClickHouse/pull/16575) ([Amos Bird](https://github.com/amosbird)).
+* Add `current_database` to `system.query_thread_log`. [#16558](https://github.com/ClickHouse/ClickHouse/pull/16558) ([Azat Khuzhin](https://github.com/azat)).
+* Allow fetching parts that are already committed or outdated in the current instance into the detached directory. It's useful when migrating tables from another cluster and having an N-to-1 shard mapping. It's also consistent with the current fetchPartition implementation. [#16538](https://github.com/ClickHouse/ClickHouse/pull/16538) ([Amos Bird](https://github.com/amosbird)).
+* Multiple improvements for `RabbitMQ`: fixed the bug from [#16263](https://github.com/ClickHouse/ClickHouse/issues/16263), minimized event loop lifetime, and added a more efficient queue setup. [#16426](https://github.com/ClickHouse/ClickHouse/pull/16426) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix debug assertion in the `quantileDeterministic` function. In previous versions it could also transfer up to two times more data over the network, although no bug existed. This fixes [#15683](https://github.com/ClickHouse/ClickHouse/issues/15683). [#16410](https://github.com/ClickHouse/ClickHouse/pull/16410) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add the `TablesToDropQueueSize` metric. It's equal to the number of dropped tables that are waiting for background data removal. [#16364](https://github.com/ClickHouse/ClickHouse/pull/16364) ([tavplubix](https://github.com/tavplubix)).
+* Better diagnostics when the client has dropped the connection. In previous versions, `Attempt to read after EOF` and `Broken pipe` exceptions were logged in the server. In the new version, it's the information message `Client has dropped the connection, cancel the query.`. [#16329](https://github.com/ClickHouse/ClickHouse/pull/16329) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add total_rows/total_bytes (from system.tables) support for Set/Join table engines. [#16306](https://github.com/ClickHouse/ClickHouse/pull/16306) ([Azat Khuzhin](https://github.com/azat)).
+* Now it's possible to specify `PRIMARY KEY` without `ORDER BY` for the MergeTree table engine family. Closes [#15591](https://github.com/ClickHouse/ClickHouse/issues/15591). [#16284](https://github.com/ClickHouse/ClickHouse/pull/16284) ([alesapin](https://github.com/alesapin)).
+* If there is no tmp folder in the system (chroot, misconfiguration, etc.) `clickhouse-local` will create a temporary subfolder in the current directory. [#16280](https://github.com/ClickHouse/ClickHouse/pull/16280) ([filimonov](https://github.com/filimonov)).
+* Add support for nested data types (like named tuple) as sub-types. Fixes [#15587](https://github.com/ClickHouse/ClickHouse/issues/15587). [#16262](https://github.com/ClickHouse/ClickHouse/pull/16262) ([Ivan](https://github.com/abyss7)).
+* Support for `database_atomic_wait_for_drop_and_detach_synchronously`/`NO DELAY`/`SYNC` for `DROP DATABASE`. [#16127](https://github.com/ClickHouse/ClickHouse/pull/16127) ([Azat Khuzhin](https://github.com/azat)).
+* Add `allow_nondeterministic_optimize_skip_unused_shards` (to allow non-deterministic functions like `rand()` or `dictGet()` in the sharding key). [#16105](https://github.com/ClickHouse/ClickHouse/pull/16105) ([Azat Khuzhin](https://github.com/azat)).
+* Fix `memory_profiler_step`/`max_untracked_memory` for queries via HTTP (test included). Fix the issue that adjusting this value globally in the xml config does not help either, since those settings are not applied anyway; only the default (4MB) value is [used](https://github.com/ClickHouse/ClickHouse/blob/17731245336d8c84f75e4c0894c5797ed7732190/src/Common/ThreadStatus.h#L104). Fix `query_id` for the most root ThreadStatus of the http query (by initializing QueryScope after reading query_id). [#16101](https://github.com/ClickHouse/ClickHouse/pull/16101) ([Azat Khuzhin](https://github.com/azat)).
+* Now it's allowed to execute `ALTER ... ON CLUSTER` queries regardless of the `<internal_replication>` setting in cluster config. [#16075](https://github.com/ClickHouse/ClickHouse/pull/16075) ([alesapin](https://github.com/alesapin)).
+* Fix a rare issue when `clickhouse-client` may abort on exit due to loading of suggestions. This fixes [#16035](https://github.com/ClickHouse/ClickHouse/issues/16035). [#16047](https://github.com/ClickHouse/ClickHouse/pull/16047) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add support of the `cache` layout for `Redis` dictionaries with complex keys. [#15985](https://github.com/ClickHouse/ClickHouse/pull/15985) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix query hang (endless loop) in case of misconfiguration (`connections_with_failover_max_tries` set to 0). [#15876](https://github.com/ClickHouse/ClickHouse/pull/15876) ([Azat Khuzhin](https://github.com/azat)).
+* Change the level of some log messages from information to debug, so information messages will not appear for every query. This closes [#5293](https://github.com/ClickHouse/ClickHouse/issues/5293). [#15816](https://github.com/ClickHouse/ClickHouse/pull/15816) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Remove `MemoryTrackingInBackground*` metrics to avoid potentially misleading results. This fixes [#15684](https://github.com/ClickHouse/ClickHouse/issues/15684). [#15813](https://github.com/ClickHouse/ClickHouse/pull/15813) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add reconnects to the `zookeeper-dump-tree` tool. [#15711](https://github.com/ClickHouse/ClickHouse/pull/15711) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Allow explicitly specifying a columns list in `CREATE TABLE table AS table_function(...)` queries. Fixes [#9249](https://github.com/ClickHouse/ClickHouse/issues/9249). Fixes [#14214](https://github.com/ClickHouse/ClickHouse/issues/14214). [#14295](https://github.com/ClickHouse/ClickHouse/pull/14295) ([tavplubix](https://github.com/tavplubix)).
+
+#### Performance Improvement
+
+* Do not merge parts across partitions in SELECT FINAL. [#15938](https://github.com/ClickHouse/ClickHouse/pull/15938) ([Kruglov Pavel](https://github.com/Avogar)).
+* Improve performance of `-OrNull` and `-OrDefault` aggregate functions. [#16661](https://github.com/ClickHouse/ClickHouse/pull/16661) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Improve performance of `quantileMerge`. In previous versions it was obnoxiously slow. This closes [#1463](https://github.com/ClickHouse/ClickHouse/issues/1463). [#16643](https://github.com/ClickHouse/ClickHouse/pull/16643) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Improve performance of logical functions a little. [#16347](https://github.com/ClickHouse/ClickHouse/pull/16347) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Improved performance of merges assignment in MergeTree table engines. Shouldn't be visible to the user. [#16191](https://github.com/ClickHouse/ClickHouse/pull/16191) ([alesapin](https://github.com/alesapin)).
+* Speed up hashed/sparse_hashed dictionary loading by preallocating the hash table. [#15454](https://github.com/ClickHouse/ClickHouse/pull/15454) ([Azat Khuzhin](https://github.com/azat)).
+* Now trivial count optimization becomes slightly non-trivial. Predicates that contain the exact partition expression can be optimized too. This also fixes [#11092](https://github.com/ClickHouse/ClickHouse/issues/11092), which returned a wrong count when `max_parallel_replicas > 1`. [#15074](https://github.com/ClickHouse/ClickHouse/pull/15074) ([Amos Bird](https://github.com/amosbird)).
+
+#### Build/Testing/Packaging Improvement
+
+* Add a flaky check for stateless tests. It will detect potentially flaky functional tests in advance, before they are merged. [#16238](https://github.com/ClickHouse/ClickHouse/pull/16238) ([alesapin](https://github.com/alesapin)).
+* Use a proper version for `croaring` instead of the amalgamation. [#16285](https://github.com/ClickHouse/ClickHouse/pull/16285) ([sundyli](https://github.com/sundy-li)).
+* Improve generation of build files for the `ya.make` build system (Arcadia). [#16700](https://github.com/ClickHouse/ClickHouse/pull/16700) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add a MySQL BinLog file check tool for the `MaterializeMySQL` database engine. `MaterializeMySQL` is an experimental feature. [#16223](https://github.com/ClickHouse/ClickHouse/pull/16223) ([Winter Zhang](https://github.com/zhang2014)).
+* Check for the executable bit on non-executable files. People often accidentally commit executable files from Windows. [#15843](https://github.com/ClickHouse/ClickHouse/pull/15843) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Check for `#pragma once` in headers. [#15818](https://github.com/ClickHouse/ClickHouse/pull/15818) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix illegal code style `&vector[idx]` in libhdfs3. This fixes the libcxx debug build. See also https://github.com/ClickHouse-Extras/libhdfs3/pull/8 . [#15815](https://github.com/ClickHouse/ClickHouse/pull/15815) ([Amos Bird](https://github.com/amosbird)).
+* Fix build of one miscellaneous example tool on Mac OS. Note that we don't build examples on Mac OS in our CI (we build only ClickHouse binary), so there is zero chance it will not break again. This fixes [#15804](https://github.com/ClickHouse/ClickHouse/issues/15804). [#15808](https://github.com/ClickHouse/ClickHouse/pull/15808) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Simplify Sys/V init script. [#14135](https://github.com/ClickHouse/ClickHouse/pull/14135) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Added `boost::program_options` to `db_generator` in order to increase its usability. This closes [#15940](https://github.com/ClickHouse/ClickHouse/issues/15940). [#15973](https://github.com/ClickHouse/ClickHouse/pull/15973) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+
+
 ## ClickHouse release 20.10
+### ClickHouse release v20.10.4.1-stable, 2020-11-13
+
+#### Bug Fix
+
+* Fix rare silent crashes when the query profiler is on and ClickHouse is installed on an OS with a glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix the `IN` operator over several columns and tuples with the `transform_null_in` setting enabled. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix optimize_read_in_order/optimize_aggregation_in_order with max_threads > 0 and an expression in ORDER BY. [#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)).
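+
+    A sketch of the affected query shape (hypothetical table `t` with sorting key `ts`; not part of the PR):
+
+    ```sql
+    -- CREATE TABLE t (ts DateTime, v UInt64) ENGINE = MergeTree ORDER BY ts;
+    SET optimize_read_in_order = 1, max_threads = 4;
+    SELECT * FROM t ORDER BY toDate(ts); -- a monotonic expression over the sorting key
+    ```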
+* Now, when parsing Avro input, `LowCardinality` is removed from the type. Fixes [#16188](https://github.com/ClickHouse/ClickHouse/issues/16188). [#16521](https://github.com/ClickHouse/ClickHouse/pull/16521) ([Mike](https://github.com/myrrc)).
+* Fix rapid growth of metadata when using MySQL Master -> MySQL Slave -> ClickHouse MaterializeMySQL Engine, and `slave_parallel_worker` enabled on MySQL Slave, by properly shrinking GTID sets. This fixes [#15951](https://github.com/ClickHouse/ClickHouse/issues/15951). [#16504](https://github.com/ClickHouse/ClickHouse/pull/16504) ([TCeason](https://github.com/TCeason)).
+* Fix DROP TABLE for Distributed (racy with INSERT). [#16409](https://github.com/ClickHouse/ClickHouse/pull/16409) ([Azat Khuzhin](https://github.com/azat)).
+* Fix processing of very large entries in the replication queue. Very large entries may appear in ALTER queries if the table structure is extremely large (near 1 MB). This fixes [#16307](https://github.com/ClickHouse/ClickHouse/issues/16307). [#16332](https://github.com/ClickHouse/ClickHouse/pull/16332) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix a bug with the MySQL database engine. When a MySQL server used as a database engine is down, some queries raised an exception because they tried to get tables from the disabled server, while it's unnecessary. For example, the query `SELECT ... FROM system.parts` should work only with MergeTree tables and not touch the MySQL database at all. [#16032](https://github.com/ClickHouse/ClickHouse/pull/16032) ([Kruglov Pavel](https://github.com/Avogar)).
+
+#### Improvement
+
+* Workaround for using `S3` with an nginx server as a proxy. Nginx currently does not accept URLs with an empty path like `http://domain.com?delete`, but vanilla aws-sdk-cpp produces this kind of URL. This commit uses a patched aws-sdk-cpp version, which produces URLs with "/" as the path in such cases, like `http://domain.com/?delete`. [#16813](https://github.com/ClickHouse/ClickHouse/pull/16813) ([ianton-ru](https://github.com/ianton-ru)).
+
+
 ### ClickHouse release v20.10.3.30, 2020-10-28
 
 #### Backward Incompatible Change
@@ -212,6 +358,84 @@
 ## ClickHouse release 20.9
+### ClickHouse release v20.9.5.5-stable, 2020-11-13
+
+#### Bug Fix
+
+* Fix rare silent crashes when the query profiler is on and ClickHouse is installed on an OS with a glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Now, when parsing Avro input, `LowCardinality` is removed from the type. Fixes [#16188](https://github.com/ClickHouse/ClickHouse/issues/16188). [#16521](https://github.com/ClickHouse/ClickHouse/pull/16521) ([Mike](https://github.com/myrrc)).
+* Fix rapid growth of metadata when using MySQL Master -> MySQL Slave -> ClickHouse MaterializeMySQL Engine, and `slave_parallel_worker` enabled on MySQL Slave, by properly shrinking GTID sets. This fixes [#15951](https://github.com/ClickHouse/ClickHouse/issues/15951). [#16504](https://github.com/ClickHouse/ClickHouse/pull/16504) ([TCeason](https://github.com/TCeason)).
+* Fix DROP TABLE for Distributed (racy with INSERT). [#16409](https://github.com/ClickHouse/ClickHouse/pull/16409) ([Azat Khuzhin](https://github.com/azat)).
+* Fix processing of very large entries in the replication queue. Very large entries may appear in ALTER queries if the table structure is extremely large (near 1 MB). This fixes [#16307](https://github.com/ClickHouse/ClickHouse/issues/16307). [#16332](https://github.com/ClickHouse/ClickHouse/pull/16332) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed inconsistent behaviour where a part of the returned data could be dropped because the set for its filtration wasn't created. [#16308](https://github.com/ClickHouse/ClickHouse/pull/16308) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix a bug with the MySQL database engine. When a MySQL server used as a database engine is down, some queries raised an exception because they tried to get tables from the disabled server, while it's unnecessary. For example, the query `SELECT ... FROM system.parts` should work only with MergeTree tables and not touch the MySQL database at all. [#16032](https://github.com/ClickHouse/ClickHouse/pull/16032) ([Kruglov Pavel](https://github.com/Avogar)).
+
+
+### ClickHouse release v20.9.4.76-stable (2020-10-29)
+
+#### Bug Fix
+
+* Fix double free in case of exception in the function `dictGet`. It could have happened if the dictionary was loaded with an error. [#16429](https://github.com/ClickHouse/ClickHouse/pull/16429) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix group by with totals/rollup/cube modifiers and min/max functions over group by keys. Fixes [#16393](https://github.com/ClickHouse/ClickHouse/issues/16393). [#16397](https://github.com/ClickHouse/ClickHouse/pull/16397) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix async Distributed INSERT w/ prefer_localhost_replica=0 and internal_replication. [#16358](https://github.com/ClickHouse/ClickHouse/pull/16358) ([Azat Khuzhin](https://github.com/azat)).
+* Fix very wrong code in the TwoLevelStringHashTable implementation, which might lead to a memory leak. I'm surprised how this bug could lurk for so long.... [#16264](https://github.com/ClickHouse/ClickHouse/pull/16264) ([Amos Bird](https://github.com/amosbird)).
+* Fix the case when memory can be overallocated regardless of the limit. This closes [#14560](https://github.com/ClickHouse/ClickHouse/issues/14560). [#16206](https://github.com/ClickHouse/ClickHouse/pull/16206) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix `ALTER MODIFY ... ORDER BY` query hang for `ReplicatedVersionedCollapsingMergeTree`. This fixes [#15980](https://github.com/ClickHouse/ClickHouse/issues/15980). [#16011](https://github.com/ClickHouse/ClickHouse/pull/16011) ([alesapin](https://github.com/alesapin)).
+* Fix collate name & charset name parser and support `length = 0` for the string type. [#16008](https://github.com/ClickHouse/ClickHouse/pull/16008) ([Winter Zhang](https://github.com/zhang2014)).
+* Allow using the direct layout for dictionaries with complex keys. [#16007](https://github.com/ClickHouse/ClickHouse/pull/16007) ([Anton Popov](https://github.com/CurtizJ)).
+* Prevent replica hang for 5-10 minutes when a replication error happens after a period of inactivity. [#15987](https://github.com/ClickHouse/ClickHouse/pull/15987) ([filimonov](https://github.com/filimonov)).
+* Fix rare segfaults when inserting into or selecting from a MaterializedView and concurrently dropping the target table (for the Atomic database engine). [#15984](https://github.com/ClickHouse/ClickHouse/pull/15984) ([tavplubix](https://github.com/tavplubix)).
+* Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now interpreted as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes [#15628](https://github.com/ClickHouse/ClickHouse/issues/15628). [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix a crash when database creation fails. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)).
+* Fixed `DROP TABLE IF EXISTS` failure with `Table ... doesn't exist` error when the table is concurrently renamed (for the Atomic database engine). Fixed a rare deadlock when concurrently executing some DDL queries with multiple tables (like `DROP DATABASE` and `RENAME TABLE`). Fixed `DROP/DETACH DATABASE` failure with `Table ... doesn't exist` when concurrently executing `DROP/DETACH TABLE`. [#15934](https://github.com/ClickHouse/ClickHouse/pull/15934) ([tavplubix](https://github.com/tavplubix)).
+* Fix incorrect empty result for a query from a `Distributed` table if the query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix possible deadlocks in RBAC. [#15875](https://github.com/ClickHouse/ClickHouse/pull/15875) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix exception `Block structure mismatch` in `SELECT ... ORDER BY DESC` queries which were executed after an `ALTER MODIFY COLUMN` query. Fixes [#15800](https://github.com/ClickHouse/ClickHouse/issues/15800). [#15852](https://github.com/ClickHouse/ClickHouse/pull/15852) ([alesapin](https://github.com/alesapin)).
+* Fix `select count()` inaccuracy for MaterializeMySQL. [#15767](https://github.com/ClickHouse/ClickHouse/pull/15767) ([tavplubix](https://github.com/tavplubix)).
+* Fix some cases of queries in which only virtual columns are selected. Previously, the `Not found column _nothing in block` exception could be thrown. Fixes [#12298](https://github.com/ClickHouse/ClickHouse/issues/12298). [#15756](https://github.com/ClickHouse/ClickHouse/pull/15756) ([Anton Popov](https://github.com/CurtizJ)).
+* Fixed too low a default value of the `max_replicated_logs_to_keep` setting, which might cause replicas to become lost too often. Improve the lost replica recovery process by choosing the most up-to-date replica to clone. Also do not remove old parts from the lost replica; detach them instead. [#15701](https://github.com/ClickHouse/ClickHouse/pull/15701) ([tavplubix](https://github.com/tavplubix)).
+* Fix the error `Cannot add simple transform to empty Pipe` which happened while reading from a `Buffer` table which has a different structure than the destination table. It was possible if the destination table returned an empty result for the query. Fixes [#15529](https://github.com/ClickHouse/ClickHouse/issues/15529). [#15662](https://github.com/ClickHouse/ClickHouse/pull/15662) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed a bug with globs in the S3 table function: the region from the URL was not applied to the S3 client configuration. [#15646](https://github.com/ClickHouse/ClickHouse/pull/15646) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Decrement the `ReadonlyReplica` metric when detaching read-only tables. This fixes [#15598](https://github.com/ClickHouse/ClickHouse/issues/15598). [#15592](https://github.com/ClickHouse/ClickHouse/pull/15592) ([sundyli](https://github.com/sundy-li)).
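+
+    A quick way to watch this metric (a sketch):
+
+    ```sql
+    SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica';
+    ```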
+* Throw an error when a single parameter is passed to ReplicatedMergeTree instead of ignoring it. [#15516](https://github.com/ClickHouse/ClickHouse/pull/15516) ([nvartolomei](https://github.com/nvartolomei)).
+
+#### Improvement
+
+* Now it's allowed to execute `ALTER ... ON CLUSTER` queries regardless of the `<internal_replication>` setting in cluster config. [#16075](https://github.com/ClickHouse/ClickHouse/pull/16075) ([alesapin](https://github.com/alesapin)).
+* Unfold `{database}`, `{table}` and `{uuid}` macros in `ReplicatedMergeTree` arguments on table creation. [#16160](https://github.com/ClickHouse/ClickHouse/pull/16160) ([tavplubix](https://github.com/tavplubix)).
+
+
+### ClickHouse release v20.9.3.45-stable (2020-10-09)
+
+#### Bug Fix
+
+* Fix the error `Cannot find column` which may happen at insertion into a `MATERIALIZED VIEW` when the query for the `MV` contains `ARRAY JOIN`. [#15717](https://github.com/ClickHouse/ClickHouse/pull/15717) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix race condition in AMQP-CPP. [#15667](https://github.com/ClickHouse/ClickHouse/pull/15667) ([alesapin](https://github.com/alesapin)).
+* Fix the order of destruction for resources in the `ReadFromStorage` step of the query plan. It might cause crashes in rare cases. Possibly connected with [#15610](https://github.com/ClickHouse/ClickHouse/issues/15610). [#15645](https://github.com/ClickHouse/ClickHouse/pull/15645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed the `Element ... is not a constant expression` error when using a `JSON*` function result in `VALUES`, `LIMIT` or the right side of the `IN` operator. [#15589](https://github.com/ClickHouse/ClickHouse/pull/15589) ([tavplubix](https://github.com/tavplubix)).
+* Prevent the possibility of the error message `Could not calculate available disk space (statvfs), errno: 4, strerror: Interrupted system call`. This fixes [#15541](https://github.com/ClickHouse/ClickHouse/issues/15541). [#15557](https://github.com/ClickHouse/ClickHouse/pull/15557) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Significantly reduce memory usage in AggregatingInOrderTransform/optimize_aggregation_in_order. [#15543](https://github.com/ClickHouse/ClickHouse/pull/15543) ([Azat Khuzhin](https://github.com/azat)).
+* A mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)).
+* Fix a bug when the `ILIKE` operator stops being case-insensitive if `LIKE` with the same pattern was executed. [#15536](https://github.com/ClickHouse/ClickHouse/pull/15536) ([alesapin](https://github.com/alesapin)).
+* Fix `Missing columns` errors when selecting columns which are absent in data but depend on other columns which are also absent in data. Fixes [#15530](https://github.com/ClickHouse/ClickHouse/issues/15530). [#15532](https://github.com/ClickHouse/ClickHouse/pull/15532) ([alesapin](https://github.com/alesapin)).
+* Fix a bug with event subscription in DDLWorker which rarely may lead to query hangs in `ON CLUSTER`. Introduced in [#13450](https://github.com/ClickHouse/ClickHouse/issues/13450). [#15477](https://github.com/ClickHouse/ClickHouse/pull/15477) ([alesapin](https://github.com/alesapin)).
+* Report a proper error when the second argument of the `boundingRatio` aggregate function has a wrong type. [#15407](https://github.com/ClickHouse/ClickHouse/pull/15407) ([detailyang](https://github.com/detailyang)).
+* Fix a bug where queries like `SELECT toStartOfDay(today())` failed complaining about an empty time_zone argument. [#15319](https://github.com/ClickHouse/ClickHouse/pull/15319) ([Bharat Nallan](https://github.com/bharatnc)).
+* Fix race condition during MergeTree table rename and background cleanup. [#15304](https://github.com/ClickHouse/ClickHouse/pull/15304) ([alesapin](https://github.com/alesapin)).
+* Fix a rare race condition on server startup when system.logs are enabled. [#15300](https://github.com/ClickHouse/ClickHouse/pull/15300) ([alesapin](https://github.com/alesapin)).
+* Fix MSan report in QueryLog. Uninitialized memory could be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix instance crash when using joinGet with LowCardinality types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
+* Fix a bug in the table engine `Buffer` which doesn't allow inserting data of a new structure into `Buffer` after an `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)).
+* Adjust the decimals field size in the MySQL column definition packet. [#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)).
+* Fixed the `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in an Atomic database when running clickhouse-server in Docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([tavplubix](https://github.com/tavplubix)).
+* Fix to make predicate push down work when the subquery contains the finalizeAggregation function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)).
+* Fix a problem where the server may get stuck on startup while talking to ZooKeeper, if the configuration files have to be fetched from ZK (using the `from_zk` include option). This fixes [#14814](https://github.com/ClickHouse/ClickHouse/issues/14814). [#14843](https://github.com/ClickHouse/ClickHouse/pull/14843) ([Alexander Kuzmenkov](https://github.com/akuzm)).
+
+#### Improvement
+
+* Now it's possible to change the type of the version column for `VersionedCollapsingMergeTree` with an `ALTER` query. [#15442](https://github.com/ClickHouse/ClickHouse/pull/15442) ([alesapin](https://github.com/alesapin)).
+
+
 ### ClickHouse release v20.9.2.20, 2020-09-22
 
 #### New Feature
@@ -286,6 +510,110 @@
 ## ClickHouse release 20.8
+### ClickHouse release v20.8.6.6-lts, 2020-11-13
+
+#### Bug Fix
+
+* Fix rare silent crashes when the query profiler is on and ClickHouse is installed on an OS with a glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Now, when parsing Avro input, `LowCardinality` is removed from the type. Fixes [#16188](https://github.com/ClickHouse/ClickHouse/issues/16188). [#16521](https://github.com/ClickHouse/ClickHouse/pull/16521) ([Mike](https://github.com/myrrc)).
+* Fix rapid growth of metadata when using MySQL Master -> MySQL Slave -> ClickHouse MaterializeMySQL Engine, and `slave_parallel_worker` enabled on MySQL Slave, by properly shrinking GTID sets. This fixes [#15951](https://github.com/ClickHouse/ClickHouse/issues/15951). [#16504](https://github.com/ClickHouse/ClickHouse/pull/16504) ([TCeason](https://github.com/TCeason)).
+* Fix DROP TABLE for Distributed (racy with INSERT). [#16409](https://github.com/ClickHouse/ClickHouse/pull/16409) ([Azat Khuzhin](https://github.com/azat)).
+* Fix processing of very large entries in the replication queue. Very large entries may appear in ALTER queries if the table structure is extremely large (near 1 MB). This fixes [#16307](https://github.com/ClickHouse/ClickHouse/issues/16307). [#16332](https://github.com/ClickHouse/ClickHouse/pull/16332) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed inconsistent behaviour where a part of the returned data could be dropped because the set for its filtration wasn't created. [#16308](https://github.com/ClickHouse/ClickHouse/pull/16308) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix a bug with the MySQL database engine. When a MySQL server used as a database engine is down, some queries raised an exception because they tried to get tables from the disabled server, while it's unnecessary. For example, the query `SELECT ... FROM system.parts` should work only with MergeTree tables and not touch the MySQL database at all. [#16032](https://github.com/ClickHouse/ClickHouse/pull/16032) ([Kruglov Pavel](https://github.com/Avogar)).
+
+
+### ClickHouse release v20.8.5.45-lts, 2020-10-29
+
+#### Bug Fix
+
+* Fix double free in case of exception in the function `dictGet`. It could have happened if the dictionary was loaded with an error. [#16429](https://github.com/ClickHouse/ClickHouse/pull/16429) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix group by with totals/rollup/cube modifiers and min/max functions over group by keys. Fixes [#16393](https://github.com/ClickHouse/ClickHouse/issues/16393). [#16397](https://github.com/ClickHouse/ClickHouse/pull/16397) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix async Distributed INSERT w/ prefer_localhost_replica=0 and internal_replication. [#16358](https://github.com/ClickHouse/ClickHouse/pull/16358) ([Azat Khuzhin](https://github.com/azat)).
+* Fix a possible memory leak during `GROUP BY` with string keys, caused by an error in the `TwoLevelStringHashTable` implementation. [#16264](https://github.com/ClickHouse/ClickHouse/pull/16264) ([Amos Bird](https://github.com/amosbird)).
+* Fix the case when memory can be overallocated regardless of the limit. This closes [#14560](https://github.com/ClickHouse/ClickHouse/issues/14560). [#16206](https://github.com/ClickHouse/ClickHouse/pull/16206) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix `ALTER MODIFY ... ORDER BY` query hang for `ReplicatedVersionedCollapsingMergeTree`. This fixes [#15980](https://github.com/ClickHouse/ClickHouse/issues/15980). [#16011](https://github.com/ClickHouse/ClickHouse/pull/16011) ([alesapin](https://github.com/alesapin)).
+* Fix collate name & charset name parser and support `length = 0` for the string type. [#16008](https://github.com/ClickHouse/ClickHouse/pull/16008) ([Winter Zhang](https://github.com/zhang2014)).
+* Allow using the direct layout for dictionaries with complex keys. [#16007](https://github.com/ClickHouse/ClickHouse/pull/16007) ([Anton Popov](https://github.com/CurtizJ)).
+* Prevent replica hang for 5-10 minutes when a replication error happens after a period of inactivity. [#15987](https://github.com/ClickHouse/ClickHouse/pull/15987) ([filimonov](https://github.com/filimonov)).
+* Fix rare segfaults when inserting into or selecting from a MaterializedView and concurrently dropping the target table (for the Atomic database engine). [#15984](https://github.com/ClickHouse/ClickHouse/pull/15984) ([tavplubix](https://github.com/tavplubix)).
+* Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now interpreted as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes [#15628](https://github.com/ClickHouse/ClickHouse/issues/15628). [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix a crash when database creation fails. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)).
+* Fixed `DROP TABLE IF EXISTS` failure with `Table ... doesn't exist` error when the table is concurrently renamed (for the Atomic database engine). Fixed a rare deadlock when concurrently executing some DDL queries with multiple tables (like `DROP DATABASE` and `RENAME TABLE`). Fixed `DROP/DETACH DATABASE` failure with `Table ... doesn't exist` when concurrently executing `DROP/DETACH TABLE`. [#15934](https://github.com/ClickHouse/ClickHouse/pull/15934) ([tavplubix](https://github.com/tavplubix)).
+* Fix incorrect empty result for a query from a `Distributed` table if the query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix possible deadlocks in RBAC. [#15875](https://github.com/ClickHouse/ClickHouse/pull/15875) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix exception `Block structure mismatch` in `SELECT ... ORDER BY DESC` queries which were executed after an `ALTER MODIFY COLUMN` query. Fixes [#15800](https://github.com/ClickHouse/ClickHouse/issues/15800). [#15852](https://github.com/ClickHouse/ClickHouse/pull/15852) ([alesapin](https://github.com/alesapin)).
+* Fix some cases of queries in which only virtual columns are selected. Previously, the `Not found column _nothing in block` exception could be thrown. Fixes [#12298](https://github.com/ClickHouse/ClickHouse/issues/12298). [#15756](https://github.com/ClickHouse/ClickHouse/pull/15756) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix the error `Cannot find column` which may happen at insertion into a `MATERIALIZED VIEW` when the query for the `MV` contains `ARRAY JOIN`. [#15717](https://github.com/ClickHouse/ClickHouse/pull/15717) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed too low a default value of the `max_replicated_logs_to_keep` setting, which might cause replicas to become lost too often. Improve the lost replica recovery process by choosing the most up-to-date replica to clone. Also do not remove old parts from the lost replica; detach them instead. [#15701](https://github.com/ClickHouse/ClickHouse/pull/15701) ([tavplubix](https://github.com/tavplubix)).
+* Fix the error `Cannot add simple transform to empty Pipe` which happened while reading from a `Buffer` table which has a different structure than the destination table. It was possible if the destination table returned an empty result for the query. Fixes [#15529](https://github.com/ClickHouse/ClickHouse/issues/15529). [#15662](https://github.com/ClickHouse/ClickHouse/pull/15662) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed a bug with globs in the S3 table function: the region from the URL was not applied to the S3 client configuration. [#15646](https://github.com/ClickHouse/ClickHouse/pull/15646) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Decrement the `ReadonlyReplica` metric when detaching read-only tables. This fixes [#15598](https://github.com/ClickHouse/ClickHouse/issues/15598). [#15592](https://github.com/ClickHouse/ClickHouse/pull/15592) ([sundyli](https://github.com/sundy-li)).
+* Throw an error when a single parameter is passed to ReplicatedMergeTree instead of ignoring it. [#15516](https://github.com/ClickHouse/ClickHouse/pull/15516) ([nvartolomei](https://github.com/nvartolomei)).
+
+#### Improvement
+
+* Now it's allowed to execute `ALTER ... ON CLUSTER` queries regardless of the `<internal_replication>` setting in cluster config. [#16075](https://github.com/ClickHouse/ClickHouse/pull/16075) ([alesapin](https://github.com/alesapin)).
+* Unfold `{database}`, `{table}` and `{uuid}` macros in `ReplicatedMergeTree` arguments on table creation. [#16159](https://github.com/ClickHouse/ClickHouse/pull/16159) ([tavplubix](https://github.com/tavplubix)).
+
+
+### ClickHouse release v20.8.4.11-lts, 2020-10-09
+
+#### Bug Fix
+
+* Fix the order of destruction for resources in the `ReadFromStorage` step of the query plan. It might cause crashes in rare cases. Possibly connected with [#15610](https://github.com/ClickHouse/ClickHouse/issues/15610). [#15645](https://github.com/ClickHouse/ClickHouse/pull/15645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed the `Element ... is not a constant expression` error when using a `JSON*` function result in `VALUES`, `LIMIT` or the right side of the `IN` operator. [#15589](https://github.com/ClickHouse/ClickHouse/pull/15589) ([tavplubix](https://github.com/tavplubix)).
+* Prevent the possibility of the error message `Could not calculate available disk space (statvfs), errno: 4, strerror: Interrupted system call`. This fixes [#15541](https://github.com/ClickHouse/ClickHouse/issues/15541). [#15557](https://github.com/ClickHouse/ClickHouse/pull/15557) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Significantly reduce memory usage in AggregatingInOrderTransform/optimize_aggregation_in_order. [#15543](https://github.com/ClickHouse/ClickHouse/pull/15543) ([Azat Khuzhin](https://github.com/azat)).
+* A mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)).
+* Fix a bug when the `ILIKE` operator stops being case-insensitive if `LIKE` with the same pattern was executed. [#15536](https://github.com/ClickHouse/ClickHouse/pull/15536) ([alesapin](https://github.com/alesapin)).
+* Fix `Missing columns` errors when selecting columns which are absent in data but depend on other columns which are also absent in data. Fixes [#15530](https://github.com/ClickHouse/ClickHouse/issues/15530). [#15532](https://github.com/ClickHouse/ClickHouse/pull/15532) ([alesapin](https://github.com/alesapin)).
+* Fix a bug with event subscription in DDLWorker which rarely may lead to query hangs in `ON CLUSTER`. Introduced in [#13450](https://github.com/ClickHouse/ClickHouse/issues/13450). [#15477](https://github.com/ClickHouse/ClickHouse/pull/15477) ([alesapin](https://github.com/alesapin)).
+* Report a proper error when the second argument of the `boundingRatio` aggregate function has a wrong type. [#15407](https://github.com/ClickHouse/ClickHouse/pull/15407) ([detailyang](https://github.com/detailyang)).
+* Fix race condition during MergeTree table rename and background cleanup. [#15304](https://github.com/ClickHouse/ClickHouse/pull/15304) ([alesapin](https://github.com/alesapin)).
+* Fix a rare race condition on server startup when system.logs are enabled. [#15300](https://github.com/ClickHouse/ClickHouse/pull/15300) ([alesapin](https://github.com/alesapin)).
+* Fix MSan report in QueryLog. Uninitialized memory could be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix instance crash when using joinGet with LowCardinality types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
+* Fix a bug in the table engine `Buffer` which doesn't allow inserting data of a new structure into `Buffer` after an `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)).
+* Adjust the decimals field size in the MySQL column definition packet. [#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)).
+* We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison, which corrects the usage of FixedString as primary keys. This fixes [#14908](https://github.com/ClickHouse/ClickHouse/issues/14908). [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
+* If the function `bar` was called with specifically crafted arguments, a buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed the `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in an Atomic database when running clickhouse-server in Docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([tavplubix](https://github.com/tavplubix)).
+* Now the settings `number_of_free_entries_in_pool_to_execute_mutation` and `number_of_free_entries_in_pool_to_lower_max_size_of_merge` can be equal to `background_pool_size`. [#14975](https://github.com/ClickHouse/ClickHouse/pull/14975) ([alesapin](https://github.com/alesapin)).
+* Fix to make predicate push down work when the subquery contains the finalizeAggregation function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)).
+* Fixed `.metadata.tmp File exists` error when using the `MaterializeMySQL` database engine. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)). +* Fix a problem where the server may get stuck on startup while talking to ZooKeeper, if the configuration files have to be fetched from ZK (using the `from_zk` include option). This fixes [#14814](https://github.com/ClickHouse/ClickHouse/issues/14814). [#14843](https://github.com/ClickHouse/ClickHouse/pull/14843) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Fix wrong monotonicity detection for shrunk `Int -> Int` cast of signed types. It might lead to an incorrect query result. This bug was revealed in [#14513](https://github.com/ClickHouse/ClickHouse/issues/14513). [#14783](https://github.com/ClickHouse/ClickHouse/pull/14783) ([Amos Bird](https://github.com/amosbird)). +* Fixed the incorrect sorting order of `Nullable` columns. This fixes [#14344](https://github.com/ClickHouse/ClickHouse/issues/14344). [#14495](https://github.com/ClickHouse/ClickHouse/pull/14495) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Improvement + +* Now it's possible to change the type of the version column for `VersionedCollapsingMergeTree` with an `ALTER` query. [#15442](https://github.com/ClickHouse/ClickHouse/pull/15442) ([alesapin](https://github.com/alesapin)). + + +### ClickHouse release v20.8.3.18-stable, 2020-09-18 + +#### Bug Fix + +* Fix the issue when some invocations of the `extractAllGroups` function may trigger a "Memory limit exceeded" error. This fixes [#13383](https://github.com/ClickHouse/ClickHouse/issues/13383). [#14889](https://github.com/ClickHouse/ClickHouse/pull/14889) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix SIGSEGV on an attempt to INSERT into StorageFile(fd). [#14887](https://github.com/ClickHouse/ClickHouse/pull/14887) ([Azat Khuzhin](https://github.com/azat)). +* Fix a rare error in `SELECT` queries when the queried column has a `DEFAULT` expression which depends on another column that also has a `DEFAULT` expression and is neither present in the select query nor exists on disk. Partially fixes [#14531](https://github.com/ClickHouse/ClickHouse/issues/14531). [#14845](https://github.com/ClickHouse/ClickHouse/pull/14845) ([alesapin](https://github.com/alesapin)). +* Fixed a missing default database name in the metadata of a materialized view when executing `ALTER ... MODIFY QUERY`. [#14664](https://github.com/ClickHouse/ClickHouse/pull/14664) ([tavplubix](https://github.com/tavplubix)). +* Fix a bug where an `ALTER UPDATE` mutation with a Nullable column in the assignment expression and a constant value (like `UPDATE x = 42`) led to an incorrect value in the column or a segfault. Fixes [#13634](https://github.com/ClickHouse/ClickHouse/issues/13634), [#14045](https://github.com/ClickHouse/ClickHouse/issues/14045). [#14646](https://github.com/ClickHouse/ClickHouse/pull/14646) ([alesapin](https://github.com/alesapin)). +* Fix a wrong Decimal multiplication result that caused a wrong decimal scale of the result column. [#14603](https://github.com/ClickHouse/ClickHouse/pull/14603) ([Artem Zuikov](https://github.com/4ertus2)). +* Added a checker, as neither calling `lc->isNullable()` nor calling `ls->getDictionaryPtr()->isNullable()` would return the correct result. [#14591](https://github.com/ClickHouse/ClickHouse/pull/14591) ([myrrc](https://github.com/myrrc)). +* Clean up the data directory after ZooKeeper exceptions during CreateQuery for the StorageReplicatedMergeTree engine.
[#14563](https://github.com/ClickHouse/ClickHouse/pull/14563) ([Bharat Nallan](https://github.com/bharatnc)). +* Fix rare segfaults in functions with the `-Resample` combinator, which could appear as a result of overflow with very large parameters. [#14562](https://github.com/ClickHouse/ClickHouse/pull/14562) ([Anton Popov](https://github.com/CurtizJ)). + +#### Improvement + +* Speed up the server shutdown process if there are ongoing S3 requests. [#14858](https://github.com/ClickHouse/ClickHouse/pull/14858) ([Pavel Kovalenko](https://github.com/Jokser)). +* Allow using a multi-volume storage configuration in the Distributed storage engine. [#14839](https://github.com/ClickHouse/ClickHouse/pull/14839) ([Pavel Kovalenko](https://github.com/Jokser)). +* Speed up the server shutdown process if there are ongoing S3 requests. [#14496](https://github.com/ClickHouse/ClickHouse/pull/14496) ([Pavel Kovalenko](https://github.com/Jokser)). +* Support custom codecs in compact parts. [#12183](https://github.com/ClickHouse/ClickHouse/pull/12183) ([Anton Popov](https://github.com/CurtizJ)). + + ### ClickHouse release v20.8.2.3-stable, 2020-09-08 #### Backward Incompatible Change @@ -1636,6 +1964,74 @@ No changes compared to v20.4.3.16-stable. ## ClickHouse release v20.3 + +### ClickHouse release v20.3.21.2-lts, 2020-11-02 + +#### Bug Fix + +* Fix dictGet in sharding_key (and similar places, i.e. when the function context is stored permanently). [#16205](https://github.com/ClickHouse/ClickHouse/pull/16205) ([Azat Khuzhin](https://github.com/azat)). +* Fix an incorrect empty result for a query from a `Distributed` table if the query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix missing or excessive headers in `TSV/CSVWithNames` formats. This fixes [#12504](https://github.com/ClickHouse/ClickHouse/issues/12504). [#13343](https://github.com/ClickHouse/ClickHouse/pull/13343) ([Azat Khuzhin](https://github.com/azat)). + + +### ClickHouse release v20.3.20.6-lts, 2020-10-09 + +#### Bug Fix + +* A mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15724](https://github.com/ClickHouse/ClickHouse/pull/15724), [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)). +* Fix hangs of queries with a lot of subqueries to the same table of the `MySQL` engine. Previously, if there were more than 16 subqueries to the same `MySQL` table in a query, it hung forever. [#15299](https://github.com/ClickHouse/ClickHouse/pull/15299) ([Anton Popov](https://github.com/CurtizJ)). +* Fix 'Unknown identifier' in GROUP BY when the query has a JOIN over a Merge table. [#15242](https://github.com/ClickHouse/ClickHouse/pull/15242) ([Artem Zuikov](https://github.com/4ertus2)). +* Make predicate push down work when a subquery contains the finalizeAggregation function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)). +* Concurrent `ALTER ... REPLACE/MOVE PARTITION ...` queries might cause a deadlock. It's fixed. [#13626](https://github.com/ClickHouse/ClickHouse/pull/13626) ([tavplubix](https://github.com/tavplubix)).
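To illustrate the last fix, here is a minimal sketch of the concurrent pattern that could previously deadlock (hypothetical tables `dst`, `src` and `other`, partitioned by month):

```sql
-- Before the fix, running these two statements at the same time from separate
-- sessions could deadlock; after it, they serialize correctly.
ALTER TABLE dst REPLACE PARTITION 202009 FROM src;    -- session 1
ALTER TABLE src MOVE PARTITION 202009 TO TABLE other; -- session 2
```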
+ + +### ClickHouse release v20.3.19.4-lts, 2020-09-18 + +#### Bug Fix + +* Fix a rare error in `SELECT` queries when the queried column has a `DEFAULT` expression which depends on another column that also has a `DEFAULT` expression and is neither present in the select query nor exists on disk. Partially fixes [#14531](https://github.com/ClickHouse/ClickHouse/issues/14531). [#14845](https://github.com/ClickHouse/ClickHouse/pull/14845) ([alesapin](https://github.com/alesapin)). +* Fix a bug where an `ALTER UPDATE` mutation with a Nullable column in the assignment expression and a constant value (like `UPDATE x = 42`) led to an incorrect value in the column or a segfault. Fixes [#13634](https://github.com/ClickHouse/ClickHouse/issues/13634), [#14045](https://github.com/ClickHouse/ClickHouse/issues/14045). [#14646](https://github.com/ClickHouse/ClickHouse/pull/14646) ([alesapin](https://github.com/alesapin)). +* Fix a wrong Decimal multiplication result that caused a wrong decimal scale of the result column. [#14603](https://github.com/ClickHouse/ClickHouse/pull/14603) ([Artem Zuikov](https://github.com/4ertus2)). + +#### Improvement + +* Support custom codecs in compact parts. [#12183](https://github.com/ClickHouse/ClickHouse/pull/12183) ([Anton Popov](https://github.com/CurtizJ)). + + +### ClickHouse release v20.3.18.10-lts, 2020-09-08 + +#### Bug Fix + +* Stop query execution if an exception happened in `PipelineExecutor` itself. This could prevent rare possible query hangs. Continuation of [#14334](https://github.com/ClickHouse/ClickHouse/issues/14334). [#14402](https://github.com/ClickHouse/ClickHouse/pull/14402) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed the behaviour when the cache dictionary sometimes returned the default value instead of the present value from the source. [#13624](https://github.com/ClickHouse/ClickHouse/pull/13624) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix parsing row policies from users.xml when the names of databases or tables contain dots. This fixes [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779), [#12527](https://github.com/ClickHouse/ClickHouse/issues/12527). [#13199](https://github.com/ClickHouse/ClickHouse/pull/13199) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix CAST(Nullable(String), Enum()). [#12745](https://github.com/ClickHouse/ClickHouse/pull/12745) ([Azat Khuzhin](https://github.com/azat)). +* Fixed a data race in `text_log`. It did not correspond to any real bug. [#9726](https://github.com/ClickHouse/ClickHouse/pull/9726) ([alexey-milovidov](https://github.com/alexey-milovidov)). + +#### Improvement + +* Fix a wrong error for long queries. It was possible to get a syntax error other than `Max query size exceeded` for a correct query. [#13928](https://github.com/ClickHouse/ClickHouse/pull/13928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Return NULL/zero when the value is not parsed completely in the parseDateTimeBestEffortOrNull/Zero functions. This fixes [#7876](https://github.com/ClickHouse/ClickHouse/issues/7876). [#11653](https://github.com/ClickHouse/ClickHouse/pull/11653) ([alexey-milovidov](https://github.com/alexey-milovidov)). + +#### Performance Improvement + +* Slightly optimize very short queries with LowCardinality. [#14129](https://github.com/ClickHouse/ClickHouse/pull/14129) ([Anton Popov](https://github.com/CurtizJ)). + +#### Build/Testing/Packaging Improvement + +* Fix UBSan report (adding zero to nullptr) in HashTable that appeared after migration to clang-10.
[#10638](https://github.com/ClickHouse/ClickHouse/pull/10638) ([alexey-milovidov](https://github.com/alexey-milovidov)). + + +### ClickHouse release v20.3.17.173-lts, 2020-08-15 + +#### Bug Fix + +* Fix crash in JOIN with StorageMerge and `set enable_optimize_predicate_expression=1`. [#13679](https://github.com/ClickHouse/ClickHouse/pull/13679) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix invalid return type for comparison of tuples with `NULL` elements. Fixes [#12461](https://github.com/ClickHouse/ClickHouse/issues/12461). [#13420](https://github.com/ClickHouse/ClickHouse/pull/13420) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix queries with constant columns and `ORDER BY` prefix of primary key. [#13396](https://github.com/ClickHouse/ClickHouse/pull/13396) ([Anton Popov](https://github.com/CurtizJ)). +* Return passed number for numbers with MSB set in roundUpToPowerOfTwoOrZero(). [#13234](https://github.com/ClickHouse/ClickHouse/pull/13234) ([Azat Khuzhin](https://github.com/azat)). + + ### ClickHouse release v20.3.16.165-lts 2020-08-10 #### Bug Fix diff --git a/CMakeLists.txt b/CMakeLists.txt index 783a9f80b66..1daff973bb1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -154,17 +154,19 @@ endif () # Make sure the final executable has symbols exported set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic") -find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy") -if (OBJCOPY_PATH) - message(STATUS "Using objcopy: ${OBJCOPY_PATH}.") +if (OS_LINUX) + find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy") + if (OBJCOPY_PATH) + message(STATUS "Using objcopy: ${OBJCOPY_PATH}.") - if (ARCH_AMD64) - set(OBJCOPY_ARCH_OPTIONS -O elf64-x86-64 -B i386) - elseif (ARCH_AARCH64) - set(OBJCOPY_ARCH_OPTIONS -O elf64-aarch64 -B aarch64) + if (ARCH_AMD64) + set(OBJCOPY_ARCH_OPTIONS -O elf64-x86-64 -B i386) + elseif (ARCH_AARCH64) + set(OBJCOPY_ARCH_OPTIONS -O elf64-aarch64 -B aarch64) + endif () + else () + message(FATAL_ERROR "Cannot find objcopy.") endif () -else () - message(FATAL_ERROR "Cannot find objcopy.") endif () if (OS_DARWIN) @@ -445,6 +447,7 @@ include (cmake/find/brotli.cmake) include (cmake/find/protobuf.cmake) include (cmake/find/grpc.cmake) include (cmake/find/pdqsort.cmake) +include (cmake/find/miniselect.cmake) include (cmake/find/hdfs3.cmake) # uses protobuf include (cmake/find/poco.cmake) include (cmake/find/curl.cmake) @@ -455,6 +458,8 @@ include (cmake/find/simdjson.cmake) include (cmake/find/rapidjson.cmake) include (cmake/find/fastops.cmake) include (cmake/find/odbc.cmake) +include (cmake/find/rocksdb.cmake) + if(NOT USE_INTERNAL_PARQUET_LIBRARY) set (ENABLE_ORC OFF CACHE INTERNAL "") @@ -472,9 +477,6 @@ find_contrib_lib(cityhash) find_contrib_lib(farmhash) -set (USE_INTERNAL_BTRIE_LIBRARY ON CACHE INTERNAL "") -find_contrib_lib(btrie) - if (ENABLE_TESTS) include (cmake/find/gtest.cmake) endif () diff --git a/README.md b/README.md index 03b5c988586..fc39cf90861 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ [![ClickHouse — open source distributed column-oriented DBMS](https://github.com/ClickHouse/ClickHouse/raw/master/website/images/logo-400x240.png)](https://clickhouse.tech) -ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time. 
+ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real time. ## Useful Links @@ -14,9 +14,3 @@ ClickHouse is an open-source column-oriented database management system that all * [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian. * [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any. * You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person. - -## Upcoming Events - -* [The Second ClickHouse Meetup East (online)](https://www.eventbrite.com/e/the-second-clickhouse-meetup-east-tickets-126787955187) on October 31, 2020. -* [ClickHouse for Enterprise Meetup (online in Russian)](https://arenadata-events.timepad.ru/event/1465249/) on November 10, 2020. - diff --git a/base/common/StringRef.h b/base/common/StringRef.h index b51b95456cb..ac9d7c47b72 100644 --- a/base/common/StringRef.h +++ b/base/common/StringRef.h @@ -1,6 +1,7 @@ #pragma once #include +#include <stdexcept> // for std::logic_error #include #include #include diff --git a/base/common/logger_useful.h b/base/common/logger_useful.h index f760d59de45..d3b4d38d546 100644 --- a/base/common/logger_useful.h +++ b/base/common/logger_useful.h @@ -3,7 +3,6 @@ /// Macros for convenient usage of Poco logger. #include -#include <sstream> #include #include #include diff --git a/base/common/sort.h b/base/common/sort.h new file mode 100644 index 00000000000..2128014ca5e --- /dev/null +++ b/base/common/sort.h @@ -0,0 +1,37 @@ +#pragma once + +#if !defined(ARCADIA_BUILD) +# include <miniselect/floyd_rivest_select.h> // Y_IGNORE +#else +# include <algorithm> +#endif + +template <typename RandomIt> +void nth_element(RandomIt first, RandomIt nth, RandomIt last) +{ +#if !defined(ARCADIA_BUILD) + ::miniselect::floyd_rivest_select(first, nth, last); +#else + ::std::nth_element(first, nth, last); +#endif +} + +template <typename RandomIt> +void partial_sort(RandomIt first, RandomIt middle, RandomIt last) +{ +#if !defined(ARCADIA_BUILD) + ::miniselect::floyd_rivest_partial_sort(first, middle, last); +#else + ::std::partial_sort(first, middle, last); +#endif +} + +template <typename RandomIt, typename Compare> +void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compare) +{ +#if !defined(ARCADIA_BUILD) + ::miniselect::floyd_rivest_partial_sort(first, middle, last, compare); +#else + ::std::partial_sort(first, middle, last, compare); +#endif +} diff --git a/base/common/wide_integer_impl.h b/base/common/wide_integer_impl.h index d90bde30a43..2a889819c11 100644 --- a/base/common/wide_integer_impl.h +++ b/base/common/wide_integer_impl.h @@ -5,6 +5,9 @@ /// (See at http://www.boost.org/LICENSE_1_0.txt) #include "throwError.h" +#include <cmath> +#include <cfloat> +#include <cassert> namespace wide { @@ -192,7 +195,7 @@ struct integer<Bits, Signed>::_impl } template <typename T> - constexpr static auto to_Integral(T f) noexcept + __attribute__((no_sanitize("undefined"))) constexpr static auto to_Integral(T f) noexcept { if constexpr (std::is_same_v) return f; @@ -225,25 +228,54 @@ struct integer<Bits, Signed>::_impl self.items[i] = 0; } - constexpr static void wide_integer_from_bultin(integer<Bits, Signed> & self, double rhs) noexcept - { - if ((rhs > 0 && rhs < std::numeric_limits::max()) || (rhs < 0 && rhs > std::numeric_limits::min())) + /** + * N.B. t is constructed from double, so max(t) = max(double) ~ 2^310 + * the recursive call happens when t / 2^64 > 2^64, so there won't be more than 5 of them.
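+ * For example: for t ~ 2^150, alpha = t / 2^64 ~ 2^86 still exceeds max_int, so one more + * recursive level follows; there alpha ~ 2^22 already fits into uint64_t, and the pending + * `self *= max_int; self += b_i` steps unwind to reassemble the value.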
+ * + * t = a1 * max_int + b1, a1 > max_int, b1 < max_int + * a1 = a2 * max_int + b2, a2 > max_int, b2 < max_int + * a_(n - 1) = a_n * max_int + b_n, a_n <= max_int <- base case. + */ + template <typename T> + constexpr static void set_multiplier(integer<Bits, Signed> & self, T t) noexcept { + constexpr uint64_t max_int = std::numeric_limits<uint64_t>::max(); + const T alpha = t / max_int; + + if (alpha <= max_int) + self = static_cast<uint64_t>(alpha); + else // max(double) / 2^64 will surely contain less than 52 precision bits, so speed up computations. + set_multiplier<double>(self, alpha); + + self *= max_int; + self += static_cast<uint64_t>(t - alpha * max_int); // += b_i + } + + constexpr static void wide_integer_from_bultin(integer<Bits, Signed> & self, double rhs) noexcept { + constexpr int64_t max_int = std::numeric_limits<int64_t>::max(); + constexpr int64_t min_int = std::numeric_limits<int64_t>::min(); + + /// There are values in int64 that have more than 53 significant bits (in terms of double + /// representation). Such values, being promoted to double, are rounded up or down. If they are rounded up, + /// the result may not fit in 64 bits. + /// The example of such a number is 9.22337e+18. + /// As to_Integral does a static_cast to int64_t, it may result in UB. + /// The necessary check here is that long double has enough significant (mantissa) bits to store the + /// int64_t max value precisely. + static_assert(LDBL_MANT_DIG >= 64, + "On your system long double has less than 64 precision bits, " + "which may result in UB when initializing double from int64_t"); + + if ((rhs > 0 && rhs < max_int) || (rhs < 0 && rhs > min_int)) { - self = to_Integral(rhs); + self = static_cast<int64_t>(rhs); return; } - long double r = rhs; - if (r < 0) - r = -r; + const long double rhs_long_double = (static_cast<long double>(rhs) < 0) + ? -static_cast<long double>(rhs) + : rhs; - size_t count = r / std::numeric_limits<uint64_t>::max(); - self = count; - self *= std::numeric_limits<uint64_t>::max(); - long double to_diff = count; - to_diff *= std::numeric_limits<uint64_t>::max(); - - self += to_Integral(r - to_diff); + set_multiplier(self, rhs_long_double); if (rhs < 0) self = -self; diff --git a/base/common/ya.make b/base/common/ya.make index 02e0e90fe58..adbbe17b486 100644 --- a/base/common/ya.make +++ b/base/common/ya.make @@ -1,4 +1,6 @@ # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
+OWNER(g:clickhouse) + LIBRARY() ADDINCL( diff --git a/base/common/ya.make.in b/base/common/ya.make.in index 89c075da309..bcac67c7923 100644 --- a/base/common/ya.make.in +++ b/base/common/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() ADDINCL( diff --git a/base/daemon/ya.make b/base/daemon/ya.make index 75ea54b6021..f3b4059f002 100644 --- a/base/daemon/ya.make +++ b/base/daemon/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() NO_COMPILER_WARNINGS() diff --git a/base/glibc-compatibility/musl/accept4.c b/base/glibc-compatibility/musl/accept4.c new file mode 100644 index 00000000000..59ab1726bdc --- /dev/null +++ b/base/glibc-compatibility/musl/accept4.c @@ -0,0 +1,19 @@ +#define _GNU_SOURCE +#include <sys/socket.h> +#include <errno.h> +#include <fcntl.h> +#include "syscall.h" + +int accept4(int fd, struct sockaddr *restrict addr, socklen_t *restrict len, int flg) +{ + if (!flg) return accept(fd, addr, len); + int ret = socketcall_cp(accept4, fd, addr, len, flg, 0, 0); + if (ret>=0 || (errno != ENOSYS && errno != EINVAL)) return ret; + ret = accept(fd, addr, len); + if (ret<0) return ret; + if (flg & SOCK_CLOEXEC) + __syscall(SYS_fcntl, ret, F_SETFD, FD_CLOEXEC); + if (flg & SOCK_NONBLOCK) + __syscall(SYS_fcntl, ret, F_SETFL, O_NONBLOCK); + return ret; +} diff --git a/base/glibc-compatibility/musl/epoll.c b/base/glibc-compatibility/musl/epoll.c new file mode 100644 index 00000000000..deff5b101aa --- /dev/null +++ b/base/glibc-compatibility/musl/epoll.c @@ -0,0 +1,37 @@ +#include <sys/epoll.h> +#include <signal.h> +#include <errno.h> +#include "syscall.h" + +int epoll_create(int size) +{ + return epoll_create1(0); +} + +int epoll_create1(int flags) +{ + int r = __syscall(SYS_epoll_create1, flags); +#ifdef SYS_epoll_create + if (r==-ENOSYS && !flags) r = __syscall(SYS_epoll_create, 1); +#endif + return __syscall_ret(r); +} + +int epoll_ctl(int fd, int op, int fd2, struct epoll_event *ev) +{ + return syscall(SYS_epoll_ctl, fd, op, fd2, ev); +} + +int epoll_pwait(int fd, struct epoll_event *ev, int cnt, int to, const sigset_t *sigs) +{ + int r = __syscall(SYS_epoll_pwait, fd, ev, cnt, to, sigs, _NSIG/8); +#ifdef SYS_epoll_wait + if (r==-ENOSYS && !sigs) r = __syscall(SYS_epoll_wait, fd, ev, cnt, to); +#endif + return __syscall_ret(r); +} + +int epoll_wait(int fd, struct epoll_event *ev, int cnt, int to) +{ + return epoll_pwait(fd, ev, cnt, to, 0); +} diff --git a/base/glibc-compatibility/musl/eventfd.c b/base/glibc-compatibility/musl/eventfd.c new file mode 100644 index 00000000000..68e489c8364 --- /dev/null +++ b/base/glibc-compatibility/musl/eventfd.c @@ -0,0 +1,23 @@ +#include <sys/eventfd.h> +#include <unistd.h> +#include <errno.h> +#include "syscall.h" + +int eventfd(unsigned int count, int flags) +{ + int r = __syscall(SYS_eventfd2, count, flags); +#ifdef SYS_eventfd + if (r==-ENOSYS && !flags) r = __syscall(SYS_eventfd, count); +#endif + return __syscall_ret(r); +} + +int eventfd_read(int fd, eventfd_t *value) +{ + return (sizeof(*value) == read(fd, value, sizeof(*value))) ? 0 : -1; +} + +int eventfd_write(int fd, eventfd_t value) +{ + return (sizeof(value) == write(fd, &value, sizeof(value))) ? 0 : -1; +} diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c new file mode 100644 index 00000000000..a429273fa1a --- /dev/null +++ b/base/glibc-compatibility/musl/getauxval.c @@ -0,0 +1,45 @@ +#include <sys/auxv.h> +#include <unistd.h> // __environ +#include <errno.h> + +// We don't have libc struct available here. Compute aux vector manually.
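+// The kernel places the aux vector directly after the environment block: envp[0..n], a NULL terminator, +// then (type, value) pairs of unsigned longs terminated by an AT_NULL entry. That is why __auxv_init() +// below skips past __environ and why __find_auxv() steps through the entries with a stride of 2.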
+static unsigned long * __auxv = NULL; +static unsigned long __auxv_secure = 0; + +static size_t __find_auxv(unsigned long type) +{ + size_t i; + for (i = 0; __auxv[i]; i += 2) + { + if (__auxv[i] == type) + return i + 1; + } + return (size_t) -1; +} + +__attribute__((constructor)) static void __auxv_init() +{ + size_t i; + for (i = 0; __environ[i]; i++); + __auxv = (unsigned long *) (__environ + i + 1); + + size_t secure_idx = __find_auxv(AT_SECURE); + if (secure_idx != ((size_t) -1)) + __auxv_secure = __auxv[secure_idx]; +} + +unsigned long getauxval(unsigned long type) +{ + if (type == AT_SECURE) + return __auxv_secure; + + if (__auxv) + { + size_t index = __find_auxv(type); + if (index != ((size_t) -1)) + return __auxv[index]; + } + + errno = ENOENT; + return 0; +} diff --git a/base/glibc-compatibility/musl/secure_getenv.c b/base/glibc-compatibility/musl/secure_getenv.c new file mode 100644 index 00000000000..fbd9ef3bdcc --- /dev/null +++ b/base/glibc-compatibility/musl/secure_getenv.c @@ -0,0 +1,8 @@ +#define _GNU_SOURCE +#include <stdlib.h> +#include <sys/auxv.h> + +char * secure_getenv(const char * name) +{ + return getauxval(AT_SECURE) ? NULL : getenv(name); +} diff --git a/base/glibc-compatibility/musl/sync_file_range.c b/base/glibc-compatibility/musl/sync_file_range.c new file mode 100644 index 00000000000..610b11c8c96 --- /dev/null +++ b/base/glibc-compatibility/musl/sync_file_range.c @@ -0,0 +1,21 @@ +#define _GNU_SOURCE +#include <fcntl.h> +#include <errno.h> +#include "syscall.h" + +// works the same on x86_64 and aarch64 +#define __SYSCALL_LL_E(x) (x) +#define __SYSCALL_LL_O(x) (x) + +int sync_file_range(int fd, off_t pos, off_t len, unsigned flags) +{ +#if defined(SYS_sync_file_range2) + return syscall(SYS_sync_file_range2, fd, flags, + __SYSCALL_LL_E(pos), __SYSCALL_LL_E(len)); +#elif defined(SYS_sync_file_range) + return __syscall(SYS_sync_file_range, fd, + __SYSCALL_LL_O(pos), __SYSCALL_LL_E(len), flags); +#else + return __syscall_ret(-ENOSYS); +#endif +} \ No newline at end of file diff --git a/base/glibc-compatibility/musl/syscall.h b/base/glibc-compatibility/musl/syscall.h index 70b4688f642..3160357f252 100644 --- a/base/glibc-compatibility/musl/syscall.h +++ b/base/glibc-compatibility/musl/syscall.h @@ -13,3 +13,11 @@ long __syscall(syscall_arg_t, ...); __attribute__((visibility("hidden"))) void *__vdsosym(const char *, const char *); + +#define syscall(...) __syscall_ret(__syscall(__VA_ARGS__)) + +#define socketcall(...)
__syscall_ret(__socketcall(__VA_ARGS__)) + +#define __socketcall(nm,a,b,c,d,e,f) __syscall(SYS_##nm, a, b, c, d, e, f) + +#define socketcall_cp socketcall diff --git a/base/glibc-compatibility/musl/vdso.c b/base/glibc-compatibility/musl/vdso.c index c0dd0f33e4e..b108c4ef752 100644 --- a/base/glibc-compatibility/musl/vdso.c +++ b/base/glibc-compatibility/musl/vdso.c @@ -40,24 +40,10 @@ static int checkver(Verdef *def, int vsym, const char *vername, char *strings) #define OK_TYPES (1<e_phoff); size_t *dynv=0, base=-1; diff --git a/base/loggers/ya.make b/base/loggers/ya.make index 6cb95633c72..943b6f12b73 100644 --- a/base/loggers/ya.make +++ b/base/loggers/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/base/pcg-random/pcg_random.hpp b/base/pcg-random/pcg_random.hpp index d96d5895b31..f7778480c4c 100644 --- a/base/pcg-random/pcg_random.hpp +++ b/base/pcg-random/pcg_random.hpp @@ -113,6 +113,12 @@ #include "pcg_extras.hpp" +namespace DB +{ + struct PcgSerializer; + struct PcgDeserializer; +} + namespace pcg_detail { using namespace pcg_extras; @@ -557,6 +563,9 @@ public: engine& rng); + + friend ::DB::PcgSerializer; + friend ::DB::PcgDeserializer; }; template [DESCRIPTORS ] [EXPORT_MACRO ] [...]) @@ -242,6 +247,7 @@ find_library(gRPC_LIBRARY NAMES grpc) find_library(gRPC_CPP_LIBRARY NAMES grpc++) find_library(gRPC_UNSECURE_LIBRARY NAMES grpc_unsecure) find_library(gRPC_CPP_UNSECURE_LIBRARY NAMES grpc++_unsecure) +find_library(gRPC_CARES_LIBRARY NAMES cares) set(gRPC_LIBRARIES) if(gRPC_USE_UNSECURE_LIBRARIES) @@ -259,6 +265,7 @@ else() set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_CPP_LIBRARY}) endif() endif() +set(gRPC_LIBRARIES ${gRPC_LIBRARIES} ${gRPC_CARES_LIBRARY}) # Restore the original find library ordering. if(gRPC_USE_STATIC_LIBS) @@ -278,11 +285,11 @@ else() endif() # Get full path to plugin. 
-find_program(_gRPC_CPP_PLUGIN +find_program(gRPC_CPP_PLUGIN NAMES grpc_cpp_plugin DOC "The plugin for generating gRPC client and server C++ stubs from `.proto` files") -find_program(_gRPC_PYTHON_PLUGIN +find_program(gRPC_PYTHON_PLUGIN NAMES grpc_python_plugin DOC "The plugin for generating gRPC client and server Python stubs from `.proto` files") @@ -317,14 +324,14 @@ endif() #include(FindPackageHandleStandardArgs.cmake) FIND_PACKAGE_HANDLE_STANDARD_ARGS(gRPC - REQUIRED_VARS gRPC_LIBRARY gRPC_CPP_LIBRARY gRPC_UNSECURE_LIBRARY gRPC_CPP_UNSECURE_LIBRARY - gRPC_INCLUDE_DIR gRPC_CPP_INCLUDE_DIR _gRPC_CPP_PLUGIN _gRPC_PYTHON_PLUGIN) + REQUIRED_VARS gRPC_LIBRARY gRPC_CPP_LIBRARY gRPC_UNSECURE_LIBRARY gRPC_CPP_UNSECURE_LIBRARY gRPC_CARES_LIBRARY + gRPC_INCLUDE_DIR gRPC_CPP_INCLUDE_DIR gRPC_CPP_PLUGIN gRPC_PYTHON_PLUGIN) if(gRPC_FOUND) if(gRPC_DEBUG) message(STATUS "gRPC: INCLUDE_DIRS=${gRPC_INCLUDE_DIRS}") message(STATUS "gRPC: LIBRARIES=${gRPC_LIBRARIES}") - message(STATUS "gRPC: CPP_PLUGIN=${_gRPC_CPP_PLUGIN}") - message(STATUS "gRPC: PYTHON_PLUGIN=${_gRPC_PYTHON_PLUGIN}") + message(STATUS "gRPC: CPP_PLUGIN=${gRPC_CPP_PLUGIN}") + message(STATUS "gRPC: PYTHON_PLUGIN=${gRPC_PYTHON_PLUGIN}") endif() endif() diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 0e65568f185..87a30c9effc 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -1,9 +1,9 @@ # This strings autochanged from release_lib.sh: -SET(VERSION_REVISION 54443) +SET(VERSION_REVISION 54444) SET(VERSION_MAJOR 20) -SET(VERSION_MINOR 12) +SET(VERSION_MINOR 13) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH c53725fb1f846fda074347607ab582fbb9c6f7a1) -SET(VERSION_DESCRIBE v20.12.1.1-prestable) -SET(VERSION_STRING 20.12.1.1) +SET(VERSION_GITHASH e581f9ccfc5c64867b0f488cce72412fd2966471) +SET(VERSION_DESCRIBE v20.13.1.1-prestable) +SET(VERSION_STRING 20.13.1.1) # end of autochange diff --git a/cmake/darwin/default_libs.cmake b/cmake/darwin/default_libs.cmake index 7b57e63f4ee..4ee1bcdcfbf 100644 --- a/cmake/darwin/default_libs.cmake +++ b/cmake/darwin/default_libs.cmake @@ -12,13 +12,7 @@ set(CMAKE_CXX_STANDARD_LIBRARIES ${DEFAULT_LIBS}) set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS}) # Minimal supported SDK version - -set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mmacosx-version-min=10.15") -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmacosx-version-min=10.15") -set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mmacosx-version-min=10.15") - -set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -mmacosx-version-min=10.15") -set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -mmacosx-version-min=10.15") +set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15) # Global libraries diff --git a/cmake/find/avro.cmake b/cmake/find/avro.cmake index e0f73d99111..74ccda3489f 100644 --- a/cmake/find/avro.cmake +++ b/cmake/find/avro.cmake @@ -1,3 +1,4 @@ +# Needed when using Apache Avro serialization format option (ENABLE_AVRO "Enable Avro" ${ENABLE_LIBRARIES}) if (NOT ENABLE_AVRO) diff --git a/cmake/find/grpc.cmake b/cmake/find/grpc.cmake index fa283d98225..017a7b094b0 100644 --- a/cmake/find/grpc.cmake +++ b/cmake/find/grpc.cmake @@ -37,8 +37,8 @@ if(NOT USE_INTERNAL_GRPC_LIBRARY) if(NOT gRPC_INCLUDE_DIRS OR NOT gRPC_LIBRARIES) message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system gRPC library") set(EXTERNAL_GRPC_LIBRARY_FOUND 0) - elseif(NOT _gRPC_CPP_PLUGIN) - message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find system grcp_cpp_plugin") + elseif(NOT gRPC_CPP_PLUGIN) + message(${RECONFIGURE_MESSAGE_LEVEL} "Can't 
find system grpc_cpp_plugin") set(EXTERNAL_GRPC_LIBRARY_FOUND 0) else() set(EXTERNAL_GRPC_LIBRARY_FOUND 1) @@ -53,8 +53,8 @@ if(NOT EXTERNAL_GRPC_LIBRARY_FOUND AND NOT MISSING_INTERNAL_GRPC_LIBRARY) else() set(gRPC_LIBRARIES grpc grpc++) endif() - set(_gRPC_CPP_PLUGIN $<TARGET_FILE:grpc_cpp_plugin>) - set(_gRPC_PROTOC_EXECUTABLE $) + set(gRPC_CPP_PLUGIN $<TARGET_FILE:grpc_cpp_plugin>) + set(gRPC_PYTHON_PLUGIN $<TARGET_FILE:grpc_python_plugin>) include("${ClickHouse_SOURCE_DIR}/contrib/grpc-cmake/protobuf_generate_grpc.cmake") @@ -62,4 +62,4 @@ if(NOT EXTERNAL_GRPC_LIBRARY_FOUND AND NOT MISSING_INTERNAL_GRPC_LIBRARY) set(USE_GRPC 1) endif() -message(STATUS "Using gRPC=${USE_GRPC}: ${gRPC_INCLUDE_DIRS} : ${gRPC_LIBRARIES} : ${_gRPC_CPP_PLUGIN}") +message(STATUS "Using gRPC=${USE_GRPC}: ${gRPC_INCLUDE_DIRS} : ${gRPC_LIBRARIES} : ${gRPC_CPP_PLUGIN}") diff --git a/cmake/find/miniselect.cmake b/cmake/find/miniselect.cmake new file mode 100644 index 00000000000..0a50c9bf4a8 --- /dev/null +++ b/cmake/find/miniselect.cmake @@ -0,0 +1,2 @@ +set(MINISELECT_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/miniselect/include) +message(STATUS "Using miniselect: ${MINISELECT_INCLUDE_DIR}") diff --git a/cmake/find/rocksdb.cmake b/cmake/find/rocksdb.cmake new file mode 100644 index 00000000000..968cdb52407 --- /dev/null +++ b/cmake/find/rocksdb.cmake @@ -0,0 +1,67 @@ +option(ENABLE_ROCKSDB "Enable ROCKSDB" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_ROCKSDB) + if (USE_INTERNAL_ROCKSDB_LIBRARY) + message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal rocksdb library with ENABLE_ROCKSDB=OFF") + endif() + return() +endif() + +option(USE_INTERNAL_ROCKSDB_LIBRARY "Set to FALSE to use system ROCKSDB library instead of bundled" ${NOT_UNBUNDLED}) + +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/rocksdb/CMakeLists.txt") + if (USE_INTERNAL_ROCKSDB_LIBRARY) + message (WARNING "submodule contrib is missing.
to fix try run: \n git submodule update --init --recursive") + message(${RECONFIGURE_MESSAGE_LEVEL} "cannot find internal rocksdb") + endif() + set (MISSING_INTERNAL_ROCKSDB 1) +endif () + +if (NOT USE_INTERNAL_ROCKSDB_LIBRARY) + find_library (ROCKSDB_LIBRARY rocksdb) + find_path (ROCKSDB_INCLUDE_DIR NAMES rocksdb/db.h PATHS ${ROCKSDB_INCLUDE_PATHS}) + if (NOT ROCKSDB_LIBRARY OR NOT ROCKSDB_INCLUDE_DIR) + message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system rocksdb library") + endif() + + if (NOT SNAPPY_LIBRARY) + include(cmake/find/snappy.cmake) + endif() + if (NOT ZLIB_LIBRARY) + include(cmake/find/zlib.cmake) + endif() + + find_package(BZip2) + find_library(ZSTD_LIBRARY zstd) + find_library(LZ4_LIBRARY lz4) + find_library(GFLAGS_LIBRARY gflags) + + if(SNAPPY_LIBRARY AND ZLIB_LIBRARY AND LZ4_LIBRARY AND BZIP2_FOUND AND ZSTD_LIBRARY AND GFLAGS_LIBRARY) + list (APPEND ROCKSDB_LIBRARY ${SNAPPY_LIBRARY}) + list (APPEND ROCKSDB_LIBRARY ${ZLIB_LIBRARY}) + list (APPEND ROCKSDB_LIBRARY ${LZ4_LIBRARY}) + list (APPEND ROCKSDB_LIBRARY ${BZIP2_LIBRARY}) + list (APPEND ROCKSDB_LIBRARY ${ZSTD_LIBRARY}) + list (APPEND ROCKSDB_LIBRARY ${GFLAGS_LIBRARY}) + else() + message (${RECONFIGURE_MESSAGE_LEVEL} + "Can't find system rocksdb: snappy=${SNAPPY_LIBRARY} ;" + " zlib=${ZLIB_LIBRARY} ;" + " lz4=${LZ4_LIBRARY} ;" + " bz2=${BZIP2_LIBRARY} ;" + " zstd=${ZSTD_LIBRARY} ;" + " gflags=${GFLAGS_LIBRARY} ;") + endif() +endif () + +if(ROCKSDB_LIBRARY AND ROCKSDB_INCLUDE_DIR) + set(USE_ROCKSDB 1) +elseif (NOT MISSING_INTERNAL_ROCKSDB) + set (USE_INTERNAL_ROCKSDB_LIBRARY 1) + + set (ROCKSDB_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb/include") + set (ROCKSDB_LIBRARY "rocksdb") + set (USE_ROCKSDB 1) +endif () + +message (STATUS "Using ROCKSDB=${USE_ROCKSDB}: ${ROCKSDB_INCLUDE_DIR} : ${ROCKSDB_LIBRARY}") diff --git a/cmake/find/ssl.cmake b/cmake/find/ssl.cmake index 9058857c173..f7ac9174202 100644 --- a/cmake/find/ssl.cmake +++ b/cmake/find/ssl.cmake @@ -1,3 +1,5 @@ +# Needed when securely connecting to an external server, e.g. +# clickhouse-client --host ... --secure option(ENABLE_SSL "Enable ssl" ${ENABLE_LIBRARIES}) if(NOT ENABLE_SSL) diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index c5f3ce47775..8fa4a1129ed 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -23,7 +23,7 @@ option (WEVERYTHING "Enable -Weverything option with some exceptions." ON) # Control maximum size of stack frames. It can be important if the code is run in fibers with small stack size. # Only in release build because debug has too large stack frames. 
-if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE)) +if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE) AND (NOT CMAKE_CXX_COMPILER_ID MATCHES "AppleClang")) add_warning(frame-larger-than=32768) endif () diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index bec5f42c3e0..d9e5aa05aa5 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -36,6 +36,7 @@ add_subdirectory (murmurhash) add_subdirectory (replxx-cmake) add_subdirectory (ryu-cmake) add_subdirectory (unixodbc-cmake) +add_subdirectory (xz) add_subdirectory (poco-cmake) add_subdirectory (croaring-cmake) @@ -65,10 +66,6 @@ if (USE_INTERNAL_FARMHASH_LIBRARY) add_subdirectory (libfarmhash) endif () -if (USE_INTERNAL_BTRIE_LIBRARY) - add_subdirectory (libbtrie) -endif () - if (USE_INTERNAL_ZLIB_LIBRARY) set (ZLIB_ENABLE_TESTS 0 CACHE INTERNAL "") set (SKIP_INSTALL_ALL 1 CACHE INTERNAL "") @@ -289,3 +286,7 @@ if (USE_KRB5) add_subdirectory (cyrus-sasl-cmake) endif() endif() + +if (USE_INTERNAL_ROCKSDB_LIBRARY) + add_subdirectory(rocksdb-cmake) +endif() diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp new file mode 160000 index 00000000000..4f3b686f86c --- /dev/null +++ b/contrib/abseil-cpp @@ -0,0 +1 @@ +Subproject commit 4f3b686f86c3ebaba7e4e926e62a79cb1c659a54 diff --git a/contrib/cctz b/contrib/cctz index 7a2db4ece6e..260ba195ef6 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit 7a2db4ece6e0f1b246173cbdb62711ae258ee841 +Subproject commit 260ba195ef6c489968bae8c88c62a67cdac5ff9d diff --git a/contrib/grpc b/contrib/grpc index a6570b863cf..7436366ceb3 160000 --- a/contrib/grpc +++ b/contrib/grpc @@ -1 +1 @@ -Subproject commit a6570b863cf76c9699580ba51c7827d5bffaac43 +Subproject commit 7436366ceb341ba5c00ea29f1645e02a2b70bf93 diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt index 5ab70d83429..efb0f1c4f43 100644 --- a/contrib/grpc-cmake/CMakeLists.txt +++ b/contrib/grpc-cmake/CMakeLists.txt @@ -1,6 +1,7 @@ set(_gRPC_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/grpc") set(_gRPC_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/grpc") +# Use re2 from ClickHouse contrib, not from gRPC third_party. if(NOT RE2_INCLUDE_DIR) message(FATAL_ERROR " grpc: The location of the \"re2\" library is unknown") endif() @@ -8,6 +9,7 @@ set(gRPC_RE2_PROVIDER "clickhouse" CACHE STRING "" FORCE) set(_gRPC_RE2_INCLUDE_DIR "${RE2_INCLUDE_DIR}") set(_gRPC_RE2_LIBRARIES "${RE2_LIBRARY}") +# Use zlib from ClickHouse contrib, not from gRPC third_party. if(NOT ZLIB_INCLUDE_DIRS) message(FATAL_ERROR " grpc: The location of the \"zlib\" library is unknown") endif() @@ -15,6 +17,7 @@ set(gRPC_ZLIB_PROVIDER "clickhouse" CACHE STRING "" FORCE) set(_gRPC_ZLIB_INCLUDE_DIR "${ZLIB_INCLUDE_DIRS}") set(_gRPC_ZLIB_LIBRARIES "${ZLIB_LIBRARIES}") +# Use protobuf from ClickHouse contrib, not from gRPC third_party. if(NOT Protobuf_INCLUDE_DIR OR NOT Protobuf_LIBRARY) message(FATAL_ERROR " grpc: The location of the \"protobuf\" library is unknown") elseif (NOT Protobuf_PROTOC_EXECUTABLE) @@ -29,21 +32,33 @@ set(_gRPC_PROTOBUF_PROTOC "protoc") set(_gRPC_PROTOBUF_PROTOC_EXECUTABLE "${Protobuf_PROTOC_EXECUTABLE}") set(_gRPC_PROTOBUF_PROTOC_LIBRARIES "${Protobuf_PROTOC_LIBRARY}") +# Use OpenSSL from ClickHouse contrib, not from gRPC third_party. set(gRPC_SSL_PROVIDER "clickhouse" CACHE STRING "" FORCE) set(_gRPC_SSL_INCLUDE_DIR ${OPENSSL_INCLUDE_DIR}) set(_gRPC_SSL_LIBRARIES ${OPENSSL_LIBRARIES}) +# Use abseil-cpp from ClickHouse contrib, not from gRPC third_party. 
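+# (ABSL_ROOT_DIR below points gRPC's build at the abseil-cpp submodule so that a single copy of abseil is built.)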
+set(gRPC_ABSL_PROVIDER "clickhouse" CACHE STRING "" FORCE) +set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp") +if(NOT EXISTS "${ABSL_ROOT_DIR}/CMakeLists.txt") + message(FATAL_ERROR " grpc: submodule third_party/abseil-cpp is missing. To fix try run: \n git submodule update --init --recursive") +endif() +add_subdirectory("${ABSL_ROOT_DIR}" "${ClickHouse_BINARY_DIR}/contrib/abseil-cpp") + +# Choose to build static or shared library for c-ares. +if (MAKE_STATIC_LIBRARIES) + set(CARES_STATIC ON CACHE BOOL "" FORCE) + set(CARES_SHARED OFF CACHE BOOL "" FORCE) +else () + set(CARES_STATIC OFF CACHE BOOL "" FORCE) + set(CARES_SHARED ON CACHE BOOL "" FORCE) +endif () + # We don't want to build C# extensions. set(gRPC_BUILD_CSHARP_EXT OFF) -# We don't want to build abseil tests, so we temporarily switch BUILD_TESTING off. -set(_gRPC_ORIG_BUILD_TESTING ${BUILD_TESTING}) -set(BUILD_TESTING OFF) - add_subdirectory("${_gRPC_SOURCE_DIR}" "${_gRPC_BINARY_DIR}") -set(BUILD_TESTING ${_gRPC_ORIG_BUILD_TESTING}) - # The contrib/grpc/CMakeLists.txt redefined the PROTOBUF_GENERATE_GRPC_CPP() function for its own purposes, # so we need to redefine it back. include("${ClickHouse_SOURCE_DIR}/contrib/grpc-cmake/protobuf_generate_grpc.cmake") diff --git a/contrib/libbtrie/CMakeLists.txt b/contrib/libbtrie/CMakeLists.txt deleted file mode 100644 index 2b0c8e3fd75..00000000000 --- a/contrib/libbtrie/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -add_library(btrie - src/btrie.c - include/btrie.h -) - -target_include_directories (btrie SYSTEM PUBLIC include) diff --git a/contrib/libbtrie/LICENSE b/contrib/libbtrie/LICENSE deleted file mode 100644 index d386c6f7b79..00000000000 --- a/contrib/libbtrie/LICENSE +++ /dev/null @@ -1,23 +0,0 @@ -Copyright (c) 2013, CobbLiu -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/contrib/libbtrie/include/btrie.h b/contrib/libbtrie/include/btrie.h deleted file mode 100644 index 6d805108e7a..00000000000 --- a/contrib/libbtrie/include/btrie.h +++ /dev/null @@ -1,160 +0,0 @@ -#pragma once - -#if defined (__cplusplus) -extern "C" { -#endif - -#include -#include - -/** - * In btrie, each leaf means one bit in ip tree. - * Left means 0, and right means 1. - */ - -#define BTRIE_NULL (uintptr_t) -1 - -#if !defined(BTRIE_MAX_PAGES) -/// 54 ip per page. 
8 bytes memory per page when empty -#define BTRIE_MAX_PAGES 1024 * 2048 /// 128m ips , ~16mb ram when empty -// #define BTRIE_MAX_PAGES 1024 * 65535 /// 4g ips (whole ipv4), ~512mb ram when empty -#endif - -typedef struct btrie_node_s btrie_node_t; - -struct btrie_node_s { - btrie_node_t *right; - btrie_node_t *left; - btrie_node_t *parent; - uintptr_t value; -}; - - -typedef struct btrie_s { - btrie_node_t *root; - - btrie_node_t *free; /* free list of btrie */ - char *start; - size_t size; - - /* - * memory pool. - * memory management(esp free) will be so easy by using this facility. - */ - char *pools[BTRIE_MAX_PAGES]; - size_t len; -} btrie_t; - - -/** - * Create an empty btrie - * - * @Return: - * An ip radix_tree created. - * NULL if creation failed. - */ - -btrie_t *btrie_create(); - -/** - * Destroy the ip radix_tree - * - * @Return: - * OK if deletion succeed. - * ERROR if error occurs while deleting. - */ -int btrie_destroy(btrie_t *tree); - -/** - * Count the nodes in the radix tree. - */ -size_t btrie_count(btrie_t *tree); - -/** - * Return the allocated number of bytes. - */ -size_t btrie_allocated(btrie_t *tree); - - -/** - * Add an ipv4 into btrie - * - * @Args: - * key: ip address - * mask: key's mask - * value: value of this IP, may be NULL. - * - * @Return: - * OK for success. - * ERROR for failure. - */ -int btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask, - uintptr_t value); - - -/** - * Delete an ipv4 from btrie - * - * @Args: - * - * @Return: - * OK for success. - * ERROR for failure. - */ -int btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask); - - -/** - * Find an ipv4 from btrie - * - - * @Args: - * - * @Return: - * Value if succeed. - * NULL if failed. - */ -uintptr_t btrie_find(btrie_t *tree, uint32_t key); - - -/** - * Add an ipv6 into btrie - * - * @Args: - * key: ip address - * mask: key's mask - * value: value of this IP, may be NULL. - * - * @Return: - * OK for success. - * ERROR for failure. - */ -int btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask, - uintptr_t value); - -/** - * Delete an ipv6 from btrie - * - * @Args: - * - * @Return: - * OK for success. - * ERROR for failure. - */ -int btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask); - -/** - * Find an ipv6 from btrie - * - - * @Args: - * - * @Return: - * Value if succeed. - * NULL if failed. 
- */ -uintptr_t btrie_find_a6(btrie_t *tree, const uint8_t *key); - -#if defined (__cplusplus) -} -#endif \ No newline at end of file diff --git a/contrib/libbtrie/src/btrie.c b/contrib/libbtrie/src/btrie.c deleted file mode 100644 index f9353019ac1..00000000000 --- a/contrib/libbtrie/src/btrie.c +++ /dev/null @@ -1,460 +0,0 @@ -#include -#include -#include - -#define PAGE_SIZE 4096 - - -static btrie_node_t * -btrie_alloc(btrie_t *tree) -{ - btrie_node_t *p; - - if (tree->free) { - p = tree->free; - tree->free = tree->free->right; - return p; - } - - if (tree->size < sizeof(btrie_node_t)) { - tree->start = (char *) calloc(sizeof(char), PAGE_SIZE); - if (tree->start == NULL) { - return NULL; - } - - tree->pools[tree->len++] = tree->start; - tree->size = PAGE_SIZE; - } - - p = (btrie_node_t *) tree->start; - - tree->start += sizeof(btrie_node_t); - tree->size -= sizeof(btrie_node_t); - - return p; -} - - -btrie_t * -btrie_create() -{ - btrie_t *tree = (btrie_t *) malloc(sizeof(btrie_t)); - if (tree == NULL) { - return NULL; - } - - tree->free = NULL; - tree->start = NULL; - tree->size = 0; - memset(tree->pools, 0, sizeof(btrie_t *) * BTRIE_MAX_PAGES); - tree->len = 0; - - tree->root = btrie_alloc(tree); - if (tree->root == NULL) { - return NULL; - } - - tree->root->right = NULL; - tree->root->left = NULL; - tree->root->parent = NULL; - tree->root->value = BTRIE_NULL; - - return tree; -} - -static size_t -subtree_weight(btrie_node_t *node) -{ - size_t weight = 1; - if (node->left) { - weight += subtree_weight(node->left); - } - if (node->right) { - weight += subtree_weight(node->right); - } - return weight; -} - -size_t -btrie_count(btrie_t *tree) -{ - if (tree->root == NULL) { - return 0; - } - - return subtree_weight(tree->root); -} - -size_t -btrie_allocated(btrie_t *tree) -{ - return tree->len * PAGE_SIZE; -} - - -int -btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask, - uintptr_t value) -{ - uint32_t bit; - btrie_node_t *node, *next; - - bit = 0x80000000; - - node = tree->root; - next = tree->root; - - while (bit & mask) { - if (key & bit) { - next = node->right; - - } else { - next = node->left; - } - - if (next == NULL) { - break; - } - - bit >>= 1; - node = next; - } - - if (next) { - if (node->value != BTRIE_NULL) { - return -1; - } - - node->value = value; - return 0; - } - - while (bit & mask) { - next = btrie_alloc(tree); - if (next == NULL) { - return -1; - } - - next->right = NULL; - next->left = NULL; - next->parent = node; - next->value = BTRIE_NULL; - - if (key & bit) { - node->right = next; - - } else { - node->left = next; - } - - bit >>= 1; - node = next; - } - - node->value = value; - - return 0; -} - - -int -btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask) -{ - uint32_t bit; - btrie_node_t *node; - - bit = 0x80000000; - node = tree->root; - - while (node && (bit & mask)) { - if (key & bit) { - node = node->right; - - } else { - node = node->left; - } - - bit >>= 1; - } - - if (node == NULL) { - return -1; - } - - if (node->right || node->left) { - if (node->value != BTRIE_NULL) { - node->value = BTRIE_NULL; - return 0; - } - - return -1; - } - - for ( ;; ) { - if (node->parent->right == node) { - node->parent->right = NULL; - - } else { - node->parent->left = NULL; - } - - node->right = tree->free; - tree->free = node; - - node = node->parent; - - if (node->right || node->left) { - break; - } - - if (node->value != BTRIE_NULL) { - break; - } - - if (node->parent == NULL) { - break; - } - } - - return 0; -} - - -uintptr_t -btrie_find(btrie_t *tree, uint32_t 
key) -{ - uint32_t bit; - uintptr_t value; - btrie_node_t *node; - - bit = 0x80000000; - value = BTRIE_NULL; - node = tree->root; - - while (node) { - if (node->value != BTRIE_NULL) { - value = node->value; - } - - if (key & bit) { - node = node->right; - - } else { - node = node->left; - } - - bit >>= 1; - } - - return value; -} - - -int -btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask, - uintptr_t value) -{ - uint8_t bit; - unsigned int i; - btrie_node_t *node, *next; - - i = 0; - bit = 0x80; - - node = tree->root; - next = tree->root; - - while (bit & mask[i]) { - if (key[i] & bit) { - next = node->right; - - } else { - next = node->left; - } - - if (next == NULL) { - break; - } - - bit >>= 1; - node = next; - - if (bit == 0) { - if (++i == 16) { - break; - } - - bit = 0x80; - } - } - - if (next) { - if (node->value != BTRIE_NULL) { - return -1; - } - - node->value = value; - return 0; - } - - while (bit & mask[i]) { - next = btrie_alloc(tree); - if (next == NULL) { - return -1; - } - - next->right = NULL; - next->left = NULL; - next->parent = node; - next->value = BTRIE_NULL; - - if (key[i] & bit) { - node->right = next; - - } else { - node->left = next; - } - - bit >>= 1; - node = next; - - if (bit == 0) { - if (++i == 16) { - break; - } - - bit = 0x80; - } - } - - node->value = value; - - return 0; -} - - -int -btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask) -{ - uint8_t bit; - unsigned int i; - btrie_node_t *node; - - i = 0; - bit = 0x80; - node = tree->root; - - while (node && (bit & mask[i])) { - if (key[i] & bit) { - node = node->right; - - } else { - node = node->left; - } - - bit >>= 1; - - if (bit == 0) { - if (++i == 16) { - break; - } - - bit = 0x80; - } - } - - if (node == NULL) { - return -1; - } - - if (node->right || node->left) { - if (node->value != BTRIE_NULL) { - node->value = BTRIE_NULL; - return 0; - } - - return -1; - } - - for ( ;; ) { - if (node->parent->right == node) { - node->parent->right = NULL; - - } else { - node->parent->left = NULL; - } - - node->right = tree->free; - tree->free = node; - - node = node->parent; - - if (node->right || node->left) { - break; - } - - if (node->value != BTRIE_NULL) { - break; - } - - if (node->parent == NULL) { - break; - } - } - - return 0; -} - - -uintptr_t -btrie_find_a6(btrie_t *tree, const uint8_t *key) -{ - uint8_t bit; - uintptr_t value; - unsigned int i; - btrie_node_t *node; - - i = 0; - bit = 0x80; - value = BTRIE_NULL; - node = tree->root; - - while (node) { - if (node->value != BTRIE_NULL) { - value = node->value; - } - - if (key[i] & bit) { - node = node->right; - - } else { - node = node->left; - } - - bit >>= 1; - - if (bit == 0) { - i++; - bit = 0x80; - } - } - - return value; -} - - -int -btrie_destroy(btrie_t *tree) -{ - size_t i; - - - /* free memory pools */ - for (i = 0; i < tree->len; i++) { - free(tree->pools[i]); - } - - free(tree); - - return 0; -} diff --git a/contrib/libbtrie/test/test_btrie.c b/contrib/libbtrie/test/test_btrie.c deleted file mode 100644 index 2bbf2b2db7e..00000000000 --- a/contrib/libbtrie/test/test_btrie.c +++ /dev/null @@ -1,103 +0,0 @@ -#include -#include - -int main() -{ - btrie_t *it; - int ret; - - uint8_t prefix_v6[16] = {0xde, 0xad, 0xbe, 0xef}; - uint8_t mask_v6[16] = {0xff, 0xff, 0xff}; - uint8_t ip_v6[16] = {0xde, 0xad, 0xbe, 0xef, 0xde}; - - it = btrie_create(); - if (it == NULL) { - printf("create error!\n"); - return 0; - } - - //add 101.45.69.50/16 - ret = btrie_insert(it, 1697465650, 0xffff0000, 1); - if (ret != 0) { 
- printf("insert 1 error.\n"); - goto error; - } - - //add 10.45.69.50/16 - ret = btrie_insert(it, 170738994, 0xffff0000, 1); - if (ret != 0) { - printf("insert 2 error.\n"); - goto error; - } - - //add 10.45.79.50/16 - ret = btrie_insert(it, 170741554, 0xffff0000, 1); - if (ret == 0) { - printf("insert 3 error.\n"); - goto error; - } - - //add 102.45.79.50/24 - ret = btrie_insert(it, 1714245426, 0xffffff00, 1); - if (ret != 0) { - printf("insert 4 error.\n"); - goto error; - } - - ret = btrie_find(it, 170741554); - if (ret == 1) { - printf("test case 1 passed\n"); - } else { - printf("test case 1 error\n"); - } - - ret = btrie_find(it, 170786817); - if (ret != 1) { - printf("test case 2 passed\n"); - } else { - printf("test case 2 error\n"); - } - - ret = btrie_delete(it, 1714245426, 0xffffff00); - if (ret != 0) { - printf("delete 1 error\n"); - goto error; - } - - ret = btrie_find(it, 1714245426); - if (ret != 1) { - printf("test case 3 passed\n"); - } else { - printf("test case 3 error\n"); - } - - //add dead:beef::/32 - ret = btrie_insert_a6(it, prefix_v6, mask_v6, 1); - if (ret != 0) { - printf("insert 5 error\n"); - goto error; - } - - ret = btrie_find_a6(it, ip_v6); - if (ret == 1) { - printf("test case 4 passed\n"); - } else { - printf("test case 4 error\n"); - } - - // insert 4m ips - for (size_t ip = 1; ip < 1024 * 1024 * 4; ++ip) { - ret = btrie_insert(it, ip, 0xffffffff, 1); - if (ret != 0) { - printf("insert 5 error (%d) (%zu) .\n", ret, ip); - goto error; - } - } - - return 0; - - error: - btrie_destroy(it); - printf("test failed\n"); - return 1; -} diff --git a/contrib/libunwind b/contrib/libunwind index 27026ef4a9c..7d78d361891 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit 27026ef4a9c6c8cc956d1d131c4d794e24096981 +Subproject commit 7d78d3618910752c256b2b58c3895f4efea47fac diff --git a/contrib/libunwind-cmake/CMakeLists.txt b/contrib/libunwind-cmake/CMakeLists.txt index 82b3b9c0de5..3afff30eee7 100644 --- a/contrib/libunwind-cmake/CMakeLists.txt +++ b/contrib/libunwind-cmake/CMakeLists.txt @@ -22,7 +22,16 @@ set_source_files_properties(${LIBUNWIND_C_SOURCES} PROPERTIES COMPILE_FLAGS "-st set(LIBUNWIND_ASM_SOURCES ${LIBUNWIND_SOURCE_DIR}/src/UnwindRegistersRestore.S ${LIBUNWIND_SOURCE_DIR}/src/UnwindRegistersSave.S) -set_source_files_properties(${LIBUNWIND_ASM_SOURCES} PROPERTIES LANGUAGE C) + +# CMake doesn't pass the correct architecture for Apple prior to CMake 3.19 [1] +# Workaround these two issues by compiling as C. 
+# +# [1]: https://gitlab.kitware.com/cmake/cmake/-/issues/20771 +if (APPLE AND CMAKE_VERSION VERSION_LESS 3.19) + set_source_files_properties(${LIBUNWIND_ASM_SOURCES} PROPERTIES LANGUAGE C) +else() + enable_language(ASM) +endif() set(LIBUNWIND_SOURCES ${LIBUNWIND_CXX_SOURCES} diff --git a/contrib/miniselect b/contrib/miniselect new file mode 160000 index 00000000000..be0af6bd0b6 --- /dev/null +++ b/contrib/miniselect @@ -0,0 +1 @@ +Subproject commit be0af6bd0b6eb044d1acc4f754b229972d99903a diff --git a/contrib/protobuf b/contrib/protobuf index 445d1ae73a4..73b12814204 160000 --- a/contrib/protobuf +++ b/contrib/protobuf @@ -1 +1 @@ -Subproject commit 445d1ae73a450b1e94622e7040989aa2048402e3 +Subproject commit 73b12814204ad9068ba352914d0dc244648b48ee diff --git a/contrib/rocksdb b/contrib/rocksdb new file mode 160000 index 00000000000..35d8e36ef1b --- /dev/null +++ b/contrib/rocksdb @@ -0,0 +1 @@ +Subproject commit 35d8e36ef1b8e3e0759ca81215f855226a0a54bd diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt new file mode 100644 index 00000000000..9e850a6e781 --- /dev/null +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -0,0 +1,673 @@ +## this file is extracted from `contrib/rocksdb/CMakeLists.txt` +set(ROCKSDB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb") +list(APPEND CMAKE_MODULE_PATH "${ROCKSDB_SOURCE_DIR}/cmake/modules/") + +find_program(CCACHE_FOUND ccache) +if(CCACHE_FOUND) + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) + set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache) +endif(CCACHE_FOUND) + +if (SANITIZE STREQUAL "undefined") + set(WITH_UBSAN ON) +elseif (SANITIZE STREQUAL "address") + set(WITH_ASAN ON) +elseif (SANITIZE STREQUAL "thread") + set(WITH_TSAN ON) +endif() + + +set(PORTABLE ON) +## always disable jemalloc for rocksdb by default +## because it introduces non-standard jemalloc APIs +option(WITH_JEMALLOC "build with JeMalloc" OFF) +option(WITH_SNAPPY "build with SNAPPY" ${USE_SNAPPY}) +## lz4, zlib, zstd is enabled in ClickHouse by default +option(WITH_LZ4 "build with lz4" ON) +option(WITH_ZLIB "build with zlib" ON) +option(WITH_ZSTD "build with zstd" ON) + +# third-party/folly is only validated to work on Linux and Windows for now. +# So only turn it on there by default. +if(CMAKE_SYSTEM_NAME MATCHES "Linux|Windows") + if(MSVC AND MSVC_VERSION LESS 1910) + # Folly does not compile with MSVC older than VS2017 + option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF) + else() + option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON) + endif() +else() + option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF) +endif() + +if( NOT DEFINED CMAKE_CXX_STANDARD ) + set(CMAKE_CXX_STANDARD 11) +endif() + +if(MSVC) + option(WITH_XPRESS "build with windows built in compression" OFF) + include(${ROCKSDB_SOURCE_DIR}/thirdparty.inc) +else() + if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD" AND NOT CMAKE_SYSTEM_NAME MATCHES "kFreeBSD") + # FreeBSD has jemalloc as default malloc + # but it does not have all the jemalloc files in include/... 
+ set(WITH_JEMALLOC ON) + else() + if(WITH_JEMALLOC) + add_definitions(-DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE) + list(APPEND THIRDPARTY_LIBS jemalloc) + endif() + endif() + + if(WITH_SNAPPY) + add_definitions(-DSNAPPY) + list(APPEND THIRDPARTY_LIBS snappy) + endif() + + if(WITH_ZLIB) + add_definitions(-DZLIB) + list(APPEND THIRDPARTY_LIBS zlib) + endif() + + if(WITH_LZ4) + add_definitions(-DLZ4) + list(APPEND THIRDPARTY_LIBS lz4) + endif() + + if(WITH_ZSTD) + add_definitions(-DZSTD) + include_directories(${ZSTD_INCLUDE_DIR}) + include_directories(${ZSTD_INCLUDE_DIR}/common) + include_directories(${ZSTD_INCLUDE_DIR}/dictBuilder) + include_directories(${ZSTD_INCLUDE_DIR}/deprecated) + + list(APPEND THIRDPARTY_LIBS zstd) + endif() +endif() + +string(TIMESTAMP TS "%Y/%m/%d %H:%M:%S" UTC) +set(GIT_DATE_TIME "${TS}" CACHE STRING "the time we first built rocksdb") + +find_package(Git) + +if(GIT_FOUND AND EXISTS "${ROCKSDB_SOURCE_DIR}/.git") + if(WIN32) + execute_process(COMMAND $ENV{COMSPEC} /C ${GIT_EXECUTABLE} -C ${ROCKSDB_SOURCE_DIR} rev-parse HEAD OUTPUT_VARIABLE GIT_SHA) + else() + execute_process(COMMAND ${GIT_EXECUTABLE} -C ${ROCKSDB_SOURCE_DIR} rev-parse HEAD OUTPUT_VARIABLE GIT_SHA) + endif() +else() + set(GIT_SHA 0) +endif() + +string(REGEX REPLACE "[^0-9a-f]+" "" GIT_SHA "${GIT_SHA}") + +set(BUILD_VERSION_CC ${CMAKE_BINARY_DIR}/rocksdb_build_version.cc) +configure_file(${ROCKSDB_SOURCE_DIR}/util/build_version.cc.in ${BUILD_VERSION_CC} @ONLY) +add_library(rocksdb_build_version OBJECT ${BUILD_VERSION_CC}) +target_include_directories(rocksdb_build_version PRIVATE + ${ROCKSDB_SOURCE_DIR}/util) +if(MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /nologo /EHsc /GS /Gd /GR /GF /fp:precise /Zc:wchar_t /Zc:forScope /errorReport:queue") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /FC /d2Zi+ /W4 /wd4127 /wd4800 /wd4996 /wd4351 /wd4100 /wd4204 /wd4324") +else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W -Wextra -Wall") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wsign-compare -Wshadow -Wno-unused-parameter -Wno-unused-variable -Woverloaded-virtual -Wnon-virtual-dtor -Wno-missing-field-initializers -Wno-strict-aliasing") + if(MINGW) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format -fno-asynchronous-unwind-tables") + add_definitions(-D_POSIX_C_SOURCE=1) + endif() + if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer") + include(CheckCXXCompilerFlag) + CHECK_CXX_COMPILER_FLAG("-momit-leaf-frame-pointer" HAVE_OMIT_LEAF_FRAME_POINTER) + if(HAVE_OMIT_LEAF_FRAME_POINTER) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -momit-leaf-frame-pointer") + endif() + endif() +endif() + +include(CheckCCompilerFlag) +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") + CHECK_C_COMPILER_FLAG("-mcpu=power9" HAS_POWER9) + if(HAS_POWER9) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=power9 -mtune=power9") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=power9 -mtune=power9") + else() + CHECK_C_COMPILER_FLAG("-mcpu=power8" HAS_POWER8) + if(HAS_POWER8) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=power8 -mtune=power8") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=power8 -mtune=power8") + endif(HAS_POWER8) + endif(HAS_POWER9) + CHECK_C_COMPILER_FLAG("-maltivec" HAS_ALTIVEC) + if(HAS_ALTIVEC) + message(STATUS " HAS_ALTIVEC yes") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maltivec") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec") + endif(HAS_ALTIVEC) +endif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") + +if(CMAKE_SYSTEM_PROCESSOR MATCHES 
"aarch64|AARCH64") + CHECK_C_COMPILER_FLAG("-march=armv8-a+crc+crypto" HAS_ARMV8_CRC) + if(HAS_ARMV8_CRC) + message(STATUS " HAS_ARMV8_CRC yes") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") + endif(HAS_ARMV8_CRC) +endif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64") + + +include(CheckCXXSourceCompiles) +if(NOT MSVC) + set(CMAKE_REQUIRED_FLAGS "-msse4.2 -mpclmul") +endif() + +CHECK_CXX_SOURCE_COMPILES(" +#include +#include +#include +int main() { + volatile uint32_t x = _mm_crc32_u32(0, 0); + const auto a = _mm_set_epi64x(0, 0); + const auto b = _mm_set_epi64x(0, 0); + const auto c = _mm_clmulepi64_si128(a, b, 0x00); + auto d = _mm_cvtsi128_si64(c); +} +" HAVE_SSE42) +unset(CMAKE_REQUIRED_FLAGS) +if(HAVE_SSE42) + add_definitions(-DHAVE_SSE42) + add_definitions(-DHAVE_PCLMUL) +elseif(FORCE_SSE42) + message(FATAL_ERROR "FORCE_SSE42=ON but unable to compile with SSE4.2 enabled") +endif() + +CHECK_CXX_SOURCE_COMPILES(" +#if defined(_MSC_VER) && !defined(__thread) +#define __thread __declspec(thread) +#endif +int main() { + static __thread int tls; +} +" HAVE_THREAD_LOCAL) +if(HAVE_THREAD_LOCAL) + add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL) +endif() + +option(FAIL_ON_WARNINGS "Treat compile warnings as errors" ON) +if(FAIL_ON_WARNINGS) + if(MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX") + else() # assume GCC + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") + endif() +endif() + +option(WITH_ASAN "build with ASAN" OFF) +if(WITH_ASAN) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address") + if(WITH_JEMALLOC) + message(FATAL "ASAN does not work well with JeMalloc") + endif() +endif() + +option(WITH_TSAN "build with TSAN" OFF) +if(WITH_TSAN) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread -pie") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread -fPIC") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread -fPIC") + if(WITH_JEMALLOC) + message(FATAL "TSAN does not work well with JeMalloc") + endif() +endif() + +option(WITH_UBSAN "build with UBSAN" OFF) +if(WITH_UBSAN) + add_definitions(-DROCKSDB_UBSAN_RUN) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined") + if(WITH_JEMALLOC) + message(FATAL "UBSAN does not work well with JeMalloc") + endif() +endif() + + +if(CMAKE_SYSTEM_NAME MATCHES "Cygwin") + add_definitions(-fno-builtin-memcmp -DCYGWIN) +elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin") + add_definitions(-DOS_MACOSX) + if(CMAKE_SYSTEM_PROCESSOR MATCHES arm) + add_definitions(-DIOS_CROSS_COMPILE -DROCKSDB_LITE) + # no debug info for IOS, that will make our library big + add_definitions(-DNDEBUG) + endif() +elseif(CMAKE_SYSTEM_NAME MATCHES "Linux") + add_definitions(-DOS_LINUX) +elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS") + add_definitions(-DOS_SOLARIS) +elseif(CMAKE_SYSTEM_NAME MATCHES "kFreeBSD") + add_definitions(-DOS_GNU_KFREEBSD) +elseif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") + add_definitions(-DOS_FREEBSD) +elseif(CMAKE_SYSTEM_NAME MATCHES "NetBSD") + add_definitions(-DOS_NETBSD) +elseif(CMAKE_SYSTEM_NAME MATCHES "OpenBSD") + add_definitions(-DOS_OPENBSD) +elseif(CMAKE_SYSTEM_NAME MATCHES "DragonFly") + 
+  add_definitions(-DOS_DRAGONFLYBSD)
+elseif(CMAKE_SYSTEM_NAME MATCHES "Android")
+  add_definitions(-DOS_ANDROID)
+elseif(CMAKE_SYSTEM_NAME MATCHES "Windows")
+  add_definitions(-DWIN32 -DOS_WIN -D_MBCS -DWIN64 -DNOMINMAX)
+  if(MINGW)
+    add_definitions(-D_WIN32_WINNT=_WIN32_WINNT_VISTA)
+  endif()
+endif()
+
+if(NOT WIN32)
+  add_definitions(-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX)
+endif()
+
+option(WITH_FALLOCATE "build with fallocate" ON)
+if(WITH_FALLOCATE)
+  CHECK_CXX_SOURCE_COMPILES("
+#include <fcntl.h>
+#include <linux/falloc.h>
+int main() {
+ int fd = open(\"/dev/null\", 0);
+ fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1024);
+}
+" HAVE_FALLOCATE)
+  if(HAVE_FALLOCATE)
+    add_definitions(-DROCKSDB_FALLOCATE_PRESENT)
+  endif()
+endif()
+
+CHECK_CXX_SOURCE_COMPILES("
+#include <fcntl.h>
+int main() {
+ int fd = open(\"/dev/null\", 0);
+ sync_file_range(fd, 0, 1024, SYNC_FILE_RANGE_WRITE);
+}
+" HAVE_SYNC_FILE_RANGE_WRITE)
+if(HAVE_SYNC_FILE_RANGE_WRITE)
+  add_definitions(-DROCKSDB_RANGESYNC_PRESENT)
+endif()
+
+CHECK_CXX_SOURCE_COMPILES("
+#include <pthread.h>
+int main() {
+ (void) PTHREAD_MUTEX_ADAPTIVE_NP;
+}
+" HAVE_PTHREAD_MUTEX_ADAPTIVE_NP)
+if(HAVE_PTHREAD_MUTEX_ADAPTIVE_NP)
+  add_definitions(-DROCKSDB_PTHREAD_ADAPTIVE_MUTEX)
+endif()
+
+include(CheckCXXSymbolExists)
+if(CMAKE_SYSTEM_NAME MATCHES "^FreeBSD")
+  check_cxx_symbol_exists(malloc_usable_size ${ROCKSDB_SOURCE_DIR}/malloc_np.h HAVE_MALLOC_USABLE_SIZE)
+else()
+  check_cxx_symbol_exists(malloc_usable_size ${ROCKSDB_SOURCE_DIR}/malloc.h HAVE_MALLOC_USABLE_SIZE)
+endif()
+if(HAVE_MALLOC_USABLE_SIZE)
+  add_definitions(-DROCKSDB_MALLOC_USABLE_SIZE)
+endif()
+
+check_cxx_symbol_exists(sched_getcpu sched.h HAVE_SCHED_GETCPU)
+if(HAVE_SCHED_GETCPU)
+  add_definitions(-DROCKSDB_SCHED_GETCPU_PRESENT)
+endif()
+
+check_cxx_symbol_exists(getauxval auvx.h HAVE_AUXV_GETAUXVAL)
+if(HAVE_AUXV_GETAUXVAL)
+  add_definitions(-DROCKSDB_AUXV_GETAUXVAL_PRESENT)
+endif()
+
+include_directories(${ROCKSDB_SOURCE_DIR})
+include_directories(${ROCKSDB_SOURCE_DIR}/include)
+if(WITH_FOLLY_DISTRIBUTED_MUTEX)
+  include_directories(${ROCKSDB_SOURCE_DIR}/third-party/folly)
+endif()
+find_package(Threads REQUIRED)
+
+# Main library source code
+
+set(SOURCES
+  ${ROCKSDB_SOURCE_DIR}/cache/cache.cc
+  ${ROCKSDB_SOURCE_DIR}/cache/clock_cache.cc
+  ${ROCKSDB_SOURCE_DIR}/cache/lru_cache.cc
+  ${ROCKSDB_SOURCE_DIR}/cache/sharded_cache.cc
+  ${ROCKSDB_SOURCE_DIR}/db/arena_wrapped_db_iter.cc
+  ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_addition.cc
+  ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_builder.cc
+  ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_garbage.cc
+  ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_meta.cc
+  ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_reader.cc
+  ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_format.cc
+  ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_sequential_reader.cc
+  ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_writer.cc
+  ${ROCKSDB_SOURCE_DIR}/db/builder.cc
+  ${ROCKSDB_SOURCE_DIR}/db/c.cc
+  ${ROCKSDB_SOURCE_DIR}/db/column_family.cc
+  ${ROCKSDB_SOURCE_DIR}/db/compacted_db_impl.cc
+  ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction.cc
+  ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_iterator.cc
+  ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker.cc
+  ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_job.cc
+  ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker_fifo.cc
+  ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker_level.cc
+  ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker_universal.cc
+  ${ROCKSDB_SOURCE_DIR}/db/compaction/sst_partitioner.cc
+  ${ROCKSDB_SOURCE_DIR}/db/convenience.cc
+
${ROCKSDB_SOURCE_DIR}/db/db_filesnapshot.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_write.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_compaction_flush.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_files.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_open.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_debug.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_experimental.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_readonly.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_secondary.cc + ${ROCKSDB_SOURCE_DIR}/db/db_info_dumper.cc + ${ROCKSDB_SOURCE_DIR}/db/db_iter.cc + ${ROCKSDB_SOURCE_DIR}/db/dbformat.cc + ${ROCKSDB_SOURCE_DIR}/db/error_handler.cc + ${ROCKSDB_SOURCE_DIR}/db/event_helpers.cc + ${ROCKSDB_SOURCE_DIR}/db/experimental.cc + ${ROCKSDB_SOURCE_DIR}/db/external_sst_file_ingestion_job.cc + ${ROCKSDB_SOURCE_DIR}/db/file_indexer.cc + ${ROCKSDB_SOURCE_DIR}/db/flush_job.cc + ${ROCKSDB_SOURCE_DIR}/db/flush_scheduler.cc + ${ROCKSDB_SOURCE_DIR}/db/forward_iterator.cc + ${ROCKSDB_SOURCE_DIR}/db/import_column_family_job.cc + ${ROCKSDB_SOURCE_DIR}/db/internal_stats.cc + ${ROCKSDB_SOURCE_DIR}/db/logs_with_prep_tracker.cc + ${ROCKSDB_SOURCE_DIR}/db/log_reader.cc + ${ROCKSDB_SOURCE_DIR}/db/log_writer.cc + ${ROCKSDB_SOURCE_DIR}/db/malloc_stats.cc + ${ROCKSDB_SOURCE_DIR}/db/memtable.cc + ${ROCKSDB_SOURCE_DIR}/db/memtable_list.cc + ${ROCKSDB_SOURCE_DIR}/db/merge_helper.cc + ${ROCKSDB_SOURCE_DIR}/db/merge_operator.cc + ${ROCKSDB_SOURCE_DIR}/db/output_validator.cc + ${ROCKSDB_SOURCE_DIR}/db/periodic_work_scheduler.cc + ${ROCKSDB_SOURCE_DIR}/db/range_del_aggregator.cc + ${ROCKSDB_SOURCE_DIR}/db/range_tombstone_fragmenter.cc + ${ROCKSDB_SOURCE_DIR}/db/repair.cc + ${ROCKSDB_SOURCE_DIR}/db/snapshot_impl.cc + ${ROCKSDB_SOURCE_DIR}/db/table_cache.cc + ${ROCKSDB_SOURCE_DIR}/db/table_properties_collector.cc + ${ROCKSDB_SOURCE_DIR}/db/transaction_log_impl.cc + ${ROCKSDB_SOURCE_DIR}/db/trim_history_scheduler.cc + ${ROCKSDB_SOURCE_DIR}/db/version_builder.cc + ${ROCKSDB_SOURCE_DIR}/db/version_edit.cc + ${ROCKSDB_SOURCE_DIR}/db/version_edit_handler.cc + ${ROCKSDB_SOURCE_DIR}/db/version_set.cc + ${ROCKSDB_SOURCE_DIR}/db/wal_edit.cc + ${ROCKSDB_SOURCE_DIR}/db/wal_manager.cc + ${ROCKSDB_SOURCE_DIR}/db/write_batch.cc + ${ROCKSDB_SOURCE_DIR}/db/write_batch_base.cc + ${ROCKSDB_SOURCE_DIR}/db/write_controller.cc + ${ROCKSDB_SOURCE_DIR}/db/write_thread.cc + ${ROCKSDB_SOURCE_DIR}/env/env.cc + ${ROCKSDB_SOURCE_DIR}/env/env_chroot.cc + ${ROCKSDB_SOURCE_DIR}/env/env_encryption.cc + ${ROCKSDB_SOURCE_DIR}/env/env_hdfs.cc + ${ROCKSDB_SOURCE_DIR}/env/file_system.cc + ${ROCKSDB_SOURCE_DIR}/env/file_system_tracer.cc + ${ROCKSDB_SOURCE_DIR}/env/mock_env.cc + ${ROCKSDB_SOURCE_DIR}/file/delete_scheduler.cc + ${ROCKSDB_SOURCE_DIR}/file/file_prefetch_buffer.cc + ${ROCKSDB_SOURCE_DIR}/file/file_util.cc + ${ROCKSDB_SOURCE_DIR}/file/filename.cc + ${ROCKSDB_SOURCE_DIR}/file/random_access_file_reader.cc + ${ROCKSDB_SOURCE_DIR}/file/read_write_util.cc + ${ROCKSDB_SOURCE_DIR}/file/readahead_raf.cc + ${ROCKSDB_SOURCE_DIR}/file/sequence_file_reader.cc + ${ROCKSDB_SOURCE_DIR}/file/sst_file_manager_impl.cc + ${ROCKSDB_SOURCE_DIR}/file/writable_file_writer.cc + ${ROCKSDB_SOURCE_DIR}/logging/auto_roll_logger.cc + ${ROCKSDB_SOURCE_DIR}/logging/event_logger.cc + ${ROCKSDB_SOURCE_DIR}/logging/log_buffer.cc + ${ROCKSDB_SOURCE_DIR}/memory/arena.cc + ${ROCKSDB_SOURCE_DIR}/memory/concurrent_arena.cc + ${ROCKSDB_SOURCE_DIR}/memory/jemalloc_nodump_allocator.cc + ${ROCKSDB_SOURCE_DIR}/memory/memkind_kmem_allocator.cc + 
${ROCKSDB_SOURCE_DIR}/memtable/alloc_tracker.cc + ${ROCKSDB_SOURCE_DIR}/memtable/hash_linklist_rep.cc + ${ROCKSDB_SOURCE_DIR}/memtable/hash_skiplist_rep.cc + ${ROCKSDB_SOURCE_DIR}/memtable/skiplistrep.cc + ${ROCKSDB_SOURCE_DIR}/memtable/vectorrep.cc + ${ROCKSDB_SOURCE_DIR}/memtable/write_buffer_manager.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/histogram.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/histogram_windowing.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/in_memory_stats_history.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/instrumented_mutex.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/iostats_context.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/perf_context.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/perf_level.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/persistent_stats_history.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/statistics.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_impl.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_updater.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util_debug.cc + ${ROCKSDB_SOURCE_DIR}/options/cf_options.cc + ${ROCKSDB_SOURCE_DIR}/options/configurable.cc + ${ROCKSDB_SOURCE_DIR}/options/db_options.cc + ${ROCKSDB_SOURCE_DIR}/options/options.cc + ${ROCKSDB_SOURCE_DIR}/options/options_helper.cc + ${ROCKSDB_SOURCE_DIR}/options/options_parser.cc + ${ROCKSDB_SOURCE_DIR}/port/stack_trace.cc + ${ROCKSDB_SOURCE_DIR}/table/adaptive/adaptive_table_factory.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/binary_search_index_reader.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_filter_block.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_builder.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_factory.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_iterator.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_reader.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block_builder.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block_prefetcher.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block_prefix_index.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/data_block_hash_index.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/data_block_footer.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/filter_block_reader_common.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/filter_policy.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/flush_block_policy.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/full_filter_block.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/hash_index_reader.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/index_builder.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/index_reader_common.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/parsed_full_filter_block.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/partitioned_filter_block.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/partitioned_index_iterator.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/partitioned_index_reader.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/reader_common.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/uncompression_dict_reader.cc + ${ROCKSDB_SOURCE_DIR}/table/block_fetcher.cc + ${ROCKSDB_SOURCE_DIR}/table/cuckoo/cuckoo_table_builder.cc + ${ROCKSDB_SOURCE_DIR}/table/cuckoo/cuckoo_table_factory.cc + ${ROCKSDB_SOURCE_DIR}/table/cuckoo/cuckoo_table_reader.cc + ${ROCKSDB_SOURCE_DIR}/table/format.cc + ${ROCKSDB_SOURCE_DIR}/table/get_context.cc + ${ROCKSDB_SOURCE_DIR}/table/iterator.cc + ${ROCKSDB_SOURCE_DIR}/table/merging_iterator.cc + ${ROCKSDB_SOURCE_DIR}/table/meta_blocks.cc + 
${ROCKSDB_SOURCE_DIR}/table/persistent_cache_helper.cc + ${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_bloom.cc + ${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_builder.cc + ${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_factory.cc + ${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_index.cc + ${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_key_coding.cc + ${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_reader.cc + ${ROCKSDB_SOURCE_DIR}/table/sst_file_dumper.cc + ${ROCKSDB_SOURCE_DIR}/table/sst_file_reader.cc + ${ROCKSDB_SOURCE_DIR}/table/sst_file_writer.cc + ${ROCKSDB_SOURCE_DIR}/table/table_factory.cc + ${ROCKSDB_SOURCE_DIR}/table/table_properties.cc + ${ROCKSDB_SOURCE_DIR}/table/two_level_iterator.cc + ${ROCKSDB_SOURCE_DIR}/test_util/sync_point.cc + ${ROCKSDB_SOURCE_DIR}/test_util/sync_point_impl.cc + ${ROCKSDB_SOURCE_DIR}/test_util/testutil.cc + ${ROCKSDB_SOURCE_DIR}/test_util/transaction_test_util.cc + ${ROCKSDB_SOURCE_DIR}/tools/block_cache_analyzer/block_cache_trace_analyzer.cc + ${ROCKSDB_SOURCE_DIR}/tools/dump/db_dump_tool.cc + ${ROCKSDB_SOURCE_DIR}/tools/io_tracer_parser_tool.cc + ${ROCKSDB_SOURCE_DIR}/tools/ldb_cmd.cc + ${ROCKSDB_SOURCE_DIR}/tools/ldb_tool.cc + ${ROCKSDB_SOURCE_DIR}/tools/sst_dump_tool.cc + ${ROCKSDB_SOURCE_DIR}/tools/trace_analyzer_tool.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_replay.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/block_cache_tracer.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/io_tracer.cc + ${ROCKSDB_SOURCE_DIR}/util/coding.cc + ${ROCKSDB_SOURCE_DIR}/util/compaction_job_stats_impl.cc + ${ROCKSDB_SOURCE_DIR}/util/comparator.cc + ${ROCKSDB_SOURCE_DIR}/util/compression_context_cache.cc + ${ROCKSDB_SOURCE_DIR}/util/concurrent_task_limiter_impl.cc + ${ROCKSDB_SOURCE_DIR}/util/crc32c.cc + ${ROCKSDB_SOURCE_DIR}/util/dynamic_bloom.cc + ${ROCKSDB_SOURCE_DIR}/util/hash.cc + ${ROCKSDB_SOURCE_DIR}/util/murmurhash.cc + ${ROCKSDB_SOURCE_DIR}/util/random.cc + ${ROCKSDB_SOURCE_DIR}/util/rate_limiter.cc + ${ROCKSDB_SOURCE_DIR}/util/slice.cc + ${ROCKSDB_SOURCE_DIR}/util/file_checksum_helper.cc + ${ROCKSDB_SOURCE_DIR}/util/status.cc + ${ROCKSDB_SOURCE_DIR}/util/string_util.cc + ${ROCKSDB_SOURCE_DIR}/util/thread_local.cc + ${ROCKSDB_SOURCE_DIR}/util/threadpool_imp.cc + ${ROCKSDB_SOURCE_DIR}/util/xxhash.cc + ${ROCKSDB_SOURCE_DIR}/utilities/backupable/backupable_db.cc + ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_compaction_filter.cc + ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db.cc + ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db_impl.cc + ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db_impl_filesnapshot.cc + ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_dump_tool.cc + ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_file.cc + ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/cassandra_compaction_filter.cc + ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/format.cc + ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/merge_operator.cc + ${ROCKSDB_SOURCE_DIR}/utilities/checkpoint/checkpoint_impl.cc + ${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc + ${ROCKSDB_SOURCE_DIR}/utilities/debug.cc + ${ROCKSDB_SOURCE_DIR}/utilities/env_mirror.cc + ${ROCKSDB_SOURCE_DIR}/utilities/env_timed.cc + ${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_env.cc + ${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_fs.cc + ${ROCKSDB_SOURCE_DIR}/utilities/leveldb_options/leveldb_options.cc + ${ROCKSDB_SOURCE_DIR}/utilities/memory/memory_util.cc + ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/bytesxor.cc + ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/max.cc + 
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/put.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/sortlist.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/string_append/stringappend.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/string_append/stringappend2.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/uint64add.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/object_registry.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/option_change_migration/option_change_migration.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/options/options_util.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/block_cache_tier.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/block_cache_tier_file.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/block_cache_tier_metadata.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/persistent_cache_tier.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/volatile_tier_impl.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/cache_simulator.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/sim_cache.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_on_deletion_collector.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/trace/file_trace_reader_writer.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/lock_tracker.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point_lock_tracker.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/transactions/optimistic_transaction_db_impl.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/transactions/optimistic_transaction.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/transactions/pessimistic_transaction.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/transactions/pessimistic_transaction_db.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/transactions/snapshot_checker.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/transactions/transaction_base.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/transactions/transaction_db_mutex_impl.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/transactions/transaction_lock_mgr.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/transactions/transaction_util.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_prepared_txn.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_prepared_txn_db.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn_db.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/ttl/db_ttl_impl.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index.cc
+  ${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index_internal.cc
+  $<TARGET_OBJECTS:rocksdb_build_version>)
+
+if(HAVE_SSE42 AND NOT MSVC)
+  set_source_files_properties(
+    ${ROCKSDB_SOURCE_DIR}/util/crc32c.cc
+    PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
+endif()
+
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
+  list(APPEND SOURCES
+    ${ROCKSDB_SOURCE_DIR}/util/crc32c_ppc.c
+    ${ROCKSDB_SOURCE_DIR}/util/crc32c_ppc_asm.S)
+endif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
+
+if(HAS_ARMV8_CRC)
+  list(APPEND SOURCES
+    ${ROCKSDB_SOURCE_DIR}/util/crc32c_arm64.cc)
+endif(HAS_ARMV8_CRC)
+
+if(WIN32)
+  list(APPEND SOURCES
+    ${ROCKSDB_SOURCE_DIR}/port/win/io_win.cc
+    ${ROCKSDB_SOURCE_DIR}/port/win/env_win.cc
+    ${ROCKSDB_SOURCE_DIR}/port/win/env_default.cc
+    ${ROCKSDB_SOURCE_DIR}/port/win/port_win.cc
+    ${ROCKSDB_SOURCE_DIR}/port/win/win_logger.cc)
+  if(NOT MINGW)
+    # Mingw only supports std::thread when using
+    # posix threads.
+ list(APPEND SOURCES + ${ROCKSDB_SOURCE_DIR}/port/win/win_thread.cc) + endif() +if(WITH_XPRESS) + list(APPEND SOURCES + ${ROCKSDB_SOURCE_DIR}/port/win/xpress_win.cc) +endif() + +if(WITH_JEMALLOC) + list(APPEND SOURCES + ${ROCKSDB_SOURCE_DIR}/port/win/win_jemalloc.cc) +endif() + +else() + list(APPEND SOURCES + ${ROCKSDB_SOURCE_DIR}/port/port_posix.cc + ${ROCKSDB_SOURCE_DIR}/env/env_posix.cc + ${ROCKSDB_SOURCE_DIR}/env/fs_posix.cc + ${ROCKSDB_SOURCE_DIR}/env/io_posix.cc) +endif() + +if(WITH_FOLLY_DISTRIBUTED_MUTEX) + list(APPEND SOURCES + ${ROCKSDB_SOURCE_DIR}/third-party/folly/folly/detail/Futex.cpp + ${ROCKSDB_SOURCE_DIR}/third-party/folly/folly/synchronization/AtomicNotification.cpp + ${ROCKSDB_SOURCE_DIR}/third-party/folly/folly/synchronization/DistributedMutex.cpp + ${ROCKSDB_SOURCE_DIR}/third-party/folly/folly/synchronization/ParkingLot.cpp + ${ROCKSDB_SOURCE_DIR}/third-party/folly/folly/synchronization/WaitOptions.cpp) +endif() + +set(ROCKSDB_STATIC_LIB rocksdb) + +if(WIN32) + set(SYSTEM_LIBS ${SYSTEM_LIBS} shlwapi.lib rpcrt4.lib) +else() + set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT}) +endif() + +add_library(${ROCKSDB_STATIC_LIB} STATIC ${SOURCES}) +target_link_libraries(${ROCKSDB_STATIC_LIB} PRIVATE + ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) diff --git a/contrib/xz b/contrib/xz new file mode 160000 index 00000000000..869b9d1b4ed --- /dev/null +++ b/contrib/xz @@ -0,0 +1 @@ +Subproject commit 869b9d1b4edd6df07f819d360d306251f8147353 diff --git a/debian/changelog b/debian/changelog index 3da82efd47e..5ea6b472e46 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (20.12.1.1) unstable; urgency=low +clickhouse (20.13.1.1) unstable; urgency=low * Modified source code - -- clickhouse-release Thu, 05 Nov 2020 21:52:47 +0300 + -- clickhouse-release Mon, 23 Nov 2020 10:29:24 +0300 diff --git a/debian/clickhouse-server.init b/debian/clickhouse-server.init index 8f10153a682..3e4e888eacd 100755 --- a/debian/clickhouse-server.init +++ b/debian/clickhouse-server.init @@ -67,26 +67,6 @@ if uname -mpi | grep -q 'x86_64'; then fi -is_running() -{ - pgrep --pidfile "$CLICKHOUSE_PIDFILE" $(echo "${PROGRAM}" | cut -c1-15) 1> /dev/null 2> /dev/null -} - - -wait_for_done() -{ - timeout=$1 - attempts=0 - while is_running; do - attempts=$(($attempts + 1)) - if [ -n "$timeout" ] && [ $attempts -gt $timeout ]; then - return 1 - fi - sleep 1 - done -} - - die() { echo $1 >&2 @@ -105,49 +85,7 @@ check_config() initdb() { - if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then - CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path") - if [ "(" "$?" -ne "0" ")" -o "(" -z "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ")" ]; then - die "Cannot obtain value of path from config file: ${CLICKHOUSE_CONFIG}"; - fi - echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}" - else - CLICKHOUSE_DATADIR_FROM_CONFIG=$CLICKHOUSE_DATADIR - fi - - if ! getent passwd ${CLICKHOUSE_USER} >/dev/null; then - echo "Can't chown to non-existing user ${CLICKHOUSE_USER}" - return - fi - if ! getent group ${CLICKHOUSE_GROUP} >/dev/null; then - echo "Can't chown to non-existing group ${CLICKHOUSE_GROUP}" - return - fi - - if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -r ${CLICKHOUSE_CONFIG}"); then - echo "Warning! clickhouse config [${CLICKHOUSE_CONFIG}] not readable by user [${CLICKHOUSE_USER}]" - fi - - if ! 
$(su -s $SHELL ${CLICKHOUSE_USER} -c "test -O \"${CLICKHOUSE_DATADIR_FROM_CONFIG}\" && test -G \"${CLICKHOUSE_DATADIR_FROM_CONFIG}\""); then - if [ $(dirname "${CLICKHOUSE_DATADIR_FROM_CONFIG}") = "/" ]; then - echo "Directory ${CLICKHOUSE_DATADIR_FROM_CONFIG} seems too dangerous to chown." - else - if [ ! -e "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ]; then - echo "Creating directory ${CLICKHOUSE_DATADIR_FROM_CONFIG}" - mkdir -p "${CLICKHOUSE_DATADIR_FROM_CONFIG}" - fi - - echo "Changing owner of [${CLICKHOUSE_DATADIR_FROM_CONFIG}] to [${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}]" - chown -R ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} "${CLICKHOUSE_DATADIR_FROM_CONFIG}" - fi - fi - - if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -w ${CLICKHOUSE_LOGDIR}"); then - echo "Changing owner of [${CLICKHOUSE_LOGDIR}/*] to [${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}]" - chown -R ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}/* - echo "Changing owner of [${CLICKHOUSE_LOGDIR}] to [${CLICKHOUSE_LOGDIR_USER}:${CLICKHOUSE_GROUP}]" - chown ${CLICKHOUSE_LOGDIR_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR} - fi + ${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" } @@ -171,17 +109,7 @@ restart() forcestop() { - local EXIT_STATUS - EXIT_STATUS=0 - - echo -n "Stop forcefully $PROGRAM service: " - - kill -KILL $(cat "$CLICKHOUSE_PIDFILE") - - wait_for_done - - echo "DONE" - return $EXIT_STATUS + ${CLICKHOUSE_GENERIC_PROGRAM} stop --force --pid-path "${CLICKHOUSE_PIDDIR}" } @@ -261,16 +189,16 @@ main() service_or_func restart ;; condstart) - is_running || service_or_func start + service_or_func start ;; condstop) - is_running && service_or_func stop + service_or_func stop ;; condrestart) - is_running && service_or_func restart + service_or_func restart ;; condreload) - is_running && service_or_func restart + service_or_func restart ;; initdb) initdb @@ -293,17 +221,7 @@ main() status() { - if is_running; then - echo "$PROGRAM service is running" - exit 0 - else - if is_cron_disabled; then - echo "$PROGRAM service is stopped"; - else - echo "$PROGRAM: process unexpectedly terminated" - fi - exit 3 - fi + ${CLICKHOUSE_GENERIC_PROGRAM} status --pid-path "${CLICKHOUSE_PIDDIR}" } diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 2223b942429..3ef6b8c8b32 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=20.12.1.* +ARG version=20.13.1.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/packager/unbundled/Dockerfile b/docker/packager/unbundled/Dockerfile index 50671011a23..2f501f76e68 100644 --- a/docker/packager/unbundled/Dockerfile +++ b/docker/packager/unbundled/Dockerfile @@ -56,6 +56,7 @@ RUN apt-get update \ libprotoc-dev \ libgrpc++-dev \ protobuf-compiler-grpc \ + libc-ares-dev \ rapidjson-dev \ libsnappy-dev \ libparquet-dev \ @@ -64,6 +65,8 @@ RUN apt-get update \ libbz2-dev \ libavro-dev \ libfarmhash-dev \ + librocksdb-dev \ + libgflags-dev \ libmysqlclient-dev \ --yes --no-install-recommends diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 1ce6e427409..f7e107a2fc9 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=20.12.1.* +ARG 
version=20.13.1.* ARG gosu_ver=1.10 RUN apt-get update \ diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index cd2bead5616..8e3b5193874 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=20.12.1.* +ARG version=20.13.1.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/docker/test/coverage/Dockerfile b/docker/test/coverage/Dockerfile index 32020951539..cea1a63cf6f 100644 --- a/docker/test/coverage/Dockerfile +++ b/docker/test/coverage/Dockerfile @@ -7,8 +7,10 @@ ENV SOURCE_DIR=/build ENV OUTPUT_DIR=/output ENV IGNORE='.*contrib.*' -CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-10 CXX=clang++-10 cmake .. && cd /; \ +RUN apt-get update && apt-get install cmake --yes --no-install-recommends + +CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-11 CXX=clang++-11 cmake .. && cd /; \ dpkg -i /package_folder/clickhouse-common-static_*.deb; \ - llvm-profdata-10 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \ - llvm-cov-10 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \ + llvm-profdata-11 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \ + llvm-cov-11 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \ genhtml output.lcov --ignore-errors source --output-directory ${OUTPUT_DIR} diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index f6c665ff3fd..ac22a9dfaf0 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -1,5 +1,5 @@ # docker build -t yandex/clickhouse-fasttest . -FROM ubuntu:19.10 +FROM ubuntu:20.04 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=10 diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index eb737948aa6..c3f102786ae 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -15,6 +15,9 @@ stage=${stage:-} # empty parameter. read -ra FASTTEST_CMAKE_FLAGS <<< "${FASTTEST_CMAKE_FLAGS:-}" +# Run only matching tests. 
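+# FASTTEST_FOCUS is forwarded verbatim to clickhouse-test in run_tests below,
+# e.g. FASTTEST_FOCUS=01504_rocksdb runs only the tests whose names match it.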
+FASTTEST_FOCUS=${FASTTEST_FOCUS:-""}
+
 FASTTEST_WORKSPACE=$(readlink -f "${FASTTEST_WORKSPACE:-.}")
 FASTTEST_SOURCE=$(readlink -f "${FASTTEST_SOURCE:-$FASTTEST_WORKSPACE/ch}")
 FASTTEST_BUILD=$(readlink -f "${FASTTEST_BUILD:-${BUILD:-$FASTTEST_WORKSPACE/build}}")
@@ -101,217 +104,248 @@ function start_server
 
 function clone_root
 {
-git clone https://github.com/ClickHouse/ClickHouse.git -- "$FASTTEST_SOURCE" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/clone_log.txt"
+    git clone https://github.com/ClickHouse/ClickHouse.git -- "$FASTTEST_SOURCE" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/clone_log.txt"
 
-(
-cd "$FASTTEST_SOURCE"
-if [ "$PULL_REQUEST_NUMBER" != "0" ]; then
-    if git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/merge"; then
-        git checkout FETCH_HEAD
-        echo 'Clonned merge head'
-    else
-        git fetch
-        git checkout "$COMMIT_SHA"
-        echo 'Checked out to commit'
-    fi
-else
-    if [ -v COMMIT_SHA ]; then
-        git checkout "$COMMIT_SHA"
-    fi
-fi
-)
+    (
+    cd "$FASTTEST_SOURCE"
+    if [ "$PULL_REQUEST_NUMBER" != "0" ]; then
+        if git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/merge"; then
+            git checkout FETCH_HEAD
+            echo 'Cloned merge head'
+        else
+            git fetch
+            git checkout "$COMMIT_SHA"
+            echo 'Checked out to commit'
+        fi
+    else
+        if [ -v COMMIT_SHA ]; then
+            git checkout "$COMMIT_SHA"
+        fi
+    fi
+    )
 }
 
 function clone_submodules
 {
-(
-cd "$FASTTEST_SOURCE"
+    (
+    cd "$FASTTEST_SOURCE"
 
-SUBMODULES_TO_UPDATE=(contrib/boost contrib/zlib-ng contrib/libxml2 contrib/poco contrib/libunwind contrib/ryu contrib/fmtlib contrib/base64 contrib/cctz contrib/libcpuid contrib/double-conversion contrib/libcxx contrib/libcxxabi contrib/libc-headers contrib/lz4 contrib/zstd contrib/fastops contrib/rapidjson contrib/re2 contrib/sparsehash-c11 contrib/croaring)
+    SUBMODULES_TO_UPDATE=(
+        contrib/boost
+        contrib/zlib-ng
+        contrib/libxml2
+        contrib/poco
+        contrib/libunwind
+        contrib/ryu
+        contrib/fmtlib
+        contrib/base64
+        contrib/cctz
+        contrib/libcpuid
+        contrib/double-conversion
+        contrib/libcxx
+        contrib/libcxxabi
+        contrib/libc-headers
+        contrib/lz4
+        contrib/zstd
+        contrib/fastops
+        contrib/rapidjson
+        contrib/re2
+        contrib/sparsehash-c11
+        contrib/croaring
+        contrib/miniselect
+        contrib/xz
+    )
 
-git submodule sync
-git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}"
-git submodule foreach git reset --hard
-git submodule foreach git checkout @ -f
-git submodule foreach git clean -xfd
-)
+    git submodule sync
+    git submodule update --init --recursive "${SUBMODULES_TO_UPDATE[@]}"
+    git submodule foreach git reset --hard
+    git submodule foreach git checkout @ -f
+    git submodule foreach git clean -xfd
+    )
 }
 
 function run_cmake
 {
-CMAKE_LIBS_CONFIG=(
-    "-DENABLE_LIBRARIES=0"
-    "-DENABLE_TESTS=0"
-    "-DENABLE_UTILS=0"
-    "-DENABLE_EMBEDDED_COMPILER=0"
-    "-DENABLE_THINLTO=0"
-    "-DUSE_UNWIND=1"
-)
+    CMAKE_LIBS_CONFIG=(
+        "-DENABLE_LIBRARIES=0"
+        "-DENABLE_TESTS=0"
+        "-DENABLE_UTILS=0"
+        "-DENABLE_EMBEDDED_COMPILER=0"
+        "-DENABLE_THINLTO=0"
+        "-DUSE_UNWIND=1"
+    )
 
-# TODO remove this? we don't use ccache anyway. An option would be to download it
-# from S3 simultaneously with cloning.
-export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache"
-export CCACHE_BASEDIR="$FASTTEST_SOURCE"
-export CCACHE_NOHASHDIR=true
-export CCACHE_COMPILERCHECK=content
-export CCACHE_MAXSIZE=15G
+    # TODO remove this? we don't use ccache anyway. An option would be to download it
+    # from S3 simultaneously with cloning.
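+    # CCACHE_BASEDIR/CCACHE_NOHASHDIR make cache hits independent of the checkout
+    # path, and CCACHE_COMPILERCHECK=content hashes the compiler binary itself
+    # instead of trusting its modification time.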
+ export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache" + export CCACHE_BASEDIR="$FASTTEST_SOURCE" + export CCACHE_NOHASHDIR=true + export CCACHE_COMPILERCHECK=content + export CCACHE_MAXSIZE=15G -ccache --show-stats ||: -ccache --zero-stats ||: + ccache --show-stats ||: + ccache --zero-stats ||: -mkdir "$FASTTEST_BUILD" ||: + mkdir "$FASTTEST_BUILD" ||: -( -cd "$FASTTEST_BUILD" -cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_C_COMPILER=clang-10 "${CMAKE_LIBS_CONFIG[@]}" "${FASTTEST_CMAKE_FLAGS[@]}" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt" -) + ( + cd "$FASTTEST_BUILD" + cmake "$FASTTEST_SOURCE" -DCMAKE_CXX_COMPILER=clang++-10 -DCMAKE_C_COMPILER=clang-10 "${CMAKE_LIBS_CONFIG[@]}" "${FASTTEST_CMAKE_FLAGS[@]}" | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/cmake_log.txt" + ) } function build { -( -cd "$FASTTEST_BUILD" -time ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt" -if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then - cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse" -fi -ccache --show-stats ||: -) + ( + cd "$FASTTEST_BUILD" + time ninja clickhouse-bundle | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt" + if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then + cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse" + fi + ccache --show-stats ||: + ) } function configure { -clickhouse-client --version -clickhouse-test --help + clickhouse-client --version + clickhouse-test --help -mkdir -p "$FASTTEST_DATA"{,/client-config} -cp -a "$FASTTEST_SOURCE/programs/server/"{config,users}.xml "$FASTTEST_DATA" -"$FASTTEST_SOURCE/tests/config/install.sh" "$FASTTEST_DATA" "$FASTTEST_DATA/client-config" -cp -a "$FASTTEST_SOURCE/programs/server/config.d/log_to_console.xml" "$FASTTEST_DATA/config.d" -# doesn't support SSL -rm -f "$FASTTEST_DATA/config.d/secure_ports.xml" + mkdir -p "$FASTTEST_DATA"{,/client-config} + cp -a "$FASTTEST_SOURCE/programs/server/"{config,users}.xml "$FASTTEST_DATA" + "$FASTTEST_SOURCE/tests/config/install.sh" "$FASTTEST_DATA" "$FASTTEST_DATA/client-config" + cp -a "$FASTTEST_SOURCE/programs/server/config.d/log_to_console.xml" "$FASTTEST_DATA/config.d" + # doesn't support SSL + rm -f "$FASTTEST_DATA/config.d/secure_ports.xml" } function run_tests { -clickhouse-server --version -clickhouse-test --help + clickhouse-server --version + clickhouse-test --help -# Kill the server in case we are running locally and not in docker -stop_server ||: - -start_server - -TESTS_TO_SKIP=( - 00105_shard_collations - 00109_shard_totals_after_having - 00110_external_sort - 00302_http_compression - 00417_kill_query - 00436_convert_charset - 00490_special_line_separators_and_characters_outside_of_bmp - 00652_replicated_mutations_zookeeper - 00682_empty_parts_merge - 00701_rollup - 00834_cancel_http_readonly_queries_on_client_close - 00911_tautological_compare - 00926_multimatch - 00929_multi_match_edit_distance - 01031_mutations_interpreter_and_context - 01053_ssd_dictionary # this test mistakenly requires acces to /var/lib/clickhouse -- can't run this locally, disabled - 01083_expressions_in_engine_arguments - 01092_memory_profiler - 01098_msgpack_format - 01098_temporary_and_external_tables - 01103_check_cpu_instructions_at_startup # avoid dependency on qemu -- invonvenient when running locally - 01193_metadata_loading - 01238_http_memory_tracking # max_memory_usage_for_user can interfere another queries running concurrently - 01251_dict_is_in_infinite_loop - 01259_dictionary_custom_settings_ddl - 
01268_dictionary_direct_layout - 01280_ssd_complex_key_dictionary - 01281_group_by_limit_memory_tracking # max_memory_usage_for_user can interfere another queries running concurrently - 01318_encrypt # Depends on OpenSSL - 01318_decrypt # Depends on OpenSSL - 01281_unsucceeded_insert_select_queries_counter - 01292_create_user - 01294_lazy_database_concurrent - 01305_replica_create_drop_zookeeper - 01354_order_by_tuple_collate_const - 01355_ilike - 01411_bayesian_ab_testing - 01532_collate_in_low_cardinality - 01533_collate_in_nullable - 01542_collate_in_array - 01543_collate_in_tuple - _orc_ - arrow - avro - base64 - brotli - capnproto - client - ddl_dictionaries - h3 - hashing - hdfs - java_hash - json - limit_memory - live_view - memory_leak - memory_limit - mysql - odbc - parallel_alter - parquet - protobuf - secure - sha256 - - # Not sure why these two fail even in sequential mode. Disabled for now - # to make some progress. - 00646_url_engine - 00974_query_profiler - - # Look at DistributedFilesToInsert, so cannot run in parallel. - 01460_DistributedFilesToInsert - - 01541_max_memory_usage_for_user - - # Require python libraries like scipy, pandas and numpy - 01322_ttest_scipy - - 01545_system_errors -) - -time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" - -# substr is to remove semicolon after test name -readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt") - -# We will rerun sequentially any tests that have failed during parallel run. -# They might have failed because there was some interference from other tests -# running concurrently. If they fail even in seqential mode, we will report them. -# FIXME All tests that require exclusive access to the server must be -# explicitly marked as `sequential`, and `clickhouse-test` must detect them and -# run them in a separate group after all other tests. This is faster and also -# explicit instead of guessing. -if [[ -n "${FAILED_TESTS[*]}" ]] -then + # Kill the server in case we are running locally and not in docker stop_server ||: - # Clean the data so that there is no interference from the previous test run. 
-    rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files} ||:
-
-    start_server
-
-    echo "Going to run again: ${FAILED_TESTS[*]}"
+    TESTS_TO_SKIP=(
+        00105_shard_collations
+        00109_shard_totals_after_having
+        00110_external_sort
+        00302_http_compression
+        00417_kill_query
+        00436_convert_charset
+        00490_special_line_separators_and_characters_outside_of_bmp
+        00652_replicated_mutations_zookeeper
+        00682_empty_parts_merge
+        00701_rollup
+        00834_cancel_http_readonly_queries_on_client_close
+        00911_tautological_compare
+        00926_multimatch
+        00929_multi_match_edit_distance
+        01031_mutations_interpreter_and_context
+        01053_ssd_dictionary # this test mistakenly requires access to /var/lib/clickhouse -- can't run this locally, disabled
+        01083_expressions_in_engine_arguments
+        01092_memory_profiler
+        01098_msgpack_format
+        01098_temporary_and_external_tables
+        01103_check_cpu_instructions_at_startup # avoid dependency on qemu -- inconvenient when running locally
+        01193_metadata_loading
+        01238_http_memory_tracking # max_memory_usage_for_user can interfere with other queries running concurrently
+        01251_dict_is_in_infinite_loop
+        01259_dictionary_custom_settings_ddl
+        01268_dictionary_direct_layout
+        01280_ssd_complex_key_dictionary
+        01281_group_by_limit_memory_tracking # max_memory_usage_for_user can interfere with other queries running concurrently
+        01318_encrypt # Depends on OpenSSL
+        01318_decrypt # Depends on OpenSSL
+        01281_unsucceeded_insert_select_queries_counter
+        01292_create_user
+        01294_lazy_database_concurrent
+        01305_replica_create_drop_zookeeper
+        01354_order_by_tuple_collate_const
+        01355_ilike
+        01411_bayesian_ab_testing
+        01532_collate_in_low_cardinality
+        01533_collate_in_nullable
+        01542_collate_in_array
+        01543_collate_in_tuple
+        _orc_
+        arrow
+        avro
+        base64
+        brotli
+        capnproto
+        client
+        ddl_dictionaries
+        h3
+        hashing
+        hdfs
+        java_hash
+        json
+        limit_memory
+        live_view
+        memory_leak
+        memory_limit
+        mysql
+        odbc
+        parallel_alter
+        parquet
+        protobuf
+        secure
+        sha256
+        xz
-    clickhouse-test --order=random --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt"
-else
-    echo "No failed tests"
-fi
+        # Not sure why these two fail even in sequential mode. Disabled for now
+        # to make some progress.
+        00646_url_engine
+        00974_query_profiler
+
+        # In fasttest, ENABLE_LIBRARIES=0, so rocksdb engine is not enabled by default
+        01504_rocksdb
+
+        # Look at DistributedFilesToInsert, so cannot run in parallel.
+        01460_DistributedFilesToInsert
+
+        01541_max_memory_usage_for_user
+
+        # Require python libraries like scipy, pandas and numpy
+        01322_ttest_scipy
+        01561_mann_whitney_scipy
+
+        01545_system_errors
+        # Checks system.errors
+        01563_distributed_query_finish
+    )
+
+    time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
+
+    # substr is to remove semicolon after test name
+    readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
+
+    # We will rerun sequentially any tests that have failed during parallel run.
+    # They might have failed because there was some interference from other tests
+    # running concurrently. If they fail even in sequential mode, we will report them.
+ # FIXME All tests that require exclusive access to the server must be + # explicitly marked as `sequential`, and `clickhouse-test` must detect them and + # run them in a separate group after all other tests. This is faster and also + # explicit instead of guessing. + if [[ -n "${FAILED_TESTS[*]}" ]] + then + stop_server ||: + + # Clean the data so that there is no interference from the previous test run. + rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files} ||: + + start_server + + echo "Going to run again: ${FAILED_TESTS[*]}" + + clickhouse-test --order=random --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt" + else + echo "No failed tests" + fi } case "$stage" in diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml index 3ce0000b148..6e1e11344bb 100644 --- a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml @@ -50,7 +50,7 @@ services: - label:disable kafka_kerberos: - image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG} + image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} hostname: kafka_kerberos volumes: - ${KERBERIZED_KAFKA_DIR}/secrets:/tmp/keytab diff --git a/docker/test/integration/runner/compose/docker_compose_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql.yml index 2f09c2c01e3..90daf8a4238 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql.yml @@ -7,4 +7,4 @@ services: MYSQL_ROOT_PASSWORD: clickhouse ports: - 3308:3306 - command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency \ No newline at end of file + command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml new file mode 100644 index 00000000000..e7d762203ee --- /dev/null +++ b/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml @@ -0,0 +1,10 @@ +version: '2.3' +services: + mysql1: + image: mysql:5.7 + restart: 'no' + environment: + MYSQL_ROOT_PASSWORD: clickhouse + ports: + - 3308:3306 + command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml b/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml similarity index 93% rename from docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml rename to docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml index 1aa97f59a83..918a2b5f80f 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml @@ -2,7 +2,7 @@ version: '2.3' services: mysql8_0: image: mysql:8.0 - restart: always + restart: 'no' environment: MYSQL_ROOT_PASSWORD: clickhouse ports: diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_golang_client.yml 
b/docker/test/integration/runner/compose/docker_compose_mysql_golang_client.yml index b172cbcb2c6..a6a338eb6a8 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_golang_client.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_golang_client.yml @@ -1,6 +1,6 @@ version: '2.3' services: golang1: - image: yandex/clickhouse-mysql-golang-client:${DOCKER_MYSQL_GOLANG_CLIENT_TAG} + image: yandex/clickhouse-mysql-golang-client:${DOCKER_MYSQL_GOLANG_CLIENT_TAG:-latest} # to keep container running command: sleep infinity diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_java_client.yml b/docker/test/integration/runner/compose/docker_compose_mysql_java_client.yml index be1b3ad3f72..21d927df82c 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_java_client.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_java_client.yml @@ -1,6 +1,6 @@ version: '2.3' services: java1: - image: yandex/clickhouse-mysql-java-client:${DOCKER_MYSQL_JAVA_CLIENT_TAG} + image: yandex/clickhouse-mysql-java-client:${DOCKER_MYSQL_JAVA_CLIENT_TAG:-latest} # to keep container running command: sleep infinity diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_js_client.yml b/docker/test/integration/runner/compose/docker_compose_mysql_js_client.yml index 83954229111..dbd85cf2382 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_js_client.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_js_client.yml @@ -1,6 +1,6 @@ version: '2.3' services: mysqljs1: - image: yandex/clickhouse-mysql-js-client:${DOCKER_MYSQL_JS_CLIENT_TAG} + image: yandex/clickhouse-mysql-js-client:${DOCKER_MYSQL_JS_CLIENT_TAG:-latest} # to keep container running command: sleep infinity diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_php_client.yml b/docker/test/integration/runner/compose/docker_compose_mysql_php_client.yml index e61cb193b0e..f24f5337a7e 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_php_client.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_php_client.yml @@ -1,6 +1,6 @@ version: '2.3' services: php1: - image: yandex/clickhouse-mysql-php-client:${DOCKER_MYSQL_PHP_CLIENT_TAG} + image: yandex/clickhouse-mysql-php-client:${DOCKER_MYSQL_PHP_CLIENT_TAG:-latest} # to keep container running command: sleep infinity diff --git a/docker/test/integration/runner/compose/docker_compose_postgesql_java_client.yml b/docker/test/integration/runner/compose/docker_compose_postgesql_java_client.yml index ef18d1edd7b..38191f1bdd6 100644 --- a/docker/test/integration/runner/compose/docker_compose_postgesql_java_client.yml +++ b/docker/test/integration/runner/compose/docker_compose_postgesql_java_client.yml @@ -1,6 +1,6 @@ version: '2.2' services: java: - image: yandex/clickhouse-postgresql-java-client:${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG} + image: yandex/clickhouse-postgresql-java-client:${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG:-latest} # to keep container running command: sleep infinity diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 76cadc3ce11..8734e47e80f 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -25,12 +25,13 @@ RUN apt-get update \ python3 \ python3-dev \ python3-pip \ + python3-setuptools \ rsync \ tree \ tzdata \ vim \ wget \ - && pip3 --no-cache-dir install clickhouse_driver scipy \ + && pip3 
--no-cache-dir install 'git+https://github.com/mymarilyn/clickhouse-driver.git' scipy \ && apt-get purge --yes python3-dev g++ \ && apt-get autoremove --yes \ && apt-get clean \ diff --git a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml index ce7a6ae094a..f3609bcfcdb 100644 --- a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml +++ b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml @@ -16,7 +16,7 @@ 300 - 20 + 12 diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 337f13690b6..7175d0e4143 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -14,10 +14,12 @@ import string import sys import time import traceback +import logging import xml.etree.ElementTree as et from threading import Thread from scipy import stats +logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING') total_start_seconds = time.perf_counter() stage_start_seconds = total_start_seconds @@ -46,6 +48,8 @@ parser.add_argument('--profile-seconds', type=int, default=0, help='For how many parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.') parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.') parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.') +parser.add_argument('--keep-created-tables', action='store_true', help="Don't drop the created tables after the test.") +parser.add_argument('--use-existing-tables', action='store_true', help="Don't create or drop the tables, use the existing ones instead.") args = parser.parse_args() reportStageEnd('start') @@ -139,44 +143,37 @@ reportStageEnd('before-connect') # Open connections servers = [{'host': host or args.host[0], 'port': port or args.port[0]} for (host, port) in itertools.zip_longest(args.host, args.port)] -all_connections = [clickhouse_driver.Client(**server) for server in servers] +# Force settings_is_important to fail queries on unknown settings. +all_connections = [clickhouse_driver.Client(**server, settings_is_important=True) for server in servers] for i, s in enumerate(servers): print(f'server\t{i}\t{s["host"]}\t{s["port"]}') reportStageEnd('connect') -# Run drop queries, ignoring errors. Do this before all other activity, because -# clickhouse_driver disconnects on error (this is not configurable), and the new -# connection loses the changes in settings. -drop_query_templates = [q.text for q in root.findall('drop_query')] -drop_queries = substitute_parameters(drop_query_templates) -for conn_index, c in enumerate(all_connections): - for q in drop_queries: - try: - c.execute(q) - print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') - except: - pass +if not args.use_existing_tables: + # Run drop queries, ignoring errors. Do this before all other activity, + # because clickhouse_driver disconnects on error (this is not configurable), + # and the new connection loses the changes in settings. 
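+    # The per-query try/except below swallows errors on purpose: on the first run
+    # against a fresh server these tables do not exist yet, and that is fine.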
+ drop_query_templates = [q.text for q in root.findall('drop_query')] + drop_queries = substitute_parameters(drop_query_templates) + for conn_index, c in enumerate(all_connections): + for q in drop_queries: + try: + c.execute(q) + print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') + except: + pass -reportStageEnd('drop-1') + reportStageEnd('drop-1') # Apply settings. -# If there are errors, report them and continue -- maybe a new test uses a setting -# that is not in master, but the queries can still run. If we have multiple -# settings and one of them throws an exception, all previous settings for this -# connection will be reset, because the driver reconnects on error (not -# configurable). So the end result is uncertain, but hopefully we'll be able to -# run at least some queries. settings = root.findall('settings/*') for conn_index, c in enumerate(all_connections): for s in settings: - try: - q = f"set {s.tag} = '{s.text}'" - c.execute(q) - print(f'set\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') - except: - print(traceback.format_exc(), file=sys.stderr) + # requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings + # (https://github.com/mymarilyn/clickhouse-driver/pull/142) + c.settings[s.tag] = s.text reportStageEnd('settings') @@ -194,37 +191,40 @@ for t in tables: reportStageEnd('preconditions') -# Run create and fill queries. We will run them simultaneously for both servers, -# to save time. -# The weird search is to keep the relative order of elements, which matters, and -# etree doesn't support the appropriate xpath query. -create_query_templates = [q.text for q in root.findall('./*') if q.tag in ('create_query', 'fill_query')] -create_queries = substitute_parameters(create_query_templates) +if not args.use_existing_tables: + # Run create and fill queries. We will run them simultaneously for both + # servers, to save time. The weird XML search + filter is because we want to + # keep the relative order of elements, and etree doesn't support the + # appropriate xpath query. + create_query_templates = [q.text for q in root.findall('./*') + if q.tag in ('create_query', 'fill_query')] + create_queries = substitute_parameters(create_query_templates) -# Disallow temporary tables, because the clickhouse_driver reconnects on errors, -# and temporary tables are destroyed. We want to be able to continue after some -# errors. -for q in create_queries: - if re.search('create temporary table', q, flags=re.IGNORECASE): - print(f"Temporary tables are not allowed in performance tests: '{q}'", - file = sys.stderr) - sys.exit(1) + # Disallow temporary tables, because the clickhouse_driver reconnects on + # errors, and temporary tables are destroyed. We want to be able to continue + # after some errors. 
+ for q in create_queries: + if re.search('create temporary table', q, flags=re.IGNORECASE): + print(f"Temporary tables are not allowed in performance tests: '{q}'", + file = sys.stderr) + sys.exit(1) -def do_create(connection, index, queries): - for q in queries: - connection.execute(q) - print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}') + def do_create(connection, index, queries): + for q in queries: + connection.execute(q) + print(f'create\t{index}\t{connection.last_query.elapsed}\t{tsv_escape(q)}') -threads = [Thread(target = do_create, args = (connection, index, create_queries)) - for index, connection in enumerate(all_connections)] + threads = [ + Thread(target = do_create, args = (connection, index, create_queries)) + for index, connection in enumerate(all_connections)] -for t in threads: - t.start() + for t in threads: + t.start() -for t in threads: - t.join() + for t in threads: + t.join() -reportStageEnd('create') + reportStageEnd('create') # By default, test all queries. queries_to_run = range(0, len(test_queries)) @@ -403,10 +403,11 @@ print(f'profile-total\t{profile_total_seconds}') reportStageEnd('run') # Run drop queries -drop_queries = substitute_parameters(drop_query_templates) -for conn_index, c in enumerate(all_connections): - for q in drop_queries: - c.execute(q) - print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') +if not args.keep_created_tables and not args.use_existing_tables: + drop_queries = substitute_parameters(drop_query_templates) + for conn_index, c in enumerate(all_connections): + for q in drop_queries: + c.execute(q) + print(f'drop\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') -reportStageEnd('drop-2') + reportStageEnd('drop-2') diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index 0aedb67e572..382b486dda3 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -10,6 +10,11 @@ RUN apt-get update --yes \ gpg-agent \ debsig-verify \ strace \ + protobuf-compiler \ + protobuf-compiler-grpc \ + libprotoc-dev \ + libgrpc++-dev \ + libc-ares-dev \ --yes --no-install-recommends #RUN wget -nv -O - http://files.viva64.com/etc/pubkey.txt | sudo apt-key add - @@ -33,7 +38,8 @@ RUN set -x \ && dpkg -i "${PKG_VERSION}.deb" CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ - && cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF && ninja re2_st \ + && cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF \ + && ninja re2_st clickhouse_grpc_protos \ && pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \ plog-converter -a GA:1,2 -t fullhtml -o /test_output/pvs-studio-html-report pvs-studio.log; \ plog-converter -a GA:1,2 -t tasklist -o /test_output/pvs-studio-task-report.txt pvs-studio.log diff --git a/docker/test/stateful_with_coverage/Dockerfile b/docker/test/stateful_with_coverage/Dockerfile index f5d66ed5013..ac6645b9463 100644 --- a/docker/test/stateful_with_coverage/Dockerfile +++ b/docker/test/stateful_with_coverage/Dockerfile @@ -1,12 +1,12 @@ # docker build -t yandex/clickhouse-stateful-test-with-coverage . 
-FROM yandex/clickhouse-stateless-test +FROM yandex/clickhouse-stateless-test-with-coverage RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-9 main" >> /etc/apt/sources.list RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ apt-get install --yes --no-install-recommends \ - python3-requests + python3-requests procps psmisc COPY s3downloader /s3downloader COPY run.sh /run.sh diff --git a/docker/test/stateful_with_coverage/run.sh b/docker/test/stateful_with_coverage/run.sh index aaf7e0a44ac..5fc6350fad8 100755 --- a/docker/test/stateful_with_coverage/run.sh +++ b/docker/test/stateful_with_coverage/run.sh @@ -1,40 +1,44 @@ #!/bin/bash kill_clickhouse () { - kill "$(pgrep -u clickhouse)" 2>/dev/null + echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S' + pkill -f "clickhouse-server" 2>/dev/null - for _ in {1..10} + + for _ in {1..120} do - if ! kill -0 "$(pgrep -u clickhouse)"; then - echo "No clickhouse process" - break - else - echo "Process $(pgrep -u clickhouse) still alive" - sleep 10 - fi + if ! pkill -0 -f "clickhouse-server" ; then break ; fi + echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S' + sleep 1 done + + if pkill -0 -f "clickhouse-server" + then + pstree -apgT + jobs + echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S' + return 1 + fi } start_clickhouse () { LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml & -} - -wait_llvm_profdata () { - while kill -0 "$(pgrep llvm-profdata-10)" + counter=0 + until clickhouse-client --query "SELECT 1" do - echo "Waiting for profdata $(pgrep llvm-profdata-10) still alive" - sleep 3 + if [ "$counter" -gt 120 ] + then + echo "Cannot start clickhouse-server" + cat /var/log/clickhouse-server/stdout.log + tail -n1000 /var/log/clickhouse-server/stderr.log + tail -n1000 /var/log/clickhouse-server/clickhouse-server.log + break + fi + sleep 0.5 + counter=$((counter + 1)) done } -merge_client_files_in_background () { - client_files=$(ls /client_*profraw 2>/dev/null) - if [ -n "$client_files" ] - then - llvm-profdata-10 merge -sparse "$client_files" -o "merged_client_$(date +%s).profraw" - rm "$client_files" - fi -} chmod 777 / @@ -51,26 +55,7 @@ chmod 777 -R /var/log/clickhouse-server/ # install test configs /usr/share/clickhouse-test/config/install.sh -function start() -{ - counter=0 - until clickhouse-client --query "SELECT 1" - do - if [ "$counter" -gt 120 ] - then - echo "Cannot start clickhouse-server" - cat /var/log/clickhouse-server/stdout.log - tail -n1000 /var/log/clickhouse-server/stderr.log - tail -n1000 /var/log/clickhouse-server/clickhouse-server.log - break - fi - timeout 120 service clickhouse-server start - sleep 0.5 - counter=$((counter + 1)) - done -} - -start +start_clickhouse # shellcheck disable=SC2086 # No quotes because I want to split it into words. if ! 
/s3downloader --dataset-names $DATASETS; then @@ -81,25 +66,20 @@ fi chmod 777 -R /var/lib/clickhouse -while /bin/true; do - merge_client_files_in_background - sleep 2 -done & -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW DATABASES" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "CREATE DATABASE test" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW DATABASES" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "CREATE DATABASE test" kill_clickhouse start_clickhouse -sleep 10 +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM datasets" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM test" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM test" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM datasets" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test" if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then SKIP_LIST_OPT="--use-skip-list" @@ -109,15 +89,10 @@ fi # more ideologically correct.
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt kill_clickhouse -wait_llvm_profdata - sleep 3 -wait_llvm_profdata # 100% merged all parts - - cp /*.profraw /profraw ||: diff --git a/docker/test/stateful_with_coverage/s3downloader b/docker/test/stateful_with_coverage/s3downloader index a27c03a70f0..363ece8dac6 100755 --- a/docker/test/stateful_with_coverage/s3downloader +++ b/docker/test/stateful_with_coverage/s3downloader @@ -29,7 +29,7 @@ def dowload_with_progress(url, path): logging.info("Downloading from %s to temp path %s", url, path) for i in range(RETRIES_COUNT): try: - with open(path, 'w') as f: + with open(path, 'wb') as f: response = requests.get(url, stream=True) response.raise_for_status() total_length = response.headers.get('content-length') diff --git a/docker/test/stateless_unbundled/Dockerfile b/docker/test/stateless_unbundled/Dockerfile index 345ba905412..1c9f9510d7e 100644 --- a/docker/test/stateless_unbundled/Dockerfile +++ b/docker/test/stateless_unbundled/Dockerfile @@ -43,6 +43,8 @@ RUN apt-get --allow-unauthenticated update -y \ libreadline-dev \ libsasl2-dev \ libzstd-dev \ + librocksdb-dev \ + libgflags-dev \ lsof \ moreutils \ ncdu \ diff --git a/docker/test/stateless_with_coverage/Dockerfile b/docker/test/stateless_with_coverage/Dockerfile index 1d6a85adf9e..f7379ba5568 100644 --- a/docker/test/stateless_with_coverage/Dockerfile +++ b/docker/test/stateless_with_coverage/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-stateless-with-coverage-test . +# docker build -t yandex/clickhouse-stateless-test-with-coverage . # TODO: that can be based on yandex/clickhouse-stateless-test (llvm version and CMD differs) FROM yandex/clickhouse-test-base @@ -28,7 +28,9 @@ RUN apt-get update -y \ lsof \ unixodbc \ wget \ - qemu-user-static + qemu-user-static \ + procps \ + psmisc RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \ diff --git a/docker/test/stateless_with_coverage/run.sh b/docker/test/stateless_with_coverage/run.sh index 758591df618..4e4d9430a11 100755 --- a/docker/test/stateless_with_coverage/run.sh +++ b/docker/test/stateless_with_coverage/run.sh @@ -2,27 +2,41 @@ kill_clickhouse () { echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S' - kill "$(pgrep -u clickhouse)" 2>/dev/null + pkill -f "clickhouse-server" 2>/dev/null - for _ in {1..10} + + for _ in {1..120} do - if ! kill -0 "$(pgrep -u clickhouse)"; then - echo "No clickhouse process" | ts '%Y-%m-%d %H:%M:%S' - break - else - echo "Process $(pgrep -u clickhouse) still alive" | ts '%Y-%m-%d %H:%M:%S' - sleep 10 - fi + if ! 
pkill -0 -f "clickhouse-server" ; then break ; fi + echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S' + sleep 1 done - echo "Will try to send second kill signal for sure" - kill "$(pgrep -u clickhouse)" 2>/dev/null - sleep 5 - echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S' + if pkill -0 -f "clickhouse-server" + then + pstree -apgT + jobs + echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S' + return 1 + fi } start_clickhouse () { LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml & + counter=0 + until clickhouse-client --query "SELECT 1" + do + if [ "$counter" -gt 120 ] + then + echo "Cannot start clickhouse-server" + cat /var/log/clickhouse-server/stdout.log + tail -n1000 /var/log/clickhouse-server/stderr.log + tail -n1000 /var/log/clickhouse-server/clickhouse-server.log + break + fi + sleep 0.5 + counter=$((counter + 1)) + done } chmod 777 / @@ -44,9 +58,6 @@ chmod 777 -R /var/log/clickhouse-server/ start_clickhouse -sleep 10 - - if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then SKIP_LIST_OPT="--use-skip-list" fi diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 874dca751f3..458f78fcdb4 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -17,13 +17,6 @@ def get_skip_list_cmd(path): return '' -def run_perf_test(cmd, xmls_path, output_folder): - output_path = os.path.join(output_folder, "perf_stress_run.txt") - f = open(output_path, 'w') - p = Popen("{} --skip-tags=long --recursive --input-files {}".format(cmd, xmls_path), shell=True, stdout=f, stderr=f) - return p - - def get_options(i): options = "" if 0 < i: @@ -75,8 +68,6 @@ if __name__ == "__main__": args = parser.parse_args() func_pipes = [] - perf_process = None - perf_process = run_perf_test(args.perf_test_cmd, args.perf_test_xml_path, args.output_folder) func_pipes = run_func_test(args.test_cmd, args.output_folder, args.num_parallel, args.skip_func_tests, args.global_time_limit) logging.info("Will wait functests to finish") diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index e99af08afa5..70db2dc485e 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -35,7 +35,7 @@ RUN apt-get update \ ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone -RUN pip3 install urllib3 testflows==1.6.62 docker-compose docker dicttoxml kazoo tzlocal +RUN pip3 install urllib3 testflows==1.6.65 docker-compose docker dicttoxml kazoo tzlocal ENV DOCKER_CHANNEL stable ENV DOCKER_VERSION 17.09.1-ce diff --git a/docs/_includes/cmake_in_clickhouse_header.md b/docs/_includes/cmake_in_clickhouse_header.md index 10776e04c01..7dfda35e34a 100644 --- a/docs/_includes/cmake_in_clickhouse_header.md +++ b/docs/_includes/cmake_in_clickhouse_header.md @@ -13,9 +13,9 @@ cmake .. 
\ -DENABLE_CLICKHOUSE_SERVER=ON \ -DENABLE_CLICKHOUSE_CLIENT=ON \ -DUSE_STATIC_LIBRARIES=OFF \ - -DCLICKHOUSE_SPLIT_BINARY=ON \ -DSPLIT_SHARED_LIBRARIES=ON \ -DENABLE_LIBRARIES=OFF \ + -DUSE_UNWIND=ON \ -DENABLE_UTILS=OFF \ -DENABLE_TESTS=OFF ``` diff --git a/docs/en/development/adding_test_queries.md b/docs/en/development/adding_test_queries.md new file mode 100644 index 00000000000..4770d48ebd4 --- /dev/null +++ b/docs/en/development/adding_test_queries.md @@ -0,0 +1,141 @@ +# How to add test queries to ClickHouse CI + +ClickHouse has hundreds (or even thousands) of features. Every commit gets checked by a complex set of tests containing many thousands of test cases. + +The core functionality is very well tested, but some corner cases and different combinations of features may not be covered by ClickHouse CI. + +Most of the bugs/regressions we see happen in that 'grey area' where test coverage is poor. + +And we are very interested in covering most of the possible scenarios and feature combinations used in real life by tests. + +## Why add tests + +Why/when you should add a test case into ClickHouse code: +1) you use some complicated scenarios / feature combinations / you have some corner case which is probably not widely used +2) you see that certain behavior gets changed between versions without notice in the changelog +3) you just want to help improve ClickHouse quality and ensure the features you use will not be broken in future releases +4) once the test is added/accepted, you can be sure the corner case you check will never be accidentally broken +5) you will be a part of a great open-source community +6) your name will be visible in the `system.contributors` table! +7) you will make the world a bit better :) + +### Steps to do + +#### Prerequisite + +I assume you are running a Linux machine (you can use docker or virtual machines on other OSes) with any modern browser and an internet connection, and that you have some basic Linux & SQL skills. + +No highly specialized knowledge is needed (so you don't need to know C++ or anything about how ClickHouse CI works).
+ + +#### Preparation + +1) [create GitHub account](https://github.com/join) (if you don't have one yet) +2) [set up git](https://docs.github.com/en/free-pro-team@latest/github/getting-started-with-github/set-up-git) +```bash +# for Ubuntu +sudo apt-get update +sudo apt-get install git + +git config --global user.name "John Doe" # fill with your name +git config --global user.email "email@example.com" # fill with your email + +``` +3) [fork ClickHouse project](https://docs.github.com/en/free-pro-team@latest/github/getting-started-with-github/fork-a-repo) - just open [https://github.com/ClickHouse/ClickHouse](https://github.com/ClickHouse/ClickHouse) and press the fork button in the top right corner: +![fork repo](https://github-images.s3.amazonaws.com/help/bootcamp/Bootcamp-Fork.png) + +4) clone your fork to some folder on your PC, for example, `~/workspace/ClickHouse` +``` +mkdir ~/workspace && cd ~/workspace +git clone https://github.com/< your GitHub username>/ClickHouse +cd ClickHouse +git remote add upstream https://github.com/ClickHouse/ClickHouse +``` + +#### New branch for the test + +1) create a new branch from the latest ClickHouse master +``` +cd ~/workspace/ClickHouse +git fetch upstream +git checkout -b name_for_a_branch_with_my_test upstream/master +``` + +#### Install & run clickhouse + +1) install `clickhouse-server` (follow [official docs](https://clickhouse.tech/docs/en/getting-started/install/)) +2) install test configurations (it will use Zookeeper mock implementation and adjust some settings) +``` +cd ~/workspace/ClickHouse/tests/config +sudo ./install.sh +``` +3) run clickhouse-server +``` +sudo systemctl restart clickhouse-server +``` + +#### Creating the test file + + +1) find the number for your test - find the file with the biggest number in `tests/queries/0_stateless/` + +```sh +$ cd ~/workspace/ClickHouse +$ ls tests/queries/0_stateless/[0-9]*.reference | tail -n 1 +tests/queries/0_stateless/01520_client_print_query_id.reference +``` +Currently, the last number for the test is `01520`, so my test will have the number `01521`. + +2) create an SQL file with the next number and the name of the feature you test + +```sh +touch tests/queries/0_stateless/01521_dummy_test.sql +``` + +3) edit the SQL file with your favorite editor (see hints on creating tests below) +```sh +vim tests/queries/0_stateless/01521_dummy_test.sql +``` + + +4) run the test, and put its result into the reference file: +``` +clickhouse-client -nmT < tests/queries/0_stateless/01521_dummy_test.sql | tee tests/queries/0_stateless/01521_dummy_test.reference +``` + +5) ensure everything is correct; if the test output is incorrect (due to a bug, for example), adjust the reference file using a text editor.
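For illustration, a minimal sketch of what such a test file could contain (the table, columns, and values here are hypothetical, chosen to follow the guidelines in the next section):

```sql
-- hypothetical 01521_dummy_test.sql: set up, check one thing, clean up
DROP TABLE IF EXISTS dummy_test;
CREATE TABLE dummy_test (key UInt64, value String) ENGINE = MergeTree ORDER BY key;
INSERT INTO dummy_test VALUES (1, 'abc'), (2, 'def');
SELECT value FROM dummy_test WHERE key = 2;
DROP TABLE dummy_test;
```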
+ +#### How to create a good test + +- the test should be + - minimal - create only tables related to the tested functionality, remove unrelated columns and parts of the query + - fast - should not take longer than a few seconds (sub-second is better) + - correct - fails when the feature is not working + - deterministic + - isolated / stateless + - don't rely on environment specifics + - don't rely on timing when possible +- try to cover corner cases (zeros / Nulls / empty sets / throwing exceptions) +- to test that a query returns an error, you can put a special comment after the query: `-- { serverError 60 }` or `-- { clientError 20 }` +- don't switch databases (unless necessary) +- you can create several table replicas on the same node if needed +- you can use one of the test cluster definitions when needed (see system.clusters) +- use `numbers` / `numbers_mt` / `zeros` / `zeros_mt` and similar for queries / to initialize data when applicable +- clean up the created objects after the test and before the test (DROP IF EXISTS) - in case of some dirty state +- prefer sync mode of operations (mutations, merges, etc.) +- use other SQL files in the `0_stateless` folder as an example +- ensure the feature / feature combination you want to test is not yet covered by existing tests + +#### Commit / push / create PR. + +1) commit & push your changes +```sh +cd ~/workspace/ClickHouse +git add tests/queries/0_stateless/01521_dummy_test.sql +git add tests/queries/0_stateless/01521_dummy_test.reference +git commit # use some nice commit message when possible +git push origin HEAD +``` +2) use the link shown during the push to create a PR into the main repo +3) adjust the PR title and contents, in `Changelog category (leave one)` keep +`Build/Testing/Packaging Improvement`, and fill the rest of the fields if you want. diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index 639b78185e4..76a2f647231 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -17,7 +17,6 @@ toc_title: Third-Party Libraries Used | googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) | | h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) | | hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) | -| libbtrie | [BSD 2-Clause License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) | | libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) | | libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) | | libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) | diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md new file mode 100644 index 00000000000..857e148277c --- /dev/null +++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md @@ -0,0 +1,45 @@ +--- +toc_priority: 6 +toc_title: EmbeddedRocksDB +--- + +# EmbeddedRocksDB Engine {#EmbeddedRocksDB-engine} + +This engine allows integrating ClickHouse with [rocksdb](http://rocksdb.org/).
+ +`EmbeddedRocksDB` lets you use ClickHouse as an embedded key-value store backed by rocksdb. + +## Creating a Table {#table_engine-EmbeddedRocksDB-creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... +) ENGINE = EmbeddedRocksDB PRIMARY KEY(primary_key_name) +``` + +Required parameters: + +- `primary_key_name` – any column name in the column list. + +Example: + +``` sql +CREATE TABLE test +( + `key` String, + `v1` UInt32, + `v2` String, + `v3` Float32 +) +ENGINE = EmbeddedRocksDB +PRIMARY KEY key +``` + +## Description {#description} + +- `primary key` must be specified; only one column is supported in the primary key. The primary key is serialized in binary as the rocksdb key. +- columns other than the primary key are serialized in binary as the rocksdb value, in the corresponding order. +- queries with key `equals` or `in` filtering are optimized to multi-key lookups from rocksdb. diff --git a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md index 4bfb9dc200e..ea0b265d652 100644 --- a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -273,13 +273,15 @@ SELECT sum(Duration) AS Duration FROM UAct GROUP BY UserID -```text +``` + +``` text ┌──────────────UserID─┬─PageViews─┬─Duration─┐ │ 4324182021466249494 │ 6 │ 185 │ └─────────────────────┴───────────┴──────────┘ ``` -``` sqk +``` sql select count() FROM UAct ``` diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index 932facc9ddc..625869a3cb8 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -53,6 +53,42 @@ Example of setting the addresses of the ZooKeeper cluster: ``` +ClickHouse also supports storing replica metadata in an auxiliary ZooKeeper cluster, by providing the ZooKeeper cluster name and path as engine arguments. +In other words, the metadata of different tables can be stored in different ZooKeeper clusters. + +Example of setting the addresses of the auxiliary ZooKeeper cluster: + +``` xml +<auxiliary_zookeepers> + <zookeeper2> + <node> + <host>example_2_1</host> + <port>2181</port> + </node> + <node> + <host>example_2_2</host> + <port>2181</port> + </node> + <node> + <host>example_2_3</host> + <port>2181</port> + </node> + </zookeeper2> + <zookeeper3> + <node> + <host>example_3_1</host> + <port>2181</port> + </node> + </zookeeper3> +</auxiliary_zookeepers> +``` + +To store table metadata in an auxiliary ZooKeeper cluster instead of the default one, we can use SQL to create the table with the +ReplicatedMergeTree engine as follows: + +``` sql +CREATE TABLE table_name ( ... ) ENGINE = ReplicatedMergeTree('zookeeper_name_configured_in_auxiliary_zookeepers:path', 'replica_name') ... +``` You can specify any existing ZooKeeper cluster and the system will use a directory on it for its own data (the directory is specified when creating a replicatable table). If ZooKeeper isn’t set in the config file, you can’t create replicated tables, and any existing replicated tables will be read-only.
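For instance, assuming the auxiliary cluster from the sketch above is registered under the name `zookeeper2`, a table could be created like this (the database, table, and replica names are hypothetical):

```sql
-- keep this table's replication metadata in the auxiliary cluster 'zookeeper2'
CREATE TABLE test.t
(
    n UInt32
)
ENGINE = ReplicatedMergeTree('zookeeper2:/clickhouse/tables/01/t', 'replica_1')
ORDER BY n
```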
@@ -152,7 +188,7 @@ You can specify default arguments for `Replicated` table engine in the server co ```xml <default_replica_path>/clickhouse/tables/{shard}/{database}/{table}</default_replica_path> -<default_replica_name>{replica}<default_replica_name> +<default_replica_name>{replica}</default_replica_name> ``` In this case, you can omit arguments when creating tables: diff --git a/docs/en/faq/integration/json-import.md b/docs/en/faq/integration/json-import.md index fb94f226f2b..7038cc539d2 100644 --- a/docs/en/faq/integration/json-import.md +++ b/docs/en/faq/integration/json-import.md @@ -30,4 +30,4 @@ Instead of inserting data manually, you might consider to use one of [client lib - `input_format_import_nested_json` allows to insert nested JSON objects into columns of [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) type. !!! note "Note" - Settings are specified as `GET` parameters for the HTTP interface or as additional command-line arguments prefixed with `--` for the `CLI` interface. \ No newline at end of file + Settings are specified as `GET` parameters for the HTTP interface or as additional command-line arguments prefixed with `--` for the `CLI` interface. diff --git a/docs/en/getting-started/tutorial.md b/docs/en/getting-started/tutorial.md index 8d41279fef9..3e051456a75 100644 --- a/docs/en/getting-started/tutorial.md +++ b/docs/en/getting-started/tutorial.md @@ -11,7 +11,7 @@ By going through this tutorial, you’ll learn how to set up a simple ClickHouse ## Single Node Setup {#single-node-setup} -To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do no support them. +To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do not support them. For example, you have chosen `deb` packages and executed: diff --git a/docs/en/index.md b/docs/en/index.md index 8280d5c9f97..676fd444995 100644 --- a/docs/en/index.md +++ b/docs/en/index.md @@ -5,7 +5,7 @@ toc_title: Overview # What Is ClickHouse? {#what-is-clickhouse} -ClickHouse is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP). +ClickHouse® is a column-oriented database management system (DBMS) for online analytical processing of queries (OLAP). In a “normal” row-oriented DBMS, data is stored in this order: diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 07ed3e5c884..42416383860 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -123,6 +123,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va - `--stacktrace` – If specified, also print the stack trace if an exception occurs. - `--config-file` – The name of the configuration file. - `--secure` – If specified, will connect to server over secure connection. +- `--history_file` — Path to a file containing command history. - `--param_` — Value for a [query with parameters](#cli-queries-with-parameters).
### Configuration Files {#configuration_files} diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index d310705d1c1..618ae374e8a 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -25,6 +25,7 @@ The supported formats are: | [Vertical](#vertical) | ✗ | ✔ | | [VerticalRaw](#verticalraw) | ✗ | ✔ | | [JSON](#json) | ✗ | ✔ | +| [JSONAsString](#jsonasstring) | ✔ | ✗ | | [JSONString](#jsonstring) | ✗ | ✔ | | [JSONCompact](#jsoncompact) | ✗ | ✔ | | [JSONCompactString](#jsoncompactstring) | ✗ | ✔ | @@ -507,6 +508,34 @@ Example: } ``` +## JSONAsString {#jsonasstring} + +In this format, a single JSON object is interpreted as a single value. If the input has several JSON objects (comma separated), they will be interpreted as separate rows. + +This format can only be parsed for a table with a single field of type [String](../sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](../sql-reference/statements/create/table.md#default) or [MATERIALIZED](../sql-reference/statements/create/table.md#materialized), or omitted. Once you collect the whole JSON object into a string, you can use [JSON functions](../sql-reference/functions/json-functions.md) to process it. + +**Example** + +Query: + +``` sql +DROP TABLE IF EXISTS json_as_string; +CREATE TABLE json_as_string (json String) ENGINE = Memory; +INSERT INTO json_as_string FORMAT JSONAsString {"foo":{"bar":{"x":"y"},"baz":1}},{},{"any json structure":1} +SELECT * FROM json_as_string; +``` + +Result: + +``` text +┌─json──────────────────────────────┐ +│ {"foo":{"bar":{"x":"y"},"baz":1}} │ +│ {} │ +│ {"any json structure":1} │ +└───────────────────────────────────┘ +``` + + ## JSONCompact {#jsoncompact} ## JSONCompactString {#jsoncompactstring} diff --git a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md index 4e8347c9a6e..c737fad152f 100644 --- a/docs/en/interfaces/third-party/client-libraries.md +++ b/docs/en/interfaces/third-party/client-libraries.md @@ -26,6 +26,9 @@ toc_title: Client Libraries - [go-clickhouse](https://github.com/roistat/go-clickhouse) - [mailrugo-clickhouse](https://github.com/mailru/go-clickhouse) - [golang-clickhouse](https://github.com/leprosus/golang-clickhouse) +- Swift + - [ClickHouseNIO](https://github.com/patrick-zippenfenig/ClickHouseNIO) + - [ClickHouseVapor ORM](https://github.com/patrick-zippenfenig/ClickHouseVapor) - NodeJs + - [clickhouse (NodeJs)](https://github.com/TimonKK/clickhouse) - [node-clickhouse](https://github.com/apla/node-clickhouse) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 89b64bfecb8..6cb65d5b9b9 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -11,6 +11,7 @@ toc_title: Adopters | Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size\* | Reference | |------------------------------------------------------------------------------------------------|---------------------------------|-----------------------|------------------------------------------------------------|------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | 2gis | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) | +| Admiral | Martech | Engagement
Management | — | — | [Webinar Slides, June 2020](https://altinity.com/presentations/2020/06/16/big-data-in-real-time-how-clickhouse-powers-admirals-visitor-relationships-for-publishers) | | Alibaba Cloud | Cloud | Managed Service | — | — | [Official Website](https://help.aliyun.com/product/144466.html) | | Aloha Browser | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf) | | Amadeus | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) | @@ -29,6 +30,7 @@ toc_title: Adopters | Citadel Securities | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) | | Citymobil | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) | | Cloudflare | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) | +| Comcast | Media | CDN Traffic Analysis | — | — | [ApacheCon 2019 Talk](https://www.youtube.com/watch?v=e9TZ6gFDjNg) | | ContentSquare | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | | Corunet | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) | | CraiditX 氪信 | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) | @@ -36,6 +38,7 @@ toc_title: Adopters | Criteo | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) | | Dataliance for China Telecom | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) | | Deutsche Bank | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | +| Deeplay | Gaming Analytics | — | — | — | [Job advertisement, 2020](https://career.habr.com/vacancies/1000062568) | | Diva-e | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) | | Ecwid | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) | | eBay | E-commerce | Logs, Metrics and Events | — | — | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/) | @@ -63,7 +66,8 @@ toc_title: Adopters | Marilyn | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) | | Mello | Marketing | Analytics | 1 server | — | [Article, Oct 2020](https://vc.ru/marketing/166180-razrabotka-tipovogo-otcheta-skvoznoy-analitiki) | | MessageBird | Telecommunications | Statistics | — | 
— | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) | -| MindsDB | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) | +| MindsDB | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) | +| MUX | Online Video | Video Analytics | — | — | [Talk in English, August 2019](https://altinity.com/presentations/2019/8/13/how-clickhouse-became-the-default-analytics-database-for-mux/) | | MGID | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) | | NOC Project | Network Monitoring | Analytics | Main Product | — | [Official Website](https://getnoc.com/features/big-data/) | | Nuna Inc. | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) | @@ -107,7 +111,7 @@ toc_title: Adopters | Yandex Cloud | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) | | Yandex DataLens | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) | | Yandex Market | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) | -| Yandex Metrica | Web analytics | Main product | 360 servers in one cluster, 1862 servers in one department | 66.41 PiB / 5.68 PiB | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) | +| Yandex Metrica | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) | | ЦВТ | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) | | МКБ | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) | | ЦФТ | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) | diff --git a/docs/en/operations/opentelemetry.md b/docs/en/operations/opentelemetry.md index 45533d3733f..2afeabc7956 100644 --- a/docs/en/operations/opentelemetry.md +++ b/docs/en/operations/opentelemetry.md @@ -44,11 +44,10 @@ stages, such as query planning or distributed queries. To be useful, the tracing information has to be exported to a monitoring system that supports OpenTelemetry, such as Jaeger or Prometheus. ClickHouse avoids -a dependency on a particular monitoring system, instead only -providing the tracing data conforming to the standard. A natural way to do so -in an SQL RDBMS is a system table. OpenTelemetry trace span information +a dependency on a particular monitoring system, instead only providing the +tracing data through a system table. OpenTelemetry trace span information [required by the standard](https://github.com/open-telemetry/opentelemetry-specification/blob/master/specification/overview.md#span) -is stored in the system table called `system.opentelemetry_span_log`. +is stored in the `system.opentelemetry_span_log` table.
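For a quick look at the collected spans, a query along these lines can be used (a sketch that relies only on the columns described in this section):

```sql
-- inspect the ten most recently started spans
SELECT
    operation_name,
    finish_time_us - start_time_us AS duration_us
FROM system.opentelemetry_span_log
ORDER BY start_time_us DESC
LIMIT 10
```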
The table must be enabled in the server configuration, see the `opentelemetry_span_log` element in the default config file `config.xml`. It is enabled by default. @@ -67,3 +66,31 @@ The table has the following columns: The tags or attributes are saved as two parallel arrays, containing the keys and values. Use `ARRAY JOIN` to work with them. + +## Integration with monitoring systems + +At the moment, there is no ready-made tool that can export the tracing data from +ClickHouse to a monitoring system. + +For testing, it is possible to set up the export using a materialized view with the URL engine over the `system.opentelemetry_span_log` table, which would push the arriving log data to an HTTP endpoint of a trace collector. For example, to push the minimal span data to a Zipkin instance running at `http://localhost:9411`, in Zipkin v2 JSON format: + +```sql +CREATE MATERIALIZED VIEW default.zipkin_spans +ENGINE = URL('http://127.0.0.1:9411/api/v2/spans', 'JSONEachRow') +SETTINGS output_format_json_named_tuples_as_objects = 1, + output_format_json_array_of_rows = 1 AS +SELECT + lower(hex(reinterpretAsFixedString(trace_id))) AS traceId, + lower(hex(parent_span_id)) AS parentId, + lower(hex(span_id)) AS id, + operation_name AS name, + start_time_us AS timestamp, + finish_time_us - start_time_us AS duration, + cast(tuple('clickhouse'), 'Tuple(serviceName text)') AS localEndpoint, + cast(tuple( + attribute.values[indexOf(attribute.names, 'db.statement')]), + 'Tuple("db.statement" text)') AS tags +FROM system.opentelemetry_span_log +``` + +In case of any errors, the part of the log data for which the error has occurred will be silently lost. Check the server log for error messages if the data does not arrive. diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 31a8e896438..e111cf3ab75 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -571,7 +571,7 @@ For more information, see the MergeTreeSettings.h header file. Fine tuning for tables in the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/mergetree.md). -This setting has higher priority. +This setting has a higher priority. For more information, see the MergeTreeSettings.h header file. @@ -1081,4 +1081,45 @@ Default value: `/var/lib/clickhouse/access/`. - [Access Control and Account Management](../../operations/access-rights.md#access-control) + +## user_directories {#user_directories} + +Section of the configuration file that contains the following settings: +- Path to the configuration file with predefined users. +- Path to the folder where users created by SQL commands are stored. + +If this section is specified, the path from [users_config](../../operations/server-configuration-parameters/settings.md#users-config) and [access_control_path](../../operations/server-configuration-parameters/settings.md#access_control_path) won't be used. + +The `user_directories` section can contain any number of items; the order of the items determines their precedence (the higher the item, the higher the precedence). + +**Example** + +``` xml +<user_directories> + <users_xml> + <path>/etc/clickhouse-server/users.xml</path> + </users_xml> + <local_directory> + <path>/var/lib/clickhouse/access/</path> + </local_directory> +</user_directories> +``` + +You can also specify the `memory` setting — meaning storage of information only in memory, without writing to disk — and the `ldap` setting — meaning storage of information on an LDAP server.
+ +To add an LDAP server as a remote user directory of users that are not defined locally, define a single `ldap` section with the following parameters: +- `server` — one of LDAP server names defined in `ldap_servers` config section. This parameter is mandatory and cannot be empty. +- `roles` — section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server. If no roles are specified, the user will not be able to perform any actions after authentication. If any of the listed roles is not defined locally at the time of authentication, the authentication attempt will fail as if the provided password was incorrect. + +**Example** + +``` xml +<ldap> + <server>my_ldap_server</server> + <roles> + <my_local_role1 /> + <my_local_role2 /> + </roles> +</ldap> +``` + [Original article](https://clickhouse.tech/docs/en/operations/server_configuration_parameters/settings/) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index d83f7d6c219..ba899754b18 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -307,7 +307,51 @@ Disabled by default. ## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number} -For TSV input format switches to parsing enum values as enum ids. +Enables or disables parsing enum values as enum IDs for TSV input format. + +Possible values: + +- 0 — Enum values are parsed as values. +- 1 — Enum values are parsed as enum IDs. + +Default value: 0. + +**Example** + +Consider the table: + +```sql +CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); +``` + +When the `input_format_tsv_enum_as_number` setting is enabled: + +```sql +SET input_format_tsv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 1; +SELECT * FROM table_with_enum_column_for_tsv_insert; +``` + +Result: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +┌──Id─┬─Value──┐ +│ 103 │ first │ +└─────┴────────┘ +``` + +When the `input_format_tsv_enum_as_number` setting is disabled, the `INSERT` query: + +```sql +SET input_format_tsv_enum_as_number = 0; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; +``` + +throws an exception. ## input_format_null_as_default {#settings-input-format-null-as-default} @@ -384,7 +428,7 @@ Possible values: - `'basic'` — Use basic parser. - ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` format. For example, `'2019-08-20 10:18:56'`. + ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `'2019-08-20 10:18:56'` or `'2019-08-20'`. Default value: `'basic'`. @@ -1182,7 +1226,47 @@ For CSV input format enables or disables parsing of unquoted `NULL` as literal ( ## input_format_csv_enum_as_number {#settings-input_format_csv_enum_as_number} -For CSV input format switches to parsing enum values as enum ids. +Enables or disables parsing enum values as enum IDs for CSV input format. + +Possible values: + +- 0 — Enum values are parsed as values. +- 1 — Enum values are parsed as enum IDs. + +Default value: 0.
+ +**Examples** + +Consider the table: + +```sql +CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); +``` + +When the `input_format_csv_enum_as_number` setting is enabled: + +```sql +SET input_format_csv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2; +SELECT * FROM table_with_enum_column_for_csv_insert; +``` + +Result: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +``` + +When the `input_format_csv_enum_as_number` setting is disabled, the `INSERT` query: + +```sql +SET input_format_csv_enum_as_number = 0; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2; +``` + +throws an exception. ## output_format_csv_crlf_end_of_line {#settings-output-format-csv-crlf-end-of-line} @@ -2233,4 +2317,10 @@ Possible values: Default value: `1`. +## output_format_tsv_null_representation {#output_format_tsv_null_representation} + +Allows configurable `NULL` representation for [TSV](../../interfaces/formats.md#tabseparated) output format. The setting only controls output format and `\N` is the only supported `NULL` representation for TSV input format. + +Default value: `\N`. + [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) diff --git a/docs/en/operations/system-tables/replicated_fetches.md b/docs/en/operations/system-tables/replicated_fetches.md new file mode 100644 index 00000000000..bc7e6335c0d --- /dev/null +++ b/docs/en/operations/system-tables/replicated_fetches.md @@ -0,0 +1,70 @@ +# system.replicated_fetches {#system_tables-replicated_fetches} + +Contains information about currently running background fetches. + +Columns: + +- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database. + +- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table. + +- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — The time elapsed (in seconds) since the background fetch started. + +- `progress` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of completed work from 0 to 1. + +- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — The name of the part that will be formed as the result of the fetch. + +- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the part that will be formed as the result of the fetch. + +- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition. + +- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The total size (in bytes) of the compressed data in the result part. + +- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of compressed bytes read from the result part. + +- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the source replica. + +- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the source replica. + +- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Port number of the source replica. + +- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — Name of the interserver scheme. + +- `URI` ([String](../../sql-reference/data-types/string.md)) — Uniform resource identifier.
+ +- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether the currently running background fetch is being performed using the `TO DETACHED` expression. + +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier. + +**Example** + +``` sql +SELECT * FROM system.replicated_fetches LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +database: default +table: t +elapsed: 7.243039876 +progress: 0.41832135995612835 +result_part_name: all_0_0_0 +result_part_path: /var/lib/clickhouse/store/700/70080a04-b2de-4adf-9fa5-9ea210e81766/all_0_0_0/ +partition_id: all +total_size_bytes_compressed: 1052783726 +bytes_read_compressed: 440401920 +source_replica_path: /clickhouse/test/t/replicas/1 +source_replica_hostname: node1 +source_replica_port: 9009 +interserver_scheme: http +URI: http://node1:9009/?endpoint=DataPartsExchange%3A%2Fclickhouse%2Ftest%2Ft%2Freplicas%2F1&part=all_0_0_0&client_protocol_version=4&compress=false +to_detached: 0 +thread_id: 54 +``` + +**See Also** + +- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system/#query-language-system-replicated) + +[Original article](https://clickhouse.tech/docs/en/operations/system_tables/replicated_fetches) diff --git a/docs/en/operations/utilities/clickhouse-copier.md b/docs/en/operations/utilities/clickhouse-copier.md index ec5a619b86b..4137bd6f334 100644 --- a/docs/en/operations/utilities/clickhouse-copier.md +++ b/docs/en/operations/utilities/clickhouse-copier.md @@ -70,11 +70,21 @@ Parameters: + false 127.0.0.1 9000 + ... diff --git a/docs/en/operations/utilities/clickhouse-obfuscator.md b/docs/en/operations/utilities/clickhouse-obfuscator.md index 8a2ea1eecf6..7fd608fcac0 100644 --- a/docs/en/operations/utilities/clickhouse-obfuscator.md +++ b/docs/en/operations/utilities/clickhouse-obfuscator.md @@ -1,42 +1,42 @@ -# ClickHouse obfuscator - -Simple tool for table data obfuscation. - -It reads input table and produces output table, that retain some properties of input, but contains different data. -It allows to publish almost real production data for usage in benchmarks. - -It is designed to retain the following properties of data: -- cardinalities of values (number of distinct values) for every column and for every tuple of columns; -- conditional cardinalities: number of distinct values of one column under condition on value of another column; -- probability distributions of absolute value of integers; sign of signed integers; exponent and sign for floats; -- probability distributions of length of strings; -- probability of zero values of numbers; empty strings and arrays, NULLs; -- data compression ratio when compressed with LZ77 and entropy family of codecs; -- continuity (magnitude of difference) of time values across table; continuity of floating point values. -- date component of DateTime values; -- UTF-8 validity of string values; -- string values continue to look somewhat natural. - -Most of the properties above are viable for performance testing: - -reading data, filtering, aggregation and sorting will work at almost the same speed -as on original data due to saved cardinalities, magnitudes, compression ratios, etc. - -It works in deterministic fashion: you define a seed value and transform is totally determined by input data and by seed. -Some transforms are one to one and could be reversed, so you need to have large enough seed and keep it in secret. 
- -It use some cryptographic primitives to transform data, but from the cryptographic point of view, -It doesn't do anything properly and you should never consider the result as secure, unless you have other reasons for it. - -It may retain some data you don't want to publish. - -It always leave numbers 0, 1, -1 as is. Also it leaves dates, lengths of arrays and null flags exactly as in source data. -For example, you have a column IsMobile in your table with values 0 and 1. In transformed data, it will have the same value. -So, the user will be able to count exact ratio of mobile traffic. - -Another example, suppose you have some private data in your table, like user email and you don't want to publish any single email address. -If your table is large enough and contain multiple different emails and there is no email that have very high frequency than all others, -It will perfectly anonymize all data. But if you have small amount of different values in a column, it can possibly reproduce some of them. -And you should take care and look at exact algorithm, how this tool works, and probably fine tune some of it command line parameters. - -This tool works fine only with reasonable amount of data (at least 1000s of rows). +# ClickHouse obfuscator + +A simple tool for table data obfuscation. + +It reads an input table and produces an output table that retains some properties of the input, but contains different data. +It allows publishing almost real production data for usage in benchmarks. + +It is designed to retain the following properties of data: +- cardinalities of values (number of distinct values) for every column and every tuple of columns; +- conditional cardinalities: number of distinct values of one column under the condition on the value of another column; +- probability distributions of the absolute value of integers; the sign of signed integers; exponent and sign for floats; +- probability distributions of the length of strings; +- probability of zero values of numbers; empty strings and arrays, `NULL`s; + +- data compression ratio when compressed with LZ77 and entropy family of codecs; +- continuity (magnitude of difference) of time values across the table; continuity of floating-point values; +- date component of `DateTime` values; + +- UTF-8 validity of string values; +- string values look natural. + +Most of the properties above are viable for performance testing: + +reading data, filtering, aggregation, and sorting will work at almost the same speed +as on original data due to saved cardinalities, magnitudes, compression ratios, etc. + +It works in a deterministic fashion: you define a seed value and the transformation is determined by input data and by seed. +Some transformations are one to one and could be reversed, so you need to have a large seed and keep it secret. + +It uses some cryptographic primitives to transform data, but from the cryptographic point of view it doesn't do it properly, which is why you should not consider the result secure unless you have another reason. The result may retain some data you don't want to publish. + + +It always leaves 0, 1, -1 numbers, dates, lengths of arrays, and null flags exactly as in source data. +For example, if you have a column `IsMobile` in your table with values 0 and 1, it will have the same value in the transformed data. + +So, the user will be able to count the exact ratio of mobile traffic. + +Let's give another example:
+If your table is large enough and contains multiple different emails, and no email has a much higher frequency than all the others, it will anonymize all data. But if you have a small number of different values in a column, it can reproduce some of them.
+You should study the exact algorithm of how this tool works, and fine-tune its command line parameters.
+
+This tool works fine only with a reasonable amount of data (at least 1000s of rows).
diff --git a/docs/en/sql-reference/aggregate-functions/index.md b/docs/en/sql-reference/aggregate-functions/index.md
index 270b7d8db39..543a5d3fed8 100644
--- a/docs/en/sql-reference/aggregate-functions/index.md
+++ b/docs/en/sql-reference/aggregate-functions/index.md
@@ -44,8 +44,6 @@ SELECT sum(y) FROM t_null_big
 └────────┘
 ```
 
-The `sum` function interprets `NULL` as `0`. In particular, this means that if the function receives input of a selection where all the values are `NULL`, then the result will be `0`, not `NULL`.
-
 Now you can use the `groupArray` function to create an array from the `y` column:
 
 ``` sql
diff --git a/docs/en/sql-reference/aggregate-functions/reference/avg.md b/docs/en/sql-reference/aggregate-functions/reference/avg.md
index 4ebae95b79d..e2e6aace734 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/avg.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/avg.md
@@ -4,4 +4,59 @@ toc_priority: 5
 
 # avg {#agg_function-avg}
 
-Calculates the average. Only works for numbers. The result is always Float64.
+Calculates the arithmetic mean.
+
+**Syntax**
+
+``` sql
+avg(x)
+```
+
+**Parameter**
+
+- `x` — Values.
+
+`x` must be
+[Integer](../../../sql-reference/data-types/int-uint.md),
+[floating-point](../../../sql-reference/data-types/float.md), or
+[Decimal](../../../sql-reference/data-types/decimal.md).
+
+**Returned value**
+
+- `NaN` if the supplied parameter is empty.
+- Mean otherwise.
+
+**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT avg(x) FROM values('x Int8', 0, 1, 2, 3, 4, 5)
+```
+
+Result:
+
+``` text
+┌─avg(x)─┐
+│ 2.5 │
+└────────┘
+```
+
+**Example**
+
+Query:
+
+``` sql
+CREATE table test (t UInt8) ENGINE = Memory;
+SELECT avg(t) FROM test
+```
+
+Result:
+
+``` text
+┌─avg(t)─┐
+│ nan │
+└────────┘
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md
index 20b7187a744..7b9c0de2755 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md
@@ -14,17 +14,21 @@ avgWeighted(x, weight)
 
 **Parameters**
 
-- `x` — Values. [Integer](../../../sql-reference/data-types/int-uint.md) or [floating-point](../../../sql-reference/data-types/float.md).
-- `weight` — Weights of the values. [Integer](../../../sql-reference/data-types/int-uint.md) or [floating-point](../../../sql-reference/data-types/float.md).
+- `x` — Values.
+- `weight` — Weights of the values.
 
-Type of `x` and `weight` must be the same.
+`x` and `weight` must both be
+[Integer](../../../sql-reference/data-types/int-uint.md),
+[floating-point](../../../sql-reference/data-types/float.md), or
+[Decimal](../../../sql-reference/data-types/decimal.md),
+but may have different types.
 
 **Returned value**
 
-- Weighted mean.
-- `NaN`. If all the weights are equal to 0.
+- `NaN` if all the weights are equal to 0 or the supplied weights parameter is empty.
+- Weighted mean otherwise.
 
-Type: [Float64](../../../sql-reference/data-types/float.md).
+**Return type** is always [Float64](../../../sql-reference/data-types/float.md).
 
 **Example**
 
@@ -42,3 +46,54 @@ Result:
 │ 8 │
 └────────────────────────┘
 ```
+
+**Example**
+
+Query:
+
+``` sql
+SELECT avgWeighted(x, w)
+FROM values('x Int8, w Float64', (4, 1), (1, 0), (10, 2))
+```
+
+Result:
+
+``` text
+┌─avgWeighted(x, w)─┐
+│ 8 │
+└───────────────────┘
+```
+
+**Example**
+
+Query:
+
+``` sql
+SELECT avgWeighted(x, w)
+FROM values('x Int8, w Int8', (0, 0), (1, 0), (10, 0))
+```
+
+Result:
+
+``` text
+┌─avgWeighted(x, w)─┐
+│ nan │
+└───────────────────┘
+```
+
+**Example**
+
+Query:
+
+``` sql
+CREATE table test (t UInt8) ENGINE = Memory;
+SELECT avgWeighted(t, t) FROM test
+```
+
+Result:
+
+``` text
+┌─avgWeighted(t, t)─┐
+│ nan │
+└───────────────────┘
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md
index 6e00ad8d991..b96fa887279 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/index.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/index.md
@@ -50,8 +50,6 @@ ClickHouse-specific aggregate functions:
 - [skewPop](../../../sql-reference/aggregate-functions/reference/skewpop.md)
 - [kurtSamp](../../../sql-reference/aggregate-functions/reference/kurtsamp.md)
 - [kurtPop](../../../sql-reference/aggregate-functions/reference/kurtpop.md)
-- [timeSeriesGroupSum](../../../sql-reference/aggregate-functions/reference/timeseriesgroupsum.md)
-- [timeSeriesGroupRateSum](../../../sql-reference/aggregate-functions/reference/timeseriesgroupratesum.md)
 - [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md)
 - [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md)
 - [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md b/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md
new file mode 100644
index 00000000000..ea44d5f1ddd
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md
@@ -0,0 +1,37 @@
+---
+toc_priority: 150
+---
+
+## initializeAggregation {#initializeaggregation}
+
+Initializes aggregation for your input rows. It is intended for the functions with the suffix `State`.
+Use it for tests or to process columns of type `AggregateFunction`, for example with the `AggregatingMergeTree` table engine.
+
+**Syntax**
+
+``` sql
+initializeAggregation (aggregate_function, column_1, column_2);
+```
+
+**Parameters**
+
+- `aggregate_function` — Name of the aggregation function whose state should be created. [String](../../../sql-reference/data-types/string.md#string).
+- `column_n` — The column that is passed to the function as an argument. [String](../../../sql-reference/data-types/string.md#string).
+
+**Returned value(s)**
+
+Returns the result of the aggregation for your input rows. The return type is the same as the return type of the function that `initializeAggregation` takes as its first argument.
+For example, for functions with the suffix `State` the return type is `AggregateFunction`.
+
+**Example**
+
+Query:
+
+```sql
+SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
+```
+
+Result:
+
+``` text
+┌─uniqMerge(state)─┐
+│ 3 │
+└──────────────────┘
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md
new file mode 100644
index 00000000000..dc23029f239
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md
@@ -0,0 +1,53 @@
+## rankCorr {#agg_function-rankcorr}
+
+Computes a rank correlation coefficient.
+
+**Syntax**
+
+``` sql
+rankCorr(x, y)
+```
+
+**Parameters**
+
+- `x` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64).
+- `y` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64).
+
+**Returned value(s)**
+
+- Returns a rank correlation coefficient of the ranks of x and y. The value of the correlation coefficient ranges from -1 to +1. If fewer than two arguments are passed, the function returns an exception. A value close to +1 denotes a strong relationship: as one random variable increases, the second random variable also increases. A value close to -1 denotes a strong inverse relationship: as one random variable increases, the second random variable decreases. A value close or equal to 0 denotes no relationship between the two random variables.
+
+Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT rankCorr(number, number) FROM numbers(100);
+```
+
+Result:
+
+``` text
+┌─rankCorr(number, number)─┐
+│ 1 │
+└──────────────────────────┘
+```
+
+Query:
+
+``` sql
+SELECT roundBankers(rankCorr(exp(number), sin(number)), 3) FROM numbers(100);
+```
+
+Result:
+
+``` text
+┌─roundBankers(rankCorr(exp(number), sin(number)), 3)─┐
+│ -0.037 │
+└─────────────────────────────────────────────────────┘
+```
+
+**See Also**
+
+- [Spearman's rank correlation coefficient](https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient)
\ No newline at end of file
diff --git a/docs/en/sql-reference/aggregate-functions/reference/timeseriesgroupratesum.md b/docs/en/sql-reference/aggregate-functions/reference/timeseriesgroupratesum.md
deleted file mode 100644
index d9b519a9cde..00000000000
--- a/docs/en/sql-reference/aggregate-functions/reference/timeseriesgroupratesum.md
+++ /dev/null
@@ -1,16 +0,0 @@
----
-toc_priority: 171
----
-
-# timeSeriesGroupRateSum {#agg-function-timeseriesgroupratesum}
-
-Syntax: `timeSeriesGroupRateSum(uid, ts, val)`
-
-Similarly to [timeSeriesGroupSum](../../../sql-reference/aggregate-functions/reference/timeseriesgroupsum.md), `timeSeriesGroupRateSum` calculates the rate of time-series and then sum rates together.
-Also, timestamp should be in ascend order before use this function.
- -Applying this function to the data from the `timeSeriesGroupSum` example, you get the following result: - -``` text -[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)] -``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/timeseriesgroupsum.md b/docs/en/sql-reference/aggregate-functions/reference/timeseriesgroupsum.md deleted file mode 100644 index aa90c7956df..00000000000 --- a/docs/en/sql-reference/aggregate-functions/reference/timeseriesgroupsum.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -toc_priority: 170 ---- - -# timeSeriesGroupSum {#agg-function-timeseriesgroupsum} - -Syntax: `timeSeriesGroupSum(uid, timestamp, value)` - -`timeSeriesGroupSum` can aggregate different time series that sample timestamp not alignment. -It will use linear interpolation between two sample timestamp and then sum time-series together. - -- `uid` is the time series unique id, `UInt64`. -- `timestamp` is Int64 type in order to support millisecond or microsecond. -- `value` is the metric. - -The function returns array of tuples with `(timestamp, aggregated_value)` pairs. - -Before using this function make sure `timestamp` is in ascending order. - -Example: - -``` text -┌─uid─┬─timestamp─┬─value─┐ -│ 1 │ 2 │ 0.2 │ -│ 1 │ 7 │ 0.7 │ -│ 1 │ 12 │ 1.2 │ -│ 1 │ 17 │ 1.7 │ -│ 1 │ 25 │ 2.5 │ -│ 2 │ 3 │ 0.6 │ -│ 2 │ 8 │ 1.6 │ -│ 2 │ 12 │ 2.4 │ -│ 2 │ 18 │ 3.6 │ -│ 2 │ 24 │ 4.8 │ -└─────┴───────────┴───────┘ -``` - -``` sql -CREATE TABLE time_series( - uid UInt64, - timestamp Int64, - value Float64 -) ENGINE = Memory; -INSERT INTO time_series VALUES - (1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5), - (2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8); - -SELECT timeSeriesGroupSum(uid, timestamp, value) -FROM ( - SELECT * FROM time_series order by timestamp ASC -); -``` - -And the result will be: - -``` text -[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)] -``` diff --git a/docs/en/sql-reference/data-types/date.md b/docs/en/sql-reference/data-types/date.md index c686a987b2f..886e93f433c 100644 --- a/docs/en/sql-reference/data-types/date.md +++ b/docs/en/sql-reference/data-types/date.md @@ -3,10 +3,45 @@ toc_priority: 47 toc_title: Date --- -# Date {#date} +# Date {#data_type-date} A date. Stored in two bytes as the number of days since 1970-01-01 (unsigned). Allows storing values from just after the beginning of the Unix Epoch to the upper threshold defined by a constant at the compilation stage (currently, this is until the year 2106, but the final fully-supported year is 2105). The date value is stored without the time zone. 
+## Examples {#examples}
+
+**1.** Creating a table with a `Date`-type column and inserting data into it:
+
+``` sql
+CREATE TABLE dt
+(
+    `timestamp` Date,
+    `event_id` UInt8
+)
+ENGINE = TinyLog;
+```
+
+``` sql
+INSERT INTO dt Values (1546300800, 1), ('2019-01-01', 2);
+```
+
+``` sql
+SELECT * FROM dt;
+```
+
+``` text
+┌──timestamp─┬─event_id─┐
+│ 2019-01-01 │ 1 │
+│ 2019-01-01 │ 2 │
+└────────────┴──────────┘
+```
+
+## See Also {#see-also}
+
+- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
+- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)
+- [`DateTime` data type](../../sql-reference/data-types/datetime.md)
+
+
 [Original article](https://clickhouse.tech/docs/en/data_types/date/)
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md
new file mode 100644
index 00000000000..93b9b340e89
--- /dev/null
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md
@@ -0,0 +1,91 @@
+---
+toc_priority: 46
+toc_title: Polygon Dictionaries With Grids
+---
+
+
+# Polygon dictionaries {#polygon-dictionaries}
+
+Polygon dictionaries allow you to efficiently search for the polygon containing specified points.
+For example: defining a city area by geographical coordinates.
+
+Example configuration:
+
+``` xml
+<dictionary>
+    <structure>
+        <key>
+            <name>key</name>
+            <type>Array(Array(Array(Array(Float64))))</type>
+        </key>
+
+        <attribute>
+            <name>name</name>
+            <type>String</type>
+            <null_value></null_value>
+        </attribute>
+
+        <attribute>
+            <name>value</name>
+            <type>UInt64</type>
+            <null_value>0</null_value>
+        </attribute>
+    </structure>
+
+    <layout>
+        <polygon/>
+    </layout>
+</dictionary>
+```
+
+The corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md#create-dictionary-query):
+``` sql
+CREATE DICTIONARY polygon_dict_name (
+    key Array(Array(Array(Array(Float64)))),
+    name String,
+    value UInt64
+)
+PRIMARY KEY key
+LAYOUT(POLYGON())
+...
+```
+
+When configuring the polygon dictionary, the key must have one of two types:
+- A simple polygon. It is an array of points.
+- MultiPolygon. It is an array of polygons. Each polygon is a two-dimensional array of points. The first element of this array is the outer boundary of the polygon, and subsequent elements specify areas to be excluded from it.
+
+Points can be specified as an array or a tuple of their coordinates. In the current implementation, only two-dimensional points are supported.
+
+The user can [upload their own data](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) in all formats supported by ClickHouse.
+
+
+There are 3 types of [in-memory storage](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) available:
+
+- POLYGON_SIMPLE. This is a naive implementation, where a linear pass through all polygons is made for each query, and membership is checked for each one without using additional indexes.
+
+- POLYGON_INDEX_EACH. A separate index is built for each polygon, which in most cases allows you to quickly check whether a point belongs to it (optimized for geographical regions).
+Also, a grid is superimposed on the area under consideration, which significantly narrows the number of polygons under consideration.
+The grid is created by recursively dividing the cell into 16 equal parts and is configured with two parameters.
+The division stops when the recursion depth reaches MAX_DEPTH or when the cell crosses no more than MIN_INTERSECTIONS polygons.
+To answer a query, the cell corresponding to the point is located, and the indexes of the polygons stored in it are accessed one by one.
+
+- POLYGON_INDEX_CELL. This placement also creates the grid described above. The same options are available. For each leaf cell, an index is built on all the pieces of polygons that fall into it, which allows a query to be answered quickly.
+
+- POLYGON. Synonym to POLYGON_INDEX_CELL.
+
+Dictionary queries are carried out using standard [functions](../../../sql-reference/functions/ext-dict-functions.md) for working with external dictionaries.
+An important difference is that here the keys will be the points for which you want to find the polygon containing them.
+
+Example of working with the dictionary defined above:
+``` sql
+CREATE TABLE points (
+    x Float64,
+    y Float64
+)
+...
+SELECT tuple(x, y) AS key, dictGet(dict_name, 'name', key), dictGet(dict_name, 'value', key) FROM points ORDER BY x, y;
+```
+
+As a result of executing the last command, for each point in the 'points' table a minimum-area polygon containing the point will be found, and the requested attributes will be output.
diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index a5a347e553a..6582c78cb3a 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -67,9 +67,8 @@ Leap seconds are not accounted for.
 
 ## toUnixTimestamp {#to-unix-timestamp}
 
-For DateTime argument: converts value to its internal numeric representation (Unix Timestamp).
-For String argument: parse datetime from string according to the timezone (optional second argument, server timezone is used by default) and returns the corresponding unix timestamp.
-For Date argument: the behaviour is unspecified.
+For DateTime argument: converts the value to a number of type UInt32, the Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time).
+For String argument: parses the input string as a datetime according to the timezone (optional second argument, server timezone is used by default) and returns the corresponding Unix Timestamp.
 
 **Syntax**
 
@@ -337,26 +336,124 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d
 └────────────┴───────────┴───────────┴───────────┘
 ```
 
-## date_trunc(datepart, time_or_data\[, time_zone\]), dateTrunc(datepart, time_or_data\[, time_zone\]) {#date_trunc}
+## date_trunc {#date_trunc}
 
-Truncates a date or date with time based on the specified datepart, such as
-- `second`
-- `minute`
-- `hour`
-- `day`
-- `week`
-- `month`
-- `quarter`
-- `year`
+Truncates date and time data to the specified part of date.
 
-```sql
-SELECT date_trunc('hour', now())
+**Syntax**
+
+``` sql
+date_trunc(unit, value[, timezone])
 ```
 
-## now {#now}
+Alias: `dateTrunc`.
 
-Accepts zero or one arguments(timezone) and returns the current time at one of the moments of request execution, or current time of specific timezone at one of the moments of request execution if `timezone` argument provided.
-This function returns a constant, even if the request took a long time to complete.
+**Parameters**
+
+- `unit` — Part of date. [String](../syntax.md#syntax-string-literal).
+    Possible values:
+
+    - `second`
+    - `minute`
+    - `hour`
+    - `day`
+    - `week`
+    - `month`
+    - `quarter`
+    - `year`
+
+- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
+- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md).
+
+**Returned value**
+
+- Value, truncated to the specified part of date.
+
+Type: [DateTime](../../sql-reference/data-types/datetime.md).
+
+**Example**
+
+Query without timezone:
+
+``` sql
+SELECT now(), date_trunc('hour', now());
+```
+
+Result:
+
+``` text
+┌───────────────now()─┬─date_trunc('hour', now())─┐
+│ 2020-09-28 10:40:45 │ 2020-09-28 10:00:00 │
+└─────────────────────┴───────────────────────────┘
+```
+
+Query with the specified timezone:
+
+```sql
+SELECT now(), date_trunc('hour', now(), 'Europe/Moscow');
+```
+
+Result:
+
+```text
+┌───────────────now()─┬─date_trunc('hour', now(), 'Europe/Moscow')─┐
+│ 2020-09-28 10:46:26 │ 2020-09-28 13:00:00 │
+└─────────────────────┴────────────────────────────────────────────┘
+```
+
+**See also**
+
+- [toStartOfInterval](#tostartofintervaltime-or-data-interval-x-unit-time-zone)
+
+## now {#now}
+
+Returns the current date and time.
+
+**Syntax**
+
+``` sql
+now([timezone])
+```
+
+**Parameters**
+
+- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md).
+
+**Returned value**
+
+- Current date and time.
+
+Type: [DateTime](../../sql-reference/data-types/datetime.md).
+
+**Example**
+
+Query without timezone:
+
+``` sql
+SELECT now();
+```
+
+Result:
+
+``` text
+┌───────────────now()─┐
+│ 2020-10-17 07:42:09 │
+└─────────────────────┘
+```
+
+Query with the specified timezone:
+
+``` sql
+SELECT now('Europe/Moscow');
+```
+
+Result:
+
+``` text
+┌─now('Europe/Moscow')─┐
+│ 2020-10-17 10:42:23 │
+└──────────────────────┘
+```
 
 ## today {#today}
 
@@ -437,18 +534,7 @@ dateDiff('unit', startdate, enddate, [timezone])
 
 - `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal).
 
-    Supported values:
-
-    | unit |
-    | ---- |
-    |second |
-    |minute |
-    |hour |
-    |day |
-    |week |
-    |month |
-    |quarter |
-    |year |
+    Supported values: second, minute, hour, day, week, month, quarter, year.
 
 - `startdate` — The first time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
 
diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md
new file mode 100644
index 00000000000..bef2f8137d0
--- /dev/null
+++ b/docs/en/sql-reference/functions/encryption-functions.md
@@ -0,0 +1,381 @@
+---
+toc_priority: 67
+toc_title: Encryption
+---
+
+# Encryption functions {#encryption-functions}
+
+These functions implement encryption and decryption of data with the AES (Advanced Encryption Standard) algorithm.
+
+Key length depends on encryption mode. It is 16, 24, and 32 bytes long for `-128-`, `-192-`, and `-256-` modes respectively.
+
+Initialization vector length is always 16 bytes (bytes in excess of 16 are ignored).
+
+Note that these functions work slowly.
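+
+For instance, a minimal sketch of these length requirements (hypothetical literal keys built with `repeat`, assuming a server built with encryption support):
+
+``` sql
+-- aes-128-* modes expect a 16-byte key; aes-192-* a 24-byte key; aes-256-* a 32-byte key.
+-- The initialization vector, for the modes that use one, is 16 bytes.
+SELECT hex(encrypt('aes-128-ecb', 'text', repeat('k', 16)));
+SELECT hex(encrypt('aes-256-cbc', 'text', repeat('k', 32), repeat('i', 16)));
+```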
+
+## encrypt {#encrypt}
+
+This function encrypts data using these modes:
+
+- aes-128-ecb, aes-192-ecb, aes-256-ecb
+- aes-128-cbc, aes-192-cbc, aes-256-cbc
+- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
+- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
+- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
+- aes-128-ofb, aes-192-ofb, aes-256-ofb
+- aes-128-gcm, aes-192-gcm, aes-256-gcm
+
+**Syntax**
+
+``` sql
+encrypt('mode', 'plaintext', 'key' [, iv, aad])
+```
+
+**Parameters**
+
+- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
+- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
+- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
+- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
+- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes; for other modes an exception is thrown. [String](../../sql-reference/data-types/string.md#string).
+
+**Returned value**
+
+- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
+
+**Examples**
+
+Create this table:
+
+Query:
+
+``` sql
+CREATE TABLE encryption_test
+(
+    input String,
+    key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
+    iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
+    key32 String DEFAULT substring(key, 1, 32),
+    key24 String DEFAULT substring(key, 1, 24),
+    key16 String DEFAULT substring(key, 1, 16)
+) Engine = Memory;
+```
+
+Insert this data:
+
+Query:
+
+``` sql
+INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
+```
+
+Example without `iv`:
+
+Query:
+
+``` sql
+SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test;
+```
+
+Result:
+
+``` text
+┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐
+│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │
+│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │
+│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │
+└─────────────┴──────────────────────────────────────────────────────────────────┘
+```
+
+Example with `iv`:
+
+Query:
+
+``` sql
+SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
+```
+
+Result:
+
+``` text
+┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐
+│ aes-256-ctr │ │
+│ aes-256-ctr │ 7FB039F7 │
+│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │
+└─────────────┴───────────────────────────────────────────────┘
+```
+
+Example with `-gcm`:
+
+Query:
+
+``` sql
+SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
+```
+
+Result:
+
+``` text
+┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐
+│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │
+│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │
+│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │
+└─────────────┴────────────────────────────────────────────────────────────────────────┘
+```
+
+Example with `-gcm` mode and with `aad`:
+
+Query:
+
+``` sql
+SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
+```
+
+Result:
+
+``` text
+┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
+│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │
+│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │
+│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │
+└─────────────┴────────────────────────────────────────────────────────────────────────┘
+```
+
+## aes_encrypt_mysql {#aes_encrypt_mysql}
+
+Compatible with MySQL encryption and can be decrypted with the [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
+
+Supported encryption modes:
+
+- aes-128-ecb, aes-192-ecb, aes-256-ecb
+- aes-128-cbc, aes-192-cbc, aes-256-cbc
+- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
+- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
+- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
+- aes-128-ofb, aes-192-ofb, aes-256-ofb
+
+**Syntax**
+
+```sql
+aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
+```
+
+**Parameters**
+
+- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
+- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
+- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
+- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string).
+
+**Returned value**
+
+- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
+
+**Examples**
+
+Create this table:
+
+Query:
+
+``` sql
+CREATE TABLE encryption_test
+(
+    input String,
+    key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
+    iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
+    key32 String DEFAULT substring(key, 1, 32),
+    key24 String DEFAULT substring(key, 1, 24),
+    key16 String DEFAULT substring(key, 1, 16)
+) Engine = Memory;
+```
+
+Insert this data:
+
+Query:
+
+``` sql
+INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
+```
+
+Example without `iv`:
+
+Query:
+
+``` sql
+SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test;
+```
+
+Result:
+
+``` text
+┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐
+│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │
+│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │
+│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │
+└─────────────┴──────────────────────────────────────────────────────────────────┘
+```
+
+Example with `iv`:
+
+Query:
+
+``` sql
+SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test;
+```
+
+Result:
+
+``` text
+┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐
+│ aes-256-cfb128 │ │
+│ aes-256-cfb128 │ 7FB039F7 │
+│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │
+└────────────────┴────────────────────────────────────────────────────────────┘
+```
+
+## decrypt {#decrypt}
+
+This function decrypts data using these modes:
+
+- aes-128-ecb, aes-192-ecb, aes-256-ecb
+- aes-128-cbc, aes-192-cbc, aes-256-cbc
+- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
+- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
+- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
+- aes-128-ofb, aes-192-ofb, aes-256-ofb
+- aes-128-gcm, aes-192-gcm, aes-256-gcm
+
+**Syntax**
+
+```sql
+decrypt('mode', 'ciphertext', 'key' [, iv, aad])
+```
+
+**Parameters**
+
+- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
+- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
+- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
+- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
+- `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes; for other modes an exception is thrown. [String](../../sql-reference/data-types/string.md#string).
+
+**Returned value**
+
+- Decrypted String. [String](../../sql-reference/data-types/string.md#string).
+
+**Examples**
+
+Create this table:
+
+Query:
+
+``` sql
+CREATE TABLE encryption_test
+(
+    input String,
+    key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
+    iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
+    key32 String DEFAULT substring(key, 1, 32),
+    key24 String DEFAULT substring(key, 1, 24),
+    key16 String DEFAULT substring(key, 1, 16)
+) Engine = Memory;
+```
+
+Insert this data:
+
+Query:
+
+``` sql
+INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
+```
+
+Query:
+
+``` sql
+SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test;
+```
+
+Result:
+
+```text
+┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐
+│ aes-128-ecb │ │
+│ aes-128-ecb │ text │
+│ aes-128-ecb │ What Is ClickHouse? │
+└─────────────┴─────────────────────────────────────────────────────────────────────┘
+```
+
+## aes_decrypt_mysql {#aes_decrypt_mysql}
+
+Compatible with MySQL encryption and decrypts data encrypted with the [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function.
+
+Supported decryption modes:
+
+- aes-128-ecb, aes-192-ecb, aes-256-ecb
+- aes-128-cbc, aes-192-cbc, aes-256-cbc
+- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
+- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
+- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
+- aes-128-ofb, aes-192-ofb, aes-256-ofb
+
+**Syntax**
+
+```sql
+aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
+```
+
+**Parameters**
+
+- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
+- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
+- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
+- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string).
+
+**Returned value**
+
+- Decrypted String. [String](../../sql-reference/data-types/string.md#string).
+ +**Examples** + +Create this table: + +Query: + +``` sql +CREATE TABLE encryption_test +( + input String, + key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), + iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), + key32 String DEFAULT substring(key, 1, 32), + key24 String DEFAULT substring(key, 1, 24), + key16 String DEFAULT substring(key, 1, 16) +) Engine = Memory; +``` + +Insert this data: + +Query: + +``` sql +INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +``` + +Query: + +``` sql +SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test; +``` + +Result: + +``` text +┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐ +│ aes-128-cbc │ │ +│ aes-128-cbc │ text │ +│ aes-128-cbc │ What Is ClickHouse? │ +└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘ +``` + +[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/encryption_functions/) diff --git a/docs/en/sql-reference/functions/in-functions.md b/docs/en/sql-reference/functions/in-functions.md index 065805a36ae..dd3c1900fdc 100644 --- a/docs/en/sql-reference/functions/in-functions.md +++ b/docs/en/sql-reference/functions/in-functions.md @@ -9,16 +9,4 @@ toc_title: IN Operator See the section [IN operators](../../sql-reference/operators/in.md#select-in-operators). -## tuple(x, y, …), operator (x, y, …) {#tuplex-y-operator-x-y} - -A function that allows grouping multiple columns. -For columns with the types T1, T2, …, it returns a Tuple(T1, T2, …) type tuple containing these columns. There is no cost to execute the function. -Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can’t be written to a table. - -## tupleElement(tuple, n), operator x.N {#tupleelementtuple-n-operator-x-n} - -A function that allows getting a column from a tuple. -‘N’ is the column index, starting from 1. N must be a constant. ‘N’ must be a constant. ‘N’ must be a strict postive integer no greater than the size of the tuple. -There is no cost to execute the function. - [Original article](https://clickhouse.tech/docs/en/query_language/functions/in_functions/) diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 3e63fc9946b..bfa1998d68a 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -306,3 +306,67 @@ execute_native_thread_routine start_thread clone ``` +## tid {#tid} + +Returns id of the thread, in which current [Block](https://clickhouse.tech/docs/en/development/architecture/#block) is processed. + +**Syntax** + +``` sql +tid() +``` + +**Returned value** + +- Current thread id. [Uint64](../../sql-reference/data-types/int-uint.md#uint-ranges). + +**Example** + +Query: + +``` sql +SELECT tid(); +``` + +Result: + +``` text +┌─tid()─┐ +│ 3878 │ +└───────┘ +``` +## logTrace {#logtrace} + +Emits trace log message to server log for each [Block](https://clickhouse.tech/docs/en/development/architecture/#block). + +**Syntax** + +``` sql +logTrace('message') +``` + +**Parameters** + +- `message` — Message that is emitted to server log. [String](../../sql-reference/data-types/string.md#string). 
+ +**Returned value** + +- Always returns 0. + +**Example** + +Query: + +``` sql +SELECT logTrace('logTrace message'); +``` + +Result: + +``` text +┌─logTrace('logTrace message')─┐ +│ 0 │ +└──────────────────────────────┘ +``` + +[Original article](https://clickhouse.tech/docs/en/query_language/functions/introspection/) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 31ed47c3195..51a1f6b4cd7 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -1,5 +1,5 @@ --- -toc_priority: 66 +toc_priority: 67 toc_title: Other --- diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 881139f103c..dba8a6e275c 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -536,4 +536,58 @@ For case-insensitive search or/and in UTF-8 format use functions `ngramSearchCas !!! note "Note" For UTF-8 case we use 3-gram distance. All these are not perfectly fair n-gram distances. We use 2-byte hashes to hash n-grams and then calculate the (non-)symmetric difference between these hash tables – collisions may occur. With UTF-8 case-insensitive format we do not use fair `tolower` function – we zero the 5-th bit (starting from zero) of each codepoint byte and first bit of zeroth byte if bytes more than one – this works for Latin and mostly for all Cyrillic letters. +## countSubstrings(haystack, needle) {#countSubstrings} + +Count the number of substring occurrences + +For a case-insensitive search, use the function `countSubstringsCaseInsensitive` (or `countSubstringsCaseInsensitiveUTF8`). + +**Syntax** + +``` sql +countSubstrings(haystack, needle[, start_pos]) +``` + +**Parameters** + +- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `start_pos` – Optional parameter, position of the first character in the string to start search. [UInt](../../sql-reference/data-types/int-uint.md) + +**Returned values** + +- Number of occurrences. + +Type: `Integer`. + +**Examples** + +Query: + +``` sql +SELECT countSubstrings('foobar.com', '.') +``` + +Result: + +``` text +┌─countSubstrings('foobar.com', '.')─┐ +│ 1 │ +└────────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT countSubstrings('aaaa', 'aa') +``` + +Result: + +``` text +┌─countSubstrings('aaaa', 'aa')─┐ +│ 2 │ +└───────────────────────────────┘ +``` + [Original article](https://clickhouse.tech/docs/en/query_language/functions/string_search_functions/) diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md new file mode 100644 index 00000000000..dcbcd3e374b --- /dev/null +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -0,0 +1,114 @@ +--- +toc_priority: 66 +toc_title: Tuples +--- + +# Functions for Working with Tuples {#tuple-functions} + +## tuple {#tuple} + +A function that allows grouping multiple columns. +For columns with the types T1, T2, …, it returns a Tuple(T1, T2, …) type tuple containing these columns. There is no cost to execute the function. +Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. 
Tuples can’t be written to a table.
+
+The function implements the operator `(x, y, …)`.
+
+**Syntax**
+
+``` sql
+tuple(x, y, …)
+```
+
+## tupleElement {#tupleelement}
+
+A function that allows getting a column from a tuple.
+‘N’ is the column index, starting from 1. ‘N’ must be a constant, strictly positive integer no greater than the size of the tuple.
+There is no cost to execute the function.
+
+The function implements the operator `x.N`.
+
+**Syntax**
+
+``` sql
+tupleElement(tuple, n)
+```
+
+## untuple {#untuple}
+
+Performs syntactic substitution of [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) elements in the call location.
+
+**Syntax**
+
+``` sql
+untuple(x)
+```
+
+You can use the `EXCEPT` expression to skip columns as a result of the query.
+
+**Parameters**
+
+- `x` — A `tuple` function, column, or tuple of elements. [Tuple](../../sql-reference/data-types/tuple.md).
+
+**Returned value**
+
+- None.
+
+**Examples**
+
+Input table:
+
+``` text
+┌─key─┬─v1─┬─v2─┬─v3─┬─v4─┬─v5─┬─v6────────┐
+│ 1 │ 10 │ 20 │ 40 │ 30 │ 15 │ (33,'ab') │
+│ 2 │ 25 │ 65 │ 70 │ 40 │ 6 │ (44,'cd') │
+│ 3 │ 57 │ 30 │ 20 │ 10 │ 5 │ (55,'ef') │
+│ 4 │ 55 │ 12 │ 7 │ 80 │ 90 │ (66,'gh') │
+│ 5 │ 30 │ 50 │ 70 │ 25 │ 55 │ (77,'kl') │
+└─────┴────┴────┴────┴────┴────┴───────────┘
+```
+
+Example of using a `Tuple`-type column as the `untuple` function parameter:
+
+Query:
+
+``` sql
+SELECT untuple(v6) FROM kv;
+```
+
+Result:
+
+``` text
+┌─_ut_1─┬─_ut_2─┐
+│ 33 │ ab │
+│ 44 │ cd │
+│ 55 │ ef │
+│ 66 │ gh │
+│ 77 │ kl │
+└───────┴───────┘
+```
+
+Example of using an `EXCEPT` expression:
+
+Query:
+
+``` sql
+SELECT untuple((* EXCEPT (v2, v3),)) FROM kv;
+```
+
+Result:
+
+``` text
+┌─key─┬─v1─┬─v4─┬─v5─┬─v6────────┐
+│ 1 │ 10 │ 30 │ 15 │ (33,'ab') │
+│ 2 │ 25 │ 40 │ 6 │ (44,'cd') │
+│ 3 │ 57 │ 10 │ 5 │ (55,'ef') │
+│ 4 │ 55 │ 80 │ 90 │ (66,'gh') │
+│ 5 │ 30 │ 25 │ 55 │ (77,'kl') │
+└─────┴────┴────┴────┴───────────┘
+```
+
+**See Also**
+
+- [Tuple](../../sql-reference/data-types/tuple.md)
+
+[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/tuple-functions/)
diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md
index fc066a34b0b..f8458b27a22 100644
--- a/docs/en/sql-reference/functions/type-conversion-functions.md
+++ b/docs/en/sql-reference/functions/type-conversion-functions.md
@@ -325,7 +325,59 @@ This function accepts a number or date or date with time, and returns a FixedStr
 
 ## reinterpretAsUUID {#reinterpretasuuid}
 
-This function accepts FixedString, and returns UUID. Takes 16 bytes string. If the string isn't long enough, the functions work as if the string is padded with the necessary number of null bytes to the end. If the string longer than 16 bytes, the extra bytes at the end are ignored.
+This function accepts a 16-byte string and returns a UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored.
+
+**Syntax**
+
+``` sql
+reinterpretAsUUID(fixed_string)
+```
+
+**Parameters**
+
+- `fixed_string` — Big-endian byte string. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring).
+
+**Returned value**
+
+- The UUID type value. [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type).
+
+**Examples**
+
+String to UUID.
+
+Query:
+
+``` sql
+SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))
+```
+
+Result:
+
+``` text
+┌─reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))─┐
+│ 08090a0b-0c0d-0e0f-0001-020304050607 │
+└───────────────────────────────────────────────────────────────────────┘
+```
+
+Going back and forth from String to UUID.
+
+Query:
+
+``` sql
+WITH
+    generateUUIDv4() AS uuid,
+    identity(lower(hex(reverse(reinterpretAsString(uuid))))) AS str,
+    reinterpretAsUUID(reverse(unhex(str))) AS uuid2
+SELECT uuid = uuid2;
+```
+
+Result:
+
+``` text
+┌─equals(uuid, uuid2)─┐
+│ 1 │
+└─────────────────────┘
+```
 
 ## CAST(x, T) {#type_conversion_function-cast}
 
diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md
index ad63a5b72ac..0da74ce1b0e 100644
--- a/docs/en/sql-reference/functions/url-functions.md
+++ b/docs/en/sql-reference/functions/url-functions.md
@@ -115,7 +115,21 @@ Returns the “first significant subdomain”. This is a non-standard concept sp
 
 Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain” (see the explanation above).
 
-For example, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
+For example:
+
+- `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
+- `cutToFirstSignificantSubdomain('www.tr') = 'tr'`.
+- `cutToFirstSignificantSubdomain('tr') = ''`.
+
+### cutToFirstSignificantSubdomainWithWWW {#cuttofirstsignificantsubdomainwithwww}
+
+Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain”, without stripping "www".
+
+For example:
+
+- `cutToFirstSignificantSubdomainWithWWW('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
+- `cutToFirstSignificantSubdomainWithWWW('www.tr') = 'www.tr'`.
+- `cutToFirstSignificantSubdomainWithWWW('tr') = ''`.
 
 ### port(URL\[, default_port = 0\]) {#port}
 
diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md
index d2dd1c638cc..1d34449e918 100644
--- a/docs/en/sql-reference/statements/alter/partition.md
+++ b/docs/en/sql-reference/statements/alter/partition.md
@@ -21,10 +21,10 @@ The following operations with [partitions](../../../engines/table-engines/merget
 
-## DETACH PARTITION {#alter_detach-partition}
+## DETACH PARTITION\|PART {#alter_detach-partition}
 
 ``` sql
-ALTER TABLE table_name DETACH PARTITION partition_expr
+ALTER TABLE table_name DETACH PARTITION|PART partition_expr
 ```
 
 Moves all data for the specified partition to the `detached` directory. The server forgets about the detached data partition as if it does not exist. The server will not know about this data until you make the [ATTACH](#alter_attach-partition) query.
@@ -32,7 +32,8 @@ Moves all data for the specified partition to the `detached` directory. The serv
 Example:
 
 ``` sql
-ALTER TABLE visits DETACH PARTITION 201901
+ALTER TABLE mt DETACH PARTITION '2020-11-21';
+ALTER TABLE mt DETACH PART 'all_2_2_0';
 ```
 
 Read about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr).
 
 After the query is executed, you can do whatever you want with the data in the `detached` directory — delete it from the file system, or just leave it.
 
 This query is replicated – it moves the data to the `detached` directory on all replicas. Note that you can execute this query only on a leader replica.
To find out if a replica is a leader, perform the `SELECT` query to the [system.replicas](../../../operations/system-tables/replicas.md#system_tables-replicas) table. Alternatively, it is easier to make a `DETACH` query on all replicas - all the replicas throw an exception, except the leader replica.
 
-## DROP PARTITION {#alter_drop-partition}
+## DROP PARTITION\|PART {#alter_drop-partition}
 
 ``` sql
-ALTER TABLE table_name DROP PARTITION partition_expr
+ALTER TABLE table_name DROP PARTITION|PART partition_expr
 ```
 
 Deletes the specified partition from the table. This query tags the partition as inactive and deletes data completely, approximately in 10 minutes.
@@ -53,6 +54,13 @@ Read about setting the partition expression in a section [How to specify the par
 
 The query is replicated – it deletes data on all replicas.
 
+Example:
+
+``` sql
+ALTER TABLE mt DROP PARTITION '2020-11-21';
+ALTER TABLE mt DROP PART 'all_4_4_0';
+```
+
 ## DROP DETACHED PARTITION\|PART {#alter_drop-detached}
 
 ``` sql
diff --git a/docs/en/sql-reference/statements/create/dictionary.md b/docs/en/sql-reference/statements/create/dictionary.md
index b1098c54703..3fe94e267e4 100644
--- a/docs/en/sql-reference/statements/create/dictionary.md
+++ b/docs/en/sql-reference/statements/create/dictionary.md
@@ -20,7 +20,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
     PRIMARY KEY key1, key2
     SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
     LAYOUT(LAYOUT_NAME([param_name param_value]))
-LIFETIME([MIN val1] MAX val2)
+LIFETIME({MIN min_val MAX max_val | max_val})
 ```
 
 External dictionary structure consists of attributes. Dictionary attributes are specified similarly to table columns. The only required attribute property is its type, all other properties may have default values.
diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md
index dbfd5431861..71586e15a31 100644
--- a/docs/en/sql-reference/statements/select/from.md
+++ b/docs/en/sql-reference/statements/select/from.md
@@ -27,9 +27,9 @@ It is applicable when selecting data from tables that use the [MergeTree](../../
 
 ### Drawbacks {#drawbacks}
 
-Queries that use `FINAL` are executed not as fast as similar queries that don’t, because:
+Queries that use `FINAL` are executed slightly slower than similar queries that don’t, because:
 
-- Query is executed in a single thread and data is merged during query execution.
+- Data is merged during query execution.
 - Queries with `FINAL` read primary key columns in addition to the columns specified in the query.
 
 **In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine haven’t happened yet and deal with it by applying aggregation (for example, to discard duplicates). {## TODO: examples ##}
diff --git a/docs/en/sql-reference/statements/select/group-by.md b/docs/en/sql-reference/statements/select/group-by.md
index 6cb99f285f2..500a09dcbef 100644
--- a/docs/en/sql-reference/statements/select/group-by.md
+++ b/docs/en/sql-reference/statements/select/group-by.md
@@ -6,7 +6,7 @@ toc_title: GROUP BY
 
 `GROUP BY` clause switches the `SELECT` query into an aggregation mode, which works as follows:
 
-- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expressions”.
+- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expression”. - All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both. - Result of aggregating `SELECT` query will contain as many rows as there were unique values of “grouping key” in source table. Usually this signficantly reduces the row count, often by orders of magnitude, but not necessarily: row count stays the same if all “grouping key” values were distinct. @@ -45,6 +45,154 @@ You can see that `GROUP BY` for `y = NULL` summed up `x`, as if `NULL` is this v If you pass several keys to `GROUP BY`, the result will give you all the combinations of the selection, as if `NULL` were a specific value. +## WITH ROLLUP Modifier {#with-rollup-modifier} + +`WITH ROLLUP` modifier is used to calculate subtotals for the key expressions, based on their order in the `GROUP BY` list. The subtotals rows are added after the result table. + +The subtotals are calculated in the reverse order: at first subtotals are calculated for the last key expression in the list, then for the previous one, and so on up to the first key expression. + +In the subtotals rows the values of already "grouped" key expressions are set to `0` or empty line. + +!!! note "Note" + Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results. + +**Example** + +Consider the table t: + +```text +┌─year─┬─month─┬─day─┐ +│ 2019 │ 1 │ 5 │ +│ 2019 │ 1 │ 15 │ +│ 2020 │ 1 │ 5 │ +│ 2020 │ 1 │ 15 │ +│ 2020 │ 10 │ 5 │ +│ 2020 │ 10 │ 15 │ +└──────┴───────┴─────┘ +``` + +Query: + +```sql +SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP; +``` +As `GROUP BY` section has three key expressions, the result contains four tables with subtotals "rolled up" from right to left: + +- `GROUP BY year, month, day`; +- `GROUP BY year, month` (and `day` column is filled with zeros); +- `GROUP BY year` (now `month, day` columns are both filled with zeros); +- and totals (and all three key expression columns are zeros). + +```text +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2020 │ 10 │ 15 │ 1 │ +│ 2020 │ 1 │ 5 │ 1 │ +│ 2019 │ 1 │ 5 │ 1 │ +│ 2020 │ 1 │ 15 │ 1 │ +│ 2019 │ 1 │ 15 │ 1 │ +│ 2020 │ 10 │ 5 │ 1 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 1 │ 0 │ 2 │ +│ 2020 │ 1 │ 0 │ 2 │ +│ 2020 │ 10 │ 0 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 0 │ 0 │ 2 │ +│ 2020 │ 0 │ 0 │ 4 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 0 │ 0 │ 6 │ +└──────┴───────┴─────┴─────────┘ +``` + +## WITH CUBE Modifier {#with-cube-modifier} + +`WITH CUBE` modifier is used to calculate subtotals for every combination of the key expressions in the `GROUP BY` list. The subtotals rows are added after the result table. 
+ +In the subtotals rows the values of all "grouped" key expressions are set to `0` or empty line. + +!!! note "Note" + Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results. + +**Example** + +Consider the table t: + +```text +┌─year─┬─month─┬─day─┐ +│ 2019 │ 1 │ 5 │ +│ 2019 │ 1 │ 15 │ +│ 2020 │ 1 │ 5 │ +│ 2020 │ 1 │ 15 │ +│ 2020 │ 10 │ 5 │ +│ 2020 │ 10 │ 15 │ +└──────┴───────┴─────┘ +``` + +Query: + +```sql +SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE; +``` + +As `GROUP BY` section has three key expressions, the result contains eight tables with subtotals for all key expression combinations: + +- `GROUP BY year, month, day` +- `GROUP BY year, month` +- `GROUP BY year, day` +- `GROUP BY year` +- `GROUP BY month, day` +- `GROUP BY month` +- `GROUP BY day` +- and totals. + +Columns, excluded from `GROUP BY`, are filled with zeros. + +```text +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2020 │ 10 │ 15 │ 1 │ +│ 2020 │ 1 │ 5 │ 1 │ +│ 2019 │ 1 │ 5 │ 1 │ +│ 2020 │ 1 │ 15 │ 1 │ +│ 2019 │ 1 │ 15 │ 1 │ +│ 2020 │ 10 │ 5 │ 1 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 1 │ 0 │ 2 │ +│ 2020 │ 1 │ 0 │ 2 │ +│ 2020 │ 10 │ 0 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2020 │ 0 │ 5 │ 2 │ +│ 2019 │ 0 │ 5 │ 1 │ +│ 2020 │ 0 │ 15 │ 2 │ +│ 2019 │ 0 │ 15 │ 1 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 0 │ 0 │ 2 │ +│ 2020 │ 0 │ 0 │ 4 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 1 │ 5 │ 2 │ +│ 0 │ 10 │ 15 │ 1 │ +│ 0 │ 10 │ 5 │ 1 │ +│ 0 │ 1 │ 15 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 1 │ 0 │ 4 │ +│ 0 │ 10 │ 0 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 0 │ 5 │ 3 │ +│ 0 │ 0 │ 15 │ 3 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 0 │ 0 │ 6 │ +└──────┴───────┴─────┴─────────┘ +``` + + ## WITH TOTALS Modifier {#with-totals-modifier} If the `WITH TOTALS` modifier is specified, another row will be calculated. This row will have key columns containing default values (zeros or empty lines), and columns of aggregate functions with the values calculated across all the rows (the “total” values). @@ -88,8 +236,6 @@ SELECT FROM hits ``` -However, in contrast to standard SQL, if the table doesn’t have any rows (either there aren’t any at all, or there aren’t any after using WHERE to filter), an empty result is returned, and not the result from one of the rows containing the initial values of aggregate functions. - As opposed to MySQL (and conforming to standard SQL), you can’t get some value of some column that is not in a key or aggregate function (except constant expressions). To work around this, you can use the ‘any’ aggregate function (get the first encountered value) or ‘min/max’. Example: @@ -105,10 +251,6 @@ GROUP BY domain For every different key value encountered, `GROUP BY` calculates a set of aggregate function values. -`GROUP BY` is not supported for array columns. - -A constant can’t be specified as arguments for aggregate functions. Example: `sum(1)`. Instead of this, you can get rid of the constant. Example: `count()`. - ## Implementation Details {#implementation-details} Aggregation is one of the most important features of a column-oriented DBMS, and thus it’s implementation is one of the most heavily optimized parts of ClickHouse. By default, aggregation is done in memory using a hash-table. 
diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md
index 3107f791eb9..60c769c4660 100644
--- a/docs/en/sql-reference/statements/select/index.md
+++ b/docs/en/sql-reference/statements/select/index.md
@@ -20,12 +20,12 @@ SELECT [DISTINCT] expr_list
[GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON <expr_list>)|(USING <column_list>)
[PREWHERE expr]
[WHERE expr]
-[GROUP BY expr_list] [WITH TOTALS]
+[GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS]
[HAVING expr]
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
[LIMIT [offset_value, ]n BY columns]
[LIMIT [n, ]m] [WITH TIES]
-[UNION ALL ...]
+[UNION ...]
[INTO OUTFILE filename]
[FORMAT format]
```
@@ -46,7 +46,7 @@ Specifics of each optional clause are covered in separate sections, which are li
- [SELECT clause](#select-clause)
- [DISTINCT clause](../../../sql-reference/statements/select/distinct.md)
- [LIMIT clause](../../../sql-reference/statements/select/limit.md)
-- [UNION ALL clause](../../../sql-reference/statements/select/union-all.md)
+- [UNION clause](../../../sql-reference/statements/select/union-all.md)
- [INTO OUTFILE clause](../../../sql-reference/statements/select/into-outfile.md)
- [FORMAT clause](../../../sql-reference/statements/select/format.md)
@@ -159,4 +159,111 @@ If the query omits the `DISTINCT`, `GROUP BY` and `ORDER BY` clauses and the `IN
For more information, see the section “Settings”. It is possible to use external sorting (saving temporary tables to a disk) and external aggregation.

-{## [Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/) ##}
+## SELECT modifiers {#select-modifiers}
+
+You can use the following modifiers in `SELECT` queries.
+
+### APPLY {#apply-modifier}
+
+Allows you to invoke some function for each row returned by an outer table expression of a query.
+
+**Syntax:**
+
+``` sql
+SELECT <expr> APPLY( <func> ) FROM [db.]table_name
+```
+
+**Example:**
+
+``` sql
+CREATE TABLE columns_transformers (i Int64, j Int16, k Int64) ENGINE = MergeTree ORDER BY (i);
+INSERT INTO columns_transformers VALUES (100, 10, 324), (120, 8, 23);
+SELECT * APPLY(sum) FROM columns_transformers;
+```
+
+```
+┌─sum(i)─┬─sum(j)─┬─sum(k)─┐
+│    220 │     18 │    347 │
+└────────┴────────┴────────┘
+```
+
+### EXCEPT {#except-modifier}
+
+Specifies the names of one or more columns to exclude from the result. All matching column names are omitted from the output.
+
+**Syntax:**
+
+``` sql
+SELECT <expr> EXCEPT ( col_name1 [, col_name2, col_name3, ...] ) FROM [db.]table_name
+```
+
+**Example:**
+
+``` sql
+SELECT * EXCEPT (i) FROM columns_transformers;
+```
+
+```
+┌──j─┬───k─┐
+│ 10 │ 324 │
+│  8 │  23 │
+└────┴─────┘
+```
+
+### REPLACE {#replace-modifier}
+
+Specifies one or more [expression aliases](../../../sql-reference/syntax.md#syntax-expression_aliases). Each alias must match a column name from the `SELECT *` statement. In the output column list, the column that matches the alias is replaced by the expression in that `REPLACE`.
+
+This modifier does not change the names or order of columns. However, it can change the value and the value type.
+
+**Syntax:**
+
+``` sql
+SELECT <expr> REPLACE( <expr> AS col_name) FROM [db.]table_name
+```
+
+**Example:**
+
+``` sql
+SELECT * REPLACE(i + 1 AS i) FROM columns_transformers;
+```
+
+```
+┌───i─┬──j─┬───k─┐
+│ 101 │ 10 │ 324 │
+│ 121 │  8 │  23 │
+└─────┴────┴─────┘
+```
+
+### Modifier Combinations {#modifier-combinations}
+
+You can use each modifier separately or combine them.
+
+**Examples:**
+
+Using the same modifier multiple times.
+
+``` sql
+SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) APPLY(max) FROM columns_transformers;
+```
+
+```
+┌─max(length(toString(j)))─┬─max(length(toString(k)))─┐
+│                        2 │                        3 │
+└──────────────────────────┴──────────────────────────┘
+```
+
+Using multiple modifiers in a single query.
+
+``` sql
+SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) FROM columns_transformers;
+```
+
+```
+┌─sum(plus(i, 1))─┬─sum(k)─┐
+│             222 │    347 │
+└─────────────────┴────────┘
+```
+
+[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/)
+
diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md
index a4e5e3655c6..57e071d6734 100644
--- a/docs/en/sql-reference/statements/select/order-by.md
+++ b/docs/en/sql-reference/statements/select/order-by.md
@@ -221,3 +221,85 @@ returns
│ 1970-03-12 │ 1970-01-08 │ original │
└────────────┴────────────┴──────────┘
```
+
+## OFFSET FETCH Clause {#offset-fetch}
+
+`OFFSET` and `FETCH` allow you to retrieve data in portions. They specify the block of rows that you want to get with a single query.
+
+``` sql
+OFFSET offset_row_count {ROW | ROWS} [FETCH {FIRST | NEXT} fetch_row_count {ROW | ROWS} {ONLY | WITH TIES}]
+```
+
+The `offset_row_count` or `fetch_row_count` value can be a number or a literal constant. You can omit `fetch_row_count`; by default, it equals 1.
+
+`OFFSET` specifies the number of rows to skip before starting to return rows from the query.
+
+`FETCH` specifies the maximum number of rows that can be in the result of a query.
+
+The `ONLY` option is used to return the rows that immediately follow the rows omitted by `OFFSET`. In this case, `FETCH` is an alternative to the [LIMIT](../../../sql-reference/statements/select/limit.md) clause. For example, the following query
+
+``` sql
+SELECT * FROM test_fetch ORDER BY a OFFSET 1 ROW FETCH FIRST 3 ROWS ONLY;
+```
+
+is identical to the query
+
+``` sql
+SELECT * FROM test_fetch ORDER BY a LIMIT 3 OFFSET 1;
+```
+
+The `WITH TIES` option is used to return any additional rows that tie for the last place in the result set according to the `ORDER BY` clause. For example, if `fetch_row_count` is set to 5 but two additional rows match the values of the `ORDER BY` columns in the fifth row, the result set will contain seven rows.
+
+!!! note "Note"
+    According to the standard, the `OFFSET` clause must come before the `FETCH` clause if both are present.
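For instance, omitting `fetch_row_count` returns a single row (a minimal sketch using the `test_fetch` table from the examples below):

``` sql
-- FETCH FIRST ROW ONLY is equivalent to FETCH FIRST 1 ROW ONLY.
SELECT * FROM test_fetch ORDER BY a OFFSET 1 ROW FETCH FIRST ROW ONLY;
```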
+
+### Examples {#examples}
+
+Input table:
+
+``` text
+┌─a─┬─b─┐
+│ 1 │ 1 │
+│ 2 │ 1 │
+│ 3 │ 4 │
+│ 1 │ 3 │
+│ 5 │ 4 │
+│ 0 │ 6 │
+│ 5 │ 7 │
+└───┴───┘
+```
+
+Usage of the `ONLY` option:
+
+``` sql
+SELECT * FROM test_fetch ORDER BY a OFFSET 3 ROW FETCH FIRST 3 ROWS ONLY;
+```
+
+Result:
+
+``` text
+┌─a─┬─b─┐
+│ 2 │ 1 │
+│ 3 │ 4 │
+│ 5 │ 4 │
+└───┴───┘
+```
+
+Usage of the `WITH TIES` option:
+
+``` sql
+SELECT * FROM test_fetch ORDER BY a OFFSET 3 ROW FETCH FIRST 3 ROWS WITH TIES;
+```
+
+Result:
+
+``` text
+┌─a─┬─b─┐
+│ 2 │ 1 │
+│ 3 │ 4 │
+│ 5 │ 4 │
+│ 5 │ 7 │
+└───┴───┘
+```
+
+[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/order-by/)
diff --git a/docs/en/sql-reference/statements/select/union-all.md b/docs/en/sql-reference/statements/select/union-all.md
index 5230363609e..f150efbdc80 100644
--- a/docs/en/sql-reference/statements/select/union-all.md
+++ b/docs/en/sql-reference/statements/select/union-all.md
@@ -1,5 +1,5 @@
---
-toc_title: UNION ALL
+toc_title: UNION
---

# UNION ALL Clause {#union-all-clause}
@@ -25,10 +25,13 @@ Type casting is performed for unions. For example, if two queries being combined

Queries that are parts of `UNION ALL` can’t be enclosed in round brackets. [ORDER BY](../../../sql-reference/statements/select/order-by.md) and [LIMIT](../../../sql-reference/statements/select/limit.md) are applied to separate queries, not to the final result. If you need to apply a conversion to the final result, you can put all the queries with `UNION ALL` in a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.

-## Limitations {#limitations}
+# UNION DISTINCT Clause {#union-distinct-clause}
+The difference between `UNION ALL` and `UNION DISTINCT` is that `UNION DISTINCT` performs a distinct transform on the union result; it is equivalent to `SELECT DISTINCT` from a subquery containing `UNION ALL`.
+
+# UNION Clause {#union-clause}
+By default, `UNION` behaves the same way as `UNION DISTINCT`, but you can specify the union mode with the `union_default_mode` setting; possible values are 'ALL', 'DISTINCT', or an empty string. However, if you use `UNION` with `union_default_mode` set to an empty string, an exception is thrown.

-Only `UNION ALL` is supported. The regular `UNION` (`UNION DISTINCT`) is not supported. If you need `UNION DISTINCT`, you can write `SELECT DISTINCT` from a subquery containing `UNION ALL`.

## Implementation Details {#implementation-details}

-Queries that are parts of `UNION ALL` can be run simultaneously, and their results can be mixed together.
+Queries that are parts of `UNION/UNION ALL/UNION DISTINCT` can be run simultaneously, and their results can be mixed together.
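As an illustration of the equivalence described above, the following two queries return the same single-row result (a minimal sketch):

``` sql
SELECT 1 UNION DISTINCT SELECT 1;

SELECT DISTINCT * FROM (SELECT 1 UNION ALL SELECT 1);
```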
diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md
index 75303fde19e..509b7553536 100644
--- a/docs/en/sql-reference/statements/system.md
+++ b/docs/en/sql-reference/statements/system.md
@@ -204,7 +204,7 @@ SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]

## Managing ReplicatedMergeTree Tables {#query-language-system-replicated}

-ClickHouse can manage background replication related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables.
+ClickHouse can manage background replication related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication/#table_engines-replication) tables.

### STOP FETCHES {#query_language-system-stop-fetches}
diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md
index 70994f3d882..296f5c7c5f3 100644
--- a/docs/en/sql-reference/syntax.md
+++ b/docs/en/sql-reference/syntax.md
@@ -57,7 +57,7 @@ Identifiers are:

Identifiers can be quoted or non-quoted. The latter is preferred.

-Non-quoted identifiers must match the regex `^[a-zA-Z_][0-9a-zA-Z_]*$` and can not be equal to [keywords](#syntax-keywords). Examples: `x, _1, X_y__Z123_.`
+Non-quoted identifiers must match the regex `^[0-9a-zA-Z_]*[a-zA-Z_]$` and can not be equal to [keywords](#syntax-keywords). Examples: `x, _1, X_y__Z123_.`

If you want to use identifiers the same as keywords or you want to use other symbols in identifiers, quote it using double quotes or backticks, for example, `"id"`, `` `id` ``.
diff --git a/docs/en/sql-reference/table-functions/null.md b/docs/en/sql-reference/table-functions/null.md
index 6edec61add7..355a45a83e1 100644
--- a/docs/en/sql-reference/table-functions/null.md
+++ b/docs/en/sql-reference/table-functions/null.md
@@ -5,7 +5,7 @@ toc_title: null function

# null {#null-function}

-Accepts an inserted data of the specified structure and immediately drops it away. The function is used for convenience writing tests and demonstrations.
+Creates a temporary table of the specified structure with the [Null](../../engines/table-engines/special/null.md) table engine. According to the `Null`-engine properties, the table data is ignored and the table itself is dropped immediately after the query execution. The function is used for convenience when writing tests and demonstrations.

**Syntax**

@@ -19,7 +19,7 @@ null('structure')

**Returned value**

-A table with the specified structure, which is dropped right after the query execution.
+A temporary `Null`-engine table with the specified structure.

**Example**

@@ -36,6 +36,8 @@ INSERT INTO t SELECT * FROM numbers_mt(1000000000);
DROP TABLE IF EXISTS t;
```

-See also: format **Null**.
+See also: + +- [Null table engine](../../engines/table-engines/special/null.md) [Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/null/) diff --git a/docs/es/development/contrib.md b/docs/es/development/contrib.md index 9018c19cc92..3f3013570e5 100644 --- a/docs/es/development/contrib.md +++ b/docs/es/development/contrib.md @@ -19,7 +19,6 @@ toc_title: Bibliotecas de terceros utilizadas | Más información | [Licencia de 3 cláusulas BSD](https://github.com/google/googletest/blob/master/LICENSE) | | H3 | [Licencia Apache 2.0](https://github.com/uber/h3/blob/master/LICENSE) | | hyperscan | [Licencia de 3 cláusulas BSD](https://github.com/intel/hyperscan/blob/master/LICENSE) | -| libbtrie | [Licencia BSD de 2 cláusulas](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) | | libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) | | libdivide | [Licencia Zlib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) | | libgsasl | [Información adicional](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) | diff --git a/docs/es/sql-reference/aggregate-functions/reference.md b/docs/es/sql-reference/aggregate-functions/reference.md index 927d16bc748..572c4d01051 100644 --- a/docs/es/sql-reference/aggregate-functions/reference.md +++ b/docs/es/sql-reference/aggregate-functions/reference.md @@ -464,69 +464,6 @@ The kurtosis of the given distribution. Type — [Float64](../../sql-reference/d SELECT kurtSamp(value) FROM series_with_value_column ``` -## Para obtener más información, consulta nuestra Política de privacidad y nuestras Condiciones de uso) {#agg-function-timeseriesgroupsum} - -`timeSeriesGroupSum` puede agregar diferentes series de tiempo que muestran la marca de tiempo no la alineación. -Utilizará la interpolación lineal entre dos marcas de tiempo de muestra y luego sumará series temporales juntas. - -- `uid` es la identificación única de la serie temporal, `UInt64`. -- `timestamp` es el tipo Int64 para admitir milisegundos o microsegundos. -- `value` es la métrica. - -La función devuelve una matriz de tuplas con `(timestamp, aggregated_value)` par. - -Antes de utilizar esta función, asegúrese de `timestamp` está en orden ascendente. - -Ejemplo: - -``` text -┌─uid─┬─timestamp─┬─value─┐ -│ 1 │ 2 │ 0.2 │ -│ 1 │ 7 │ 0.7 │ -│ 1 │ 12 │ 1.2 │ -│ 1 │ 17 │ 1.7 │ -│ 1 │ 25 │ 2.5 │ -│ 2 │ 3 │ 0.6 │ -│ 2 │ 8 │ 1.6 │ -│ 2 │ 12 │ 2.4 │ -│ 2 │ 18 │ 3.6 │ -│ 2 │ 24 │ 4.8 │ -└─────┴───────────┴───────┘ -``` - -``` sql -CREATE TABLE time_series( - uid UInt64, - timestamp Int64, - value Float64 -) ENGINE = Memory; -INSERT INTO time_series VALUES - (1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5), - (2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8); - -SELECT timeSeriesGroupSum(uid, timestamp, value) -FROM ( - SELECT * FROM time_series order by timestamp ASC -); -``` - -Y el resultado será: - -``` text -[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)] -``` - -## También puede utilizar el siguiente ejemplo:) {#agg-function-timeseriesgroupratesum} - -De manera similar a `timeSeriesGroupSum`, `timeSeriesGroupRateSum` calcula la tasa de series temporales y luego suma las tasas juntas. -Además, la marca de tiempo debe estar en orden ascendente antes de usar esta función. 
- -Aplicando esta función a los datos del `timeSeriesGroupSum` ejemplo, se obtiene el siguiente resultado: - -``` text -[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)] -``` - ## Acerca de) {#agg_function-avg} Calcula el promedio. diff --git a/docs/es/sql-reference/statements/create.md b/docs/es/sql-reference/statements/create.md index b851435286e..db3194ae114 100644 --- a/docs/es/sql-reference/statements/create.md +++ b/docs/es/sql-reference/statements/create.md @@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` Crear [diccionario externo](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) con dado [estructura](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [fuente](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [diseño](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) y [vida](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). diff --git a/docs/fa/development/contrib.md b/docs/fa/development/contrib.md index 25573c28125..2ee5fc73369 100644 --- a/docs/fa/development/contrib.md +++ b/docs/fa/development/contrib.md @@ -21,7 +21,6 @@ toc_title: "\u06A9\u062A\u0627\u0628\u062E\u0627\u0646\u0647 \u0647\u0627\u06CC | googletest | [لیسانس 3 بند](https://github.com/google/googletest/blob/master/LICENSE) | | اچ 3 | [نمایی مجوز 2.0](https://github.com/uber/h3/blob/master/LICENSE) | | hyperscan | [لیسانس 3 بند](https://github.com/intel/hyperscan/blob/master/LICENSE) | -| لیبتری | [لیسانس 2 بند](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) | | شکنجه نوجوان | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) | | لیبیدوید | [مجوز زلب](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) | | نوشیدن شراب | [الجی پی ال2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) | diff --git a/docs/fa/sql-reference/aggregate-functions/reference.md b/docs/fa/sql-reference/aggregate-functions/reference.md index 0933bf50b20..f18dc9e6bf6 100644 --- a/docs/fa/sql-reference/aggregate-functions/reference.md +++ b/docs/fa/sql-reference/aggregate-functions/reference.md @@ -464,69 +464,6 @@ The kurtosis of the given distribution. Type — [جسم شناور64](../../sql SELECT kurtSamp(value) FROM series_with_value_column ``` -## هشدار داده می شود) {#agg-function-timeseriesgroupsum} - -`timeSeriesGroupSum` می توانید سری های زمانی مختلف که برچسب زمان نمونه هم ترازی جمع نمی. -این برون یابی خطی بین دو برچسب زمان نمونه و سپس مجموع زمان سری با هم استفاده کنید. - -- `uid` سری زمان شناسه منحصر به فرد است, `UInt64`. -- `timestamp` است نوع درون64 به منظور حمایت میلی ثانیه یا میکروثانیه. -- `value` متریک است. - -تابع گرداند مجموعه ای از تاپل با `(timestamp, aggregated_value)` جفت - -قبل از استفاده از این تابع اطمینان حاصل کنید `timestamp` به ترتیب صعودی است. 
- -مثال: - -``` text -┌─uid─┬─timestamp─┬─value─┐ -│ 1 │ 2 │ 0.2 │ -│ 1 │ 7 │ 0.7 │ -│ 1 │ 12 │ 1.2 │ -│ 1 │ 17 │ 1.7 │ -│ 1 │ 25 │ 2.5 │ -│ 2 │ 3 │ 0.6 │ -│ 2 │ 8 │ 1.6 │ -│ 2 │ 12 │ 2.4 │ -│ 2 │ 18 │ 3.6 │ -│ 2 │ 24 │ 4.8 │ -└─────┴───────────┴───────┘ -``` - -``` sql -CREATE TABLE time_series( - uid UInt64, - timestamp Int64, - value Float64 -) ENGINE = Memory; -INSERT INTO time_series VALUES - (1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5), - (2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8); - -SELECT timeSeriesGroupSum(uid, timestamp, value) -FROM ( - SELECT * FROM time_series order by timestamp ASC -); -``` - -و نتیجه خواهد بود: - -``` text -[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)] -``` - -## هشدار داده می شود) {#agg-function-timeseriesgroupratesum} - -به طور مشابه به `timeSeriesGroupSum`, `timeSeriesGroupRateSum` محاسبه نرخ زمان سری و سپس مجموع نرخ با هم. -همچنین, برچسب زمان باید در جهت صعود قبل از استفاده از این تابع باشد. - -استفاده از این تابع به داده ها از `timeSeriesGroupSum` مثال, شما نتیجه زیر را دریافت کنید: - -``` text -[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)] -``` - ## میانگین) {#agg_function-avg} محاسبه متوسط. diff --git a/docs/fa/sql-reference/statements/create.md b/docs/fa/sql-reference/statements/create.md index c4b7ede05dd..970e8ee7535 100644 --- a/docs/fa/sql-reference/statements/create.md +++ b/docs/fa/sql-reference/statements/create.md @@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` ایجاد [فرهنگ لغت خارجی](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) با توجه به [ساختار](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [متن](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [طرحبندی](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) و [طول عمر](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). 
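For reference, the two forms accepted by the corrected `LIFETIME` syntax in the hunks above look like this (a minimal sketch; the interval values are arbitrary):

``` sql
LIFETIME(300)             -- a single value: update the dictionary roughly every 300 seconds
LIFETIME(MIN 300 MAX 360) -- a range: the update time is chosen uniformly between 300 and 360 seconds
```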
diff --git a/docs/fr/development/contrib.md b/docs/fr/development/contrib.md index f4006d0a787..6909ef905bd 100644 --- a/docs/fr/development/contrib.md +++ b/docs/fr/development/contrib.md @@ -19,7 +19,6 @@ toc_title: "Biblioth\xE8ques Tierces Utilis\xE9es" | googletest | [Licence BSD 3-Clause](https://github.com/google/googletest/blob/master/LICENSE) | | h3 | [Licence Apache 2.0](https://github.com/uber/h3/blob/master/LICENSE) | | hyperscan | [Licence BSD 3-Clause](https://github.com/intel/hyperscan/blob/master/LICENSE) | -| libbtrie | [Licence BSD 2-Clause](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) | | libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) | | libdivide | [Licence Zlib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) | | libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) | diff --git a/docs/fr/sql-reference/aggregate-functions/reference.md b/docs/fr/sql-reference/aggregate-functions/reference.md index dcbf3bdd95a..37d150a6f68 100644 --- a/docs/fr/sql-reference/aggregate-functions/reference.md +++ b/docs/fr/sql-reference/aggregate-functions/reference.md @@ -464,69 +464,6 @@ The kurtosis of the given distribution. Type — [Float64](../../sql-reference/d SELECT kurtSamp(value) FROM series_with_value_column ``` -## timeSeriesGroupSum(uid, horodatage, valeur) {#agg-function-timeseriesgroupsum} - -`timeSeriesGroupSum` peut agréger différentes séries temporelles qui échantillonnent l'horodatage et non l'alignement. -Il utilisera une interpolation linéaire entre deux échantillons d'horodatage, puis additionnera les séries temporelles ensemble. - -- `uid` la série temporelle est elle unique, `UInt64`. -- `timestamp` est de type Int64 afin de prendre en charge la milliseconde ou la microseconde. -- `value` est la métrique. - -La fonction renvoie un tableau de tuples avec `(timestamp, aggregated_value)` pair. - -Avant d'utiliser cette fonction, assurez-vous `timestamp` est dans l'ordre croissant. - -Exemple: - -``` text -┌─uid─┬─timestamp─┬─value─┐ -│ 1 │ 2 │ 0.2 │ -│ 1 │ 7 │ 0.7 │ -│ 1 │ 12 │ 1.2 │ -│ 1 │ 17 │ 1.7 │ -│ 1 │ 25 │ 2.5 │ -│ 2 │ 3 │ 0.6 │ -│ 2 │ 8 │ 1.6 │ -│ 2 │ 12 │ 2.4 │ -│ 2 │ 18 │ 3.6 │ -│ 2 │ 24 │ 4.8 │ -└─────┴───────────┴───────┘ -``` - -``` sql -CREATE TABLE time_series( - uid UInt64, - timestamp Int64, - value Float64 -) ENGINE = Memory; -INSERT INTO time_series VALUES - (1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5), - (2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8); - -SELECT timeSeriesGroupSum(uid, timestamp, value) -FROM ( - SELECT * FROM time_series order by timestamp ASC -); -``` - -Et le résultat sera: - -``` text -[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)] -``` - -## timeSeriesGroupRateSum(uid, ts, val) {#agg-function-timeseriesgroupratesum} - -De la même manière à `timeSeriesGroupSum`, `timeSeriesGroupRateSum` calcule le taux de séries chronologiques, puis additionne les taux ensemble. -En outre, l'horodatage doit être dans l'ordre croissant avant d'utiliser cette fonction. - -Application de cette fonction aux données du `timeSeriesGroupSum` exemple, vous obtenez le résultat suivant: - -``` text -[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)] -``` - ## avg (x) {#agg_function-avg} Calcule la moyenne. 
diff --git a/docs/fr/sql-reference/statements/create.md b/docs/fr/sql-reference/statements/create.md index f7b3790baf2..e7c8040ee6e 100644 --- a/docs/fr/sql-reference/statements/create.md +++ b/docs/fr/sql-reference/statements/create.md @@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` Crée [externe dictionnaire](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) avec le [structure](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [disposition](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) et [vie](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). diff --git a/docs/ja/development/contrib.md b/docs/ja/development/contrib.md index 2e16b2bc72a..892d2c66a13 100644 --- a/docs/ja/development/contrib.md +++ b/docs/ja/development/contrib.md @@ -20,7 +20,6 @@ toc_title: "\u30B5\u30FC\u30C9\u30D1\u30FC\u30C6\u30A3\u88FD\u30E9\u30A4\u30D6\u | googletest | [BSD3条項ライセンス](https://github.com/google/googletest/blob/master/LICENSE) | | h3 | [Apacheライセンス2.0](https://github.com/uber/h3/blob/master/LICENSE) | | hyperscan | [BSD3条項ライセンス](https://github.com/intel/hyperscan/blob/master/LICENSE) | -| libbtrie | [BSD2条項ライセンス](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) | | libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) | | libdivide | [Zlibライセンス](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) | | libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) | diff --git a/docs/ja/sql-reference/aggregate-functions/reference.md b/docs/ja/sql-reference/aggregate-functions/reference.md index 298de5a75b5..465f36179da 100644 --- a/docs/ja/sql-reference/aggregate-functions/reference.md +++ b/docs/ja/sql-reference/aggregate-functions/reference.md @@ -464,69 +464,6 @@ The kurtosis of the given distribution. Type — [Float64](../../sql-reference/d SELECT kurtSamp(value) FROM series_with_value_column ``` -## timeSeriesGroupSum(uid,タイムスタンプ,値) {#agg-function-timeseriesgroupsum} - -`timeSeriesGroupSum` 総異なる時系列のサンプルのタイムスタンプなアライメントを実施します。 -これは、二つのサンプルタイムスタンプ間の線形補間を使用して、一緒に時系列を合計します。 - -- `uid` 時系列は一意のidですか, `UInt64`. 
-- `timestamp` ミリ秒またはマイクロ秒をサポートするためにInt64型です。 -- `value` は指標です。 - -この関数は、次のような組の配列を返します `(timestamp, aggregated_value)` ペア。 - -この関数を使用する前に、必ず `timestamp` 昇順です。 - -例: - -``` text -┌─uid─┬─timestamp─┬─value─┐ -│ 1 │ 2 │ 0.2 │ -│ 1 │ 7 │ 0.7 │ -│ 1 │ 12 │ 1.2 │ -│ 1 │ 17 │ 1.7 │ -│ 1 │ 25 │ 2.5 │ -│ 2 │ 3 │ 0.6 │ -│ 2 │ 8 │ 1.6 │ -│ 2 │ 12 │ 2.4 │ -│ 2 │ 18 │ 3.6 │ -│ 2 │ 24 │ 4.8 │ -└─────┴───────────┴───────┘ -``` - -``` sql -CREATE TABLE time_series( - uid UInt64, - timestamp Int64, - value Float64 -) ENGINE = Memory; -INSERT INTO time_series VALUES - (1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5), - (2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8); - -SELECT timeSeriesGroupSum(uid, timestamp, value) -FROM ( - SELECT * FROM time_series order by timestamp ASC -); -``` - -結果は次のようになります: - -``` text -[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)] -``` - -## タイムセリエスグロプラテスム(uid,ts,val) {#agg-function-timeseriesgroupratesum} - -同様に `timeSeriesGroupSum`, `timeSeriesGroupRateSum` 時系列のレートを計算し、レートを合計します。 -また、timestampはこの関数を使用する前に上昇順にする必要があります。 - -のデータにこの関数を適用します。 `timeSeriesGroupSum` 例では、次の結果が得られます: - -``` text -[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)] -``` - ## avg(x) {#agg_function-avg} 平均を計算します。 diff --git a/docs/ja/sql-reference/statements/create.md b/docs/ja/sql-reference/statements/create.md index ae518dbfac8..1d1f2c57556 100644 --- a/docs/ja/sql-reference/statements/create.md +++ b/docs/ja/sql-reference/statements/create.md @@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` 作成 [外部辞書](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) 与えられたと [構造](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [ソース](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [レイアウト](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) と [生涯](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). 
diff --git a/docs/ru/development/contrib.md b/docs/ru/development/contrib.md
index e65ab4819e8..05367267e41 100644
--- a/docs/ru/development/contrib.md
+++ b/docs/ru/development/contrib.md
@@ -18,7 +18,6 @@ toc_title: "\u0418\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c\u044b\u
| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) |
| h3 | [Apache License 2.0](https://github.com/uber/h3/blob/master/LICENSE) |
| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) |
-| libbtrie | [BSD 2-Clause License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
| libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) |
diff --git a/docs/ru/engines/table-engines/special/buffer.md b/docs/ru/engines/table-engines/special/buffer.md
index 986fe9adbb9..75ce12f50fa 100644
--- a/docs/ru/engines/table-engines/special/buffer.md
+++ b/docs/ru/engines/table-engines/special/buffer.md
@@ -64,6 +64,6 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10

Таблицы типа Buffer используются в тех случаях, когда от большого количества серверов поступает слишком много INSERT-ов в единицу времени, и нет возможности заранее самостоятельно буферизовать данные перед вставкой, в результате чего, INSERT-ы не успевают выполняться.

-Заметим, что даже для таблиц типа Buffer не имеет смысла вставлять данные по одной строке, так как таким образом будет достигнута скорость всего лишь в несколько тысяч строк в секунду, тогда как при вставке более крупными блоками, достижимо более миллиона строк в секунду (смотрите раздел «Производительность»).
+Заметим, что даже для таблиц типа Buffer не имеет смысла вставлять данные по одной строке, так как таким образом будет достигнута скорость всего лишь в несколько тысяч строк в секунду, тогда как при вставке более крупными блоками, достижимо более миллиона строк в секунду (смотрите раздел [«Производительность»](../../../introduction/performance/)).

[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/buffer/)
diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md
index 2a23a27f4c0..9941e4f3ac5 100644
--- a/docs/ru/operations/server-configuration-parameters/settings.md
+++ b/docs/ru/operations/server-configuration-parameters/settings.md
@@ -555,6 +555,22 @@ ClickHouse проверяет условия для `min_part_size` и `min_part

```

+## replicated\_merge\_tree {#server_configuration_parameters-replicated_merge_tree}
+
+Тонкая настройка таблиц в [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
+
+Эта настройка имеет более высокий приоритет, чем настройки секции `merge_tree`.
+
+Подробнее смотрите в заголовочном файле MergeTreeSettings.h.
+
+**Пример**
+
+``` xml
+<replicated_merge_tree>
+    <max_suspicious_broken_parts>5</max_suspicious_broken_parts>
+</replicated_merge_tree>
+```
+
## openSSL {#server_configuration_parameters-openssl}

Настройки клиента/сервера SSL.
@@ -1052,4 +1068,45 @@ ClickHouse использует ZooKeeper для хранения метадан

- [Управление доступом](../access-rights.md#access-control)

+## user_directories {#user_directories}
+
+Секция конфигурационного файла, которая содержит настройки:
+- Путь к конфигурационному файлу с предустановленными пользователями.
+- Путь к файлу, в котором содержатся пользователи, созданные при помощи SQL-команд.
+
+Если эта секция определена, путь из [users_config](../../operations/server-configuration-parameters/settings.md#users-config) и [access_control_path](../../operations/server-configuration-parameters/settings.md#access_control_path) не используется.
+
+Секция `user_directories` может содержать любое количество элементов, порядок расположения элементов обозначает их приоритет (чем выше элемент, тем выше приоритет).
+
+**Пример**
+
+``` xml
+<user_directories>
+    <users_xml>
+        <path>/etc/clickhouse-server/users.xml</path>
+    </users_xml>
+    <local_directory>
+        <path>/var/lib/clickhouse/access/</path>
+    </local_directory>
+</user_directories>
+```
+
+Также вы можете указать настройку `memory` — означает хранение информации только в памяти, без записи на диск, и `ldap` — означает хранение информации на [LDAP-сервере](https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol).
+
+Чтобы добавить LDAP-сервер в качестве удаленного каталога пользователей, которые не определены локально, определите один раздел `ldap` со следующими параметрами:
+- `server` — имя одного из LDAP-серверов, определенных в секции `ldap_servers` конфигурационного файла. Этот параметр является необязательным и может быть пустым.
+- `roles` — раздел со списком локально определенных ролей, которые будут назначены каждому пользователю, полученному с LDAP-сервера. Если роли не заданы, пользователь не сможет выполнять никаких действий после аутентификации. Если какая-либо из перечисленных ролей не определена локально во время проверки подлинности, попытка проверки подлинности завершится неудачей, как если бы предоставленный пароль был неверным.
+
+**Пример**
+
+``` xml
+<ldap>
+    <server>my_ldap_server</server>
+    <roles>
+        <my_local_role1 />
+        <my_local_role2 />
+    </roles>
+</ldap>
+```
+
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/server_configuration_parameters/settings/)
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 39a996cb44e..af0fc3e6137 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -289,6 +289,54 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (

Disabled by default.

+## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number}
+
+Включает или отключает парсинг значений перечислений как идентификаторов перечислений для входного формата TSV.
+
+Возможные значения:
+
+- 0 — парсинг значений перечисления как значений.
+- 1 — парсинг значений перечисления как идентификаторов перечисления.
+
+Значение по умолчанию: 0.
+ +**Пример** + +Рассмотрим таблицу: + +```sql +CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); +``` + +При включенной настройке `input_format_tsv_enum_as_number`: + +```sql +SET input_format_tsv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 1; +SELECT * FROM table_with_enum_column_for_tsv_insert; +``` + +Результат: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +┌──Id─┬─Value──┐ +│ 103 │ first │ +└─────┴────────┘ +``` + +При отключенной настройке `input_format_tsv_enum_as_number` запрос `INSERT`: + +```sql +SET input_format_tsv_enum_as_number = 0; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; +``` + +сгенерирует исключение. + ## input_format_null_as_default {#settings-input-format-null-as-default} Включает или отключает использование значений по умолчанию в случаях, когда во входных данных содержится `NULL`, но тип соответствующего столбца не `Nullable(T)` (для текстовых форматов). @@ -1127,6 +1175,50 @@ SELECT area/period FROM account_orders FORMAT JSON; Для формата CSV включает или выключает парсинг неэкранированной строки `NULL` как литерала (синоним для `\N`) +## input_format_csv_enum_as_number {#settings-input_format_csv_enum_as_number} + +Включает или отключает парсинг значений перечислений как идентификаторов перечислений для входного формата CSV. + +Возможные значения: + +- 0 — парсинг значений перечисления как значений. +- 1 — парсинг значений перечисления как идентификаторов перечисления. + +Значение по умолчанию: 0. + +**Пример** + +Рассмотрим таблицу: + +```sql +CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); +``` + +При включенной настройке `input_format_csv_enum_as_number`: + +```sql +SET input_format_csv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2; +SELECT * FROM table_with_enum_column_for_csv_insert; +``` + +Результат: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +``` + +При отключенной настройке `input_format_csv_enum_as_number` запрос `INSERT`: + +```sql +SET input_format_csv_enum_as_number = 0; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2; +``` + +сгенерирует исключение. + ## output_format_csv_crlf_end_of_line {#settings-output-format-csv-crlf-end-of-line} Использовать в качестве разделителя строк для CSV формата CRLF (DOS/Windows стиль) вместо LF (Unix стиль). @@ -2095,4 +2187,10 @@ SELECT CAST(toNullable(toInt32(0)) AS Int32) as x, toTypeName(x); Значение по умолчанию: `1`. +## output_format_tsv_null_representation {#output_format_tsv_null_representation} + +Позволяет настраивать представление `NULL` для формата выходных данных [TSV](../../interfaces/formats.md#tabseparated). Настройка управляет форматом выходных данных, `\N` является единственным поддерживаемым представлением для формата входных данных TSV. + +Значение по умолчанию: `\N`. 
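Небольшая иллюстрация этой настройки (примерный набросок):

```sql
SET output_format_tsv_null_representation = 'NULL';
SELECT NULL AS x FORMAT TSV;
```

В результате в выводе вместо `\N` будет напечатано `NULL`.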
+ [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) diff --git a/docs/ru/operations/system-tables/replicated_fetches.md b/docs/ru/operations/system-tables/replicated_fetches.md new file mode 100644 index 00000000000..94584f390ee --- /dev/null +++ b/docs/ru/operations/system-tables/replicated_fetches.md @@ -0,0 +1,70 @@ +# system.replicated_fetches {#system_tables-replicated_fetches} + +Содержит информацию о выполняемых в данный момент фоновых операциях скачивания кусков данных с других реплик. + +Столбцы: + +- `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных. + +- `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы. + +- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — время, прошедшее от момента начала скачивания куска, в секундах. + +- `progress` ([Float64](../../sql-reference/data-types/float.md)) — доля выполненной работы от 0 до 1. + +- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — имя скачиваемого куска. + +- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — абсолютный путь к скачиваемому куску. + +- `partition_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор партиции. + +- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — общий размер сжатой информации в скачиваемом куске в байтах. + +- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер сжатой информации, считанной из скачиваемого куска, в байтах. + +- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — абсолютный путь к исходной реплике. + +- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — имя хоста исходной реплики. + +- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — номер порта исходной реплики. + +- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — имя межсерверной схемы. + +- `URI` ([String](../../sql-reference/data-types/string.md)) — универсальный идентификатор ресурса. + +- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, указывающий на использование выражения `TO DETACHED` в текущих фоновых операциях. + +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор потока. 
+ +**Пример** + +``` sql +SELECT * FROM system.replicated_fetches LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +database: default +table: t +elapsed: 7.243039876 +progress: 0.41832135995612835 +result_part_name: all_0_0_0 +result_part_path: /var/lib/clickhouse/store/700/70080a04-b2de-4adf-9fa5-9ea210e81766/all_0_0_0/ +partition_id: all +total_size_bytes_compressed: 1052783726 +bytes_read_compressed: 440401920 +source_replica_path: /clickhouse/test/t/replicas/1 +source_replica_hostname: node1 +source_replica_port: 9009 +interserver_scheme: http +URI: http://node1:9009/?endpoint=DataPartsExchange%3A%2Fclickhouse%2Ftest%2Ft%2Freplicas%2F1&part=all_0_0_0&client_protocol_version=4&compress=false +to_detached: 0 +thread_id: 54 +``` + +**Смотрите также** + +- [Управление таблицами ReplicatedMergeTree](../../sql-reference/statements/system/#query-language-system-replicated) + +[Оригинальная статья](https://clickhouse.tech/docs/en/operations/system_tables/replicated_fetches) diff --git a/docs/ru/operations/utilities/clickhouse-obfuscator.md b/docs/ru/operations/utilities/clickhouse-obfuscator.md new file mode 100644 index 00000000000..a52d538965b --- /dev/null +++ b/docs/ru/operations/utilities/clickhouse-obfuscator.md @@ -0,0 +1,43 @@ +# Обфускатор ClickHouse + +Простой инструмент для обфускации табличных данных. + +Он считывает данные входной таблицы и создает выходную таблицу, которая сохраняет некоторые свойства входных данных, но при этом содержит другие данные. + +Это позволяет публиковать практически реальные данные и использовать их в тестах на производительность. + +Обфускатор предназначен для сохранения следующих свойств данных: +- кардинальность (количество уникальных данных) для каждого столбца и каждого кортежа столбцов; +- условная кардинальность: количество уникальных данных одного столбца в соответствии со значением другого столбца; +- вероятностные распределения абсолютного значения целых чисел; знак числа типа Int; показатель степени и знак для чисел с плавающей запятой; +- вероятностное распределение длины строк; +- вероятность нулевых значений чисел; пустые строки и массивы, `NULL`; +- степень сжатия данных алгоритмом LZ77 и семейством энтропийных кодеков; + +- непрерывность (величина разницы) значений времени в таблице; непрерывность значений с плавающей запятой; +- дату из значений `DateTime`; + +- кодировка UTF-8 значений строки; +- строковые значения выглядят естественным образом. + + +Большинство перечисленных выше свойств пригодны для тестирования производительности. Чтение данных, фильтрация, агрегирование и сортировка будут работать почти с той же скоростью, что и исходные данные, благодаря сохраненной кардинальности, величине, степени сжатия и т. д. + +Он работает детерминированно. Вы задаёте значение инициализатора, а преобразование полностью определяется входными данными и инициализатором. + +Некоторые преобразования выполняются один к одному, и их можно отменить. Поэтому нужно использовать большое значение инициализатора и хранить его в секрете. + + +Обфускатор использует некоторые криптографические примитивы для преобразования данных, но, с криптографической точки зрения, результат будет небезопасным. В нем могут сохраниться данные, которые не следует публиковать. + + +Он всегда оставляет без изменений числа 0, 1, -1, даты, длины массивов и нулевые флаги. +Например, если у вас есть столбец `IsMobile` в таблице со значениями 0 и 1, то в преобразованных данных он будет иметь то же значение. 
+
+Таким образом, пользователь сможет посчитать точное соотношение мобильного трафика.
+
+Давайте рассмотрим случай, когда у вас есть какие-то личные данные в таблице (например, электронная почта пользователя), и вы не хотите их публиковать.
+Если ваша таблица достаточно большая и содержит несколько разных электронных почтовых адресов, и ни один из них не встречается часто, то обфускатор полностью анонимизирует все данные. Но, если у вас есть небольшое количество разных значений в столбце, он может скопировать некоторые из них.
+В этом случае вам следует посмотреть на алгоритм работы инструмента и настроить параметры командной строки.
+
+Обфускатор полезен в работе со средним объемом данных (не менее 1000 строк).
diff --git a/docs/ru/sql-reference/aggregate-functions/index.md b/docs/ru/sql-reference/aggregate-functions/index.md
index e7f6acee738..4a7768f587f 100644
--- a/docs/ru/sql-reference/aggregate-functions/index.md
+++ b/docs/ru/sql-reference/aggregate-functions/index.md
@@ -44,8 +44,6 @@ SELECT sum(y) FROM t_null_big
└────────┘
```

-Функция `sum` работает с `NULL` как с `0`. В частности, это означает, что если на вход в функцию подать выборку, где все значения `NULL`, то результат будет `0`, а не `NULL`.
-
Теперь с помощью функции `groupArray` сформируем массив из столбца `y`:

``` sql
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/index.md b/docs/ru/sql-reference/aggregate-functions/reference/index.md
index 4cbe0a0fba4..4c0060581fd 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/index.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/index.md
@@ -45,8 +45,6 @@ toc_hidden: true
- [skewPop](../../../sql-reference/aggregate-functions/reference/skewpop.md)
- [kurtSamp](../../../sql-reference/aggregate-functions/reference/kurtsamp.md)
- [kurtPop](../../../sql-reference/aggregate-functions/reference/kurtpop.md)
-- [timeSeriesGroupSum](../../../sql-reference/aggregate-functions/reference/timeseriesgroupsum.md)
-- [timeSeriesGroupRateSum](../../../sql-reference/aggregate-functions/reference/timeseriesgroupratesum.md)
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md)
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md)
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md)
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/initializeAggregation.md b/docs/ru/sql-reference/aggregate-functions/reference/initializeAggregation.md
new file mode 100644
index 00000000000..a2e3764193e
--- /dev/null
+++ b/docs/ru/sql-reference/aggregate-functions/reference/initializeAggregation.md
@@ -0,0 +1,40 @@
+---
+toc_priority: 150
+---
+
+## initializeAggregation {#initializeaggregation}
+
+Инициализирует агрегацию для введённых строк. Предназначена для функций с суффиксом `State`.
+Поможет вам проводить тесты или работать со столбцами типа `AggregateFunction`, а также с таблицами на движке `AggregatingMergeTree`.
+
+**Синтаксис**
+
+``` sql
+initializeAggregation (aggregate_function, column_1, column_2);
+```
+
+**Параметры**
+
+- `aggregate_function` — название функции агрегации, состояние которой нужно создать. [String](../../../sql-reference/data-types/string.md#string).
+- `column_n` — столбец, который передается в функцию агрегации как аргумент. [String](../../../sql-reference/data-types/string.md#string).
+
+**Возвращаемое значение**
+
+Возвращает результат агрегации введенной информации.
Тип возвращаемого значения такой же, как и для функции, которая становится первым аргументом для `initializeAggregation`.
+
+Возвращаемый тип функций с суффиксом `State` — `AggregateFunction`.
+
+**Пример**
+
+Запрос:
+
+```sql
+SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
+```
+
+Результат:
+
+```text
+┌─uniqMerge(state)─┐
+│                3 │
+└──────────────────┘
+```
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/rankCorr.md b/docs/ru/sql-reference/aggregate-functions/reference/rankCorr.md
new file mode 100644
index 00000000000..48a19e87c52
--- /dev/null
+++ b/docs/ru/sql-reference/aggregate-functions/reference/rankCorr.md
@@ -0,0 +1,53 @@
+## rankCorr {#agg_function-rankcorr}
+
+Вычисляет коэффициент ранговой корреляции.
+
+**Синтаксис**
+
+``` sql
+rankCorr(x, y)
+```
+
+**Параметры**
+
+- `x` — произвольное значение. [Float32](../../../sql-reference/data-types/float.md#float32-float64) или [Float64](../../../sql-reference/data-types/float.md#float32-float64).
+- `y` — произвольное значение. [Float32](../../../sql-reference/data-types/float.md#float32-float64) или [Float64](../../../sql-reference/data-types/float.md#float32-float64).
+
+**Возвращаемое значение**
+
+- Возвращает коэффициент ранговой корреляции рангов x и y. Значение коэффициента корреляции изменяется в пределах от -1 до +1. Если передается менее двух аргументов, функция генерирует исключение. Значение, близкое к +1, указывает на высокую линейную зависимость: с увеличением одной случайной величины увеличивается и вторая случайная величина. Значение, близкое к -1, указывает на высокую линейную зависимость: с увеличением одной случайной величины вторая случайная величина уменьшается. Значение, близкое или равное 0, означает отсутствие связи между двумя случайными величинами.
+
+Тип: [Float64](../../../sql-reference/data-types/float.md#float32-float64).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT rankCorr(number, number) FROM numbers(100);
+```
+
+Результат:
+
+``` text
+┌─rankCorr(number, number)─┐
+│                        1 │
+└──────────────────────────┘
+```
+
+Запрос:
+
+``` sql
+SELECT roundBankers(rankCorr(exp(number), sin(number)), 3) FROM numbers(100);
+```
+
+Результат:
+
+``` text
+┌─roundBankers(rankCorr(exp(number), sin(number)), 3)─┐
+│                                              -0.037 │
+└─────────────────────────────────────────────────────┘
+```
+**Смотрите также**
+
+- [Коэффициент ранговой корреляции Спирмена](https://ru.wikipedia.org/wiki/%D0%9A%D0%BE%D1%80%D1%80%D0%B5%D0%BB%D1%8F%D1%86%D0%B8%D1%8F#%D0%9A%D0%BE%D1%8D%D1%84%D1%84%D0%B8%D1%86%D0%B8%D0%B5%D0%BD%D1%82_%D1%80%D0%B0%D0%BD%D0%B3%D0%BE%D0%B2%D0%BE%D0%B9_%D0%BA%D0%BE%D1%80%D1%80%D0%B5%D0%BB%D1%8F%D1%86%D0%B8%D0%B8_%D0%A1%D0%BF%D0%B8%D1%80%D0%BC%D0%B5%D0%BD%D0%B0)
\ No newline at end of file
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/timeseriesgroupratesum.md b/docs/ru/sql-reference/aggregate-functions/reference/timeseriesgroupratesum.md
deleted file mode 100644
index da5935c8f61..00000000000
--- a/docs/ru/sql-reference/aggregate-functions/reference/timeseriesgroupratesum.md
+++ /dev/null
@@ -1,18 +0,0 @@
----
-toc_priority: 171
----
-
-# timeSeriesGroupRateSum {#agg-function-timeseriesgroupratesum}
-
-Синтаксис: `timeSeriesGroupRateSum(uid, ts, val)`
-
-Аналогично timeSeriesGroupSum, timeSeriesGroupRateSum будет вычислять производные по timestamp для рядов, а затем суммировать полученные производные для всех рядов для одного значения timestamp.
-Также ряды должны быть отсортированы по возрастанию timestamp. - -Для пример из описания timeSeriesGroupSum результат будет следующим: - -``` text -[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)] -``` - -[Оригинальная статья](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/timeseriesgroupratesum/) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/timeseriesgroupsum.md b/docs/ru/sql-reference/aggregate-functions/reference/timeseriesgroupsum.md deleted file mode 100644 index 6a34c08c8c5..00000000000 --- a/docs/ru/sql-reference/aggregate-functions/reference/timeseriesgroupsum.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -toc_priority: 170 ---- - -# timeSeriesGroupSum {#agg-function-timeseriesgroupsum} - -Синтаксис: `timeSeriesGroupSum(uid, timestamp, value)` - -`timeSeriesGroupSum` агрегирует временные ряды в которых не совпадают моменты. -Функция использует линейную интерполяцию между двумя значениями времени, а затем суммирует значения для одного и того же момента (как измеренные так и интерполированные) по всем рядам. - -- `uid` уникальный идентификатор временного ряда, `UInt64`. -- `timestamp` имеет тип `Int64` чтобы можно было учитывать милли и микросекунды. -- `value` представляет собой значение метрики. - -Функция возвращает массив кортежей с парами `(timestamp, aggregated_value)`. - -Временные ряды должны быть отсортированы по возрастанию `timestamp`. - -Пример: - -``` text -┌─uid─┬─timestamp─┬─value─┐ -│ 1 │ 2 │ 0.2 │ -│ 1 │ 7 │ 0.7 │ -│ 1 │ 12 │ 1.2 │ -│ 1 │ 17 │ 1.7 │ -│ 1 │ 25 │ 2.5 │ -│ 2 │ 3 │ 0.6 │ -│ 2 │ 8 │ 1.6 │ -│ 2 │ 12 │ 2.4 │ -│ 2 │ 18 │ 3.6 │ -│ 2 │ 24 │ 4.8 │ -└─────┴───────────┴───────┘ -``` - -``` sql -CREATE TABLE time_series( - uid UInt64, - timestamp Int64, - value Float64 -) ENGINE = Memory; -INSERT INTO time_series VALUES - (1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5), - (2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8); - -SELECT timeSeriesGroupSum(uid, timestamp, value) -FROM ( - SELECT * FROM time_series order by timestamp ASC -); -``` - -И результат будет: - -``` text -[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)] -``` - -[Оригинальная статья](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/timeseriesgroupsum/) diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md index bfd8663a754..70839d21a78 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md @@ -1,4 +1,4 @@ -# Cловари полигонов {#slovari-polygonov} +# Cловари полигонов {#polygon-dictionaries} Словари полигонов позволяют эффективно искать полигон, в который попадают данные точки, среди множества полигонов. Для примера: определение района города по географическим координатам. 
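Например, запрос к словарю полигонов может выглядеть так (примерный набросок; имя словаря `polygons_dict` и атрибут `name` условные):

``` sql
SELECT dictGet('polygons_dict', 'name', tuple(37.62, 55.75)) AS region;
```

Функция `dictGet` принимает точку в виде кортежа координат и возвращает атрибут того полигона, в который эта точка попадает.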
diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 62181eebb4b..16f21e3a80c 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -1157,6 +1157,7 @@ SELECT arrayCumSum([1, 1, 1, 1]) AS res ┌─res──────────┐ │ [1, 2, 3, 4] │ └──────────────┘ +``` ## arrayAUC {#arrayauc} diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index d24de2faae1..5ef10bf306e 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -57,32 +57,31 @@ SELECT ## toUnixTimestamp {#to-unix-timestamp} -For DateTime argument: converts value to its internal numeric representation (Unix Timestamp). -For String argument: parse datetime from string according to the timezone (optional second argument, server timezone is used by default) and returns the corresponding unix timestamp. -For Date argument: the behaviour is unspecified. +Переводит дату-с-временем в число типа UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time). +Для аргумента String, строка конвертируется в дату и время в соответствии с часовым поясом (необязательный второй аргумент, часовой пояс сервера используется по умолчанию). -**Syntax** +**Синтаксис** ``` sql toUnixTimestamp(datetime) toUnixTimestamp(str, [timezone]) ``` -**Returned value** +**Возвращаемое значение** -- Returns the unix timestamp. +- Возвращает Unix Timestamp. -Type: `UInt32`. +Тип: `UInt32`. -**Example** +**Пример** -Query: +Запрос: ``` sql SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp ``` -Result: +Результат: ``` text ┌─unix_timestamp─┐ @@ -234,10 +233,124 @@ WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(d Переводит дату-с-временем в номер секунды, начиная с некоторого фиксированного момента в прошлом. +## date_trunc {#date_trunc} + +Отсекает от даты и времени части, меньшие чем указанная часть. + +**Синтаксис** + +``` sql +date_trunc(unit, value[, timezone]) +``` + +Синоним: `dateTrunc`. + +**Параметры** + +- `unit` — Название части даты или времени. [String](../syntax.md#syntax-string-literal). + Возможные значения: + + - `second` + - `minute` + - `hour` + - `day` + - `week` + - `month` + - `quarter` + - `year` + +- `value` — Дата и время. [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). +- `timezone` — [Часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) для возвращаемого значения (необязательно). Если параметр не задан, используется часовой пояс параметра `value`. [String](../../sql-reference/data-types/string.md) + +**Возвращаемое значение** + +- Дата и время, отсеченные до указанной части. + +Тип: [Datetime](../../sql-reference/data-types/datetime.md). 
+
+**Примеры**
+
+Запрос без указания часового пояса:
+
+``` sql
+SELECT now(), date_trunc('hour', now());
+```
+
+Результат:
+
+``` text
+┌───────────────now()─┬─date_trunc('hour', now())─┐
+│ 2020-09-28 10:40:45 │       2020-09-28 10:00:00 │
+└─────────────────────┴───────────────────────────┘
+```
+
+Запрос с указанием часового пояса:
+
+```sql
+SELECT now(), date_trunc('hour', now(), 'Europe/Moscow');
+```
+
+Результат:
+
+```text
+┌───────────────now()─┬─date_trunc('hour', now(), 'Europe/Moscow')─┐
+│ 2020-09-28 10:46:26 │                         2020-09-28 13:00:00 │
+└─────────────────────┴─────────────────────────────────────────────┘
+```
+
+**См. также**
+
+- [toStartOfInterval](#tostartofintervaltime-or-data-interval-x-unit-time-zone)
+
 ## now {#now}
 
-Принимает ноль аргументов и возвращает текущее время на один из моментов выполнения запроса.
-Функция возвращает константу, даже если запрос выполнялся долго.
+Возвращает текущую дату и время.
+
+**Синтаксис**
+
+``` sql
+now([timezone])
+```
+
+**Параметры**
+
+- `timezone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) для возвращаемого значения (необязательно). [String](../../sql-reference/data-types/string.md)
+
+**Возвращаемое значение**
+
+- Текущие дата и время.
+
+Тип: [Datetime](../../sql-reference/data-types/datetime.md).
+
+**Пример**
+
+Запрос без указания часового пояса:
+
+``` sql
+SELECT now();
+```
+
+Результат:
+
+``` text
+┌───────────────now()─┐
+│ 2020-10-17 07:42:09 │
+└─────────────────────┘
+```
+
+Запрос с указанием часового пояса:
+
+``` sql
+SELECT now('Europe/Moscow');
+```
+
+Результат:
+
+``` text
+┌─now('Europe/Moscow')─┐
+│  2020-10-17 10:42:23 │
+└──────────────────────┘
+```
 
 ## today {#today}
 
 Принимает ноль аргументов и возвращает текущую дату на один из моментов выполнения запроса.
@@ -376,4 +489,4 @@ SELECT formatDateTime(toDate('2010-01-04'), '%g')
 └────────────────────────────────────────────┘
 ```
 
-[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/date_time_functions/) \ No newline at end of file
+[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/date_time_functions/)
diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md
new file mode 100644
index 00000000000..f1f6516d453
--- /dev/null
+++ b/docs/ru/sql-reference/functions/encryption-functions.md
@@ -0,0 +1,382 @@
+---
+toc_priority: 67
+toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438 \u0434\u043b\u044f \u0448\u0438\u0444\u0440\u043e\u0432\u0430\u043d\u0438\u044f"
+---
+
+# Функции шифрования {#encryption-functions}
+
+Данные функции реализуют шифрование и расшифровку данных с помощью алгоритма AES (Advanced Encryption Standard).
+
+Длина ключа зависит от режима шифрования. Ключ может иметь длину 16, 24 или 32 байта для режимов шифрования `-128-`, `-192-` и `-256-` соответственно.
+
+Длина инициализирующего вектора всегда 16 байт (лишние байты игнорируются).
+
+Обратите внимание, что эти функции работают медленно.
+
+## encrypt {#encrypt}
+
+Функция поддерживает шифрование данных следующими режимами:
+
+- aes-128-ecb, aes-192-ecb, aes-256-ecb
+- aes-128-cbc, aes-192-cbc, aes-256-cbc
+- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
+- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
+- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
+- aes-128-ofb, aes-192-ofb, aes-256-ofb
+- aes-128-gcm, aes-192-gcm, aes-256-gcm
+
+**Синтаксис**
+
+``` sql
+encrypt('mode', 'plaintext', 'key' [, iv, aad])
+```
+
+**Параметры**
+
+- `mode` — режим шифрования.
[String](../../sql-reference/data-types/string.md#string).
+- `plaintext` — текст, который будет зашифрован. [String](../../sql-reference/data-types/string.md#string).
+- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string).
+- `iv` — инициализирующий вектор. Обязателен для `-gcm` режимов, для остальных режимов необязателен. [String](../../sql-reference/data-types/string.md#string).
+- `aad` — дополнительные аутентифицированные данные. Не шифруются, но влияют на расшифровку. Параметр работает только с `-gcm` режимами. Для остальных вызовет исключение. [String](../../sql-reference/data-types/string.md#string).
+
+**Возвращаемое значение**
+
+- Зашифрованная строка. [String](../../sql-reference/data-types/string.md#string).
+
+**Примеры**
+
+Создадим такую таблицу:
+
+Запрос:
+
+``` sql
+CREATE TABLE encryption_test
+(
+    input String,
+    key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
+    iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
+    key32 String DEFAULT substring(key, 1, 32),
+    key24 String DEFAULT substring(key, 1, 24),
+    key16 String DEFAULT substring(key, 1, 16)
+) Engine = Memory;
+```
+
+Вставим эти данные:
+
+Запрос:
+
+``` sql
+INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
+```
+
+Пример без `iv`:
+
+Запрос:
+
+``` sql
+SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test;
+```
+
+Результат:
+
+``` text
+┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐
+│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F                                 │
+│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03                                 │
+│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │
+└─────────────┴──────────────────────────────────────────────────────────────────┘
+```
+
+Пример с `iv`:
+
+Запрос:
+
+``` sql
+SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
+```
+
+Результат:
+
+``` text
+┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐
+│ aes-256-ctr │                                               │
+│ aes-256-ctr │ 7FB039F7                                      │
+│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949        │
+└─────────────┴───────────────────────────────────────────────┘
+```
+
+Пример в режиме `-gcm`:
+
+Запрос:
+
+``` sql
+SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
+```
+
+Результат:
+
+``` text
+┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐
+│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA                                       │
+│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414                               │
+│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │
+└─────────────┴────────────────────────────────────────────────────────────────────────┘
+```
+
+Пример в режиме `-gcm` и с `aad`:
+
+Запрос:
+
+``` sql
+SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
+```
+
+Результат:
+
+``` text
+┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
+│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB                                       │
+│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447                               │
+│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │
+└─────────────┴────────────────────────────────────────────────────────────────────────┘
+```
+
+## aes_encrypt_mysql {#aes_encrypt_mysql}
+
+Совместима с шифрованием MySQL, результат может быть
расшифрован функцией [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt).
+
+Функция поддерживает шифрование данных следующими режимами:
+
+- aes-128-ecb, aes-192-ecb, aes-256-ecb
+- aes-128-cbc, aes-192-cbc, aes-256-cbc
+- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
+- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
+- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
+- aes-128-ofb, aes-192-ofb, aes-256-ofb
+
+**Синтаксис**
+
+```sql
+aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
+```
+
+**Параметры**
+
+- `mode` — режим шифрования. [String](../../sql-reference/data-types/string.md#string).
+- `plaintext` — текст, который будет зашифрован. [String](../../sql-reference/data-types/string.md#string).
+- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string).
+- `iv` — инициализирующий вектор. Необязателен. [String](../../sql-reference/data-types/string.md#string).
+
+**Возвращаемое значение**
+
+- Зашифрованная строка. [String](../../sql-reference/data-types/string.md#string).
+
+**Примеры**
+
+Создадим такую таблицу:
+
+Запрос:
+
+``` sql
+CREATE TABLE encryption_test
+(
+    input String,
+    key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
+    iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
+    key32 String DEFAULT substring(key, 1, 32),
+    key24 String DEFAULT substring(key, 1, 24),
+    key16 String DEFAULT substring(key, 1, 16)
+) Engine = Memory;
+```
+
+Вставим эти данные:
+
+Запрос:
+
+``` sql
+INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
+```
+
+Пример без `iv`:
+
+Запрос:
+
+``` sql
+SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test;
+```
+
+Результат:
+
+``` text
+┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐
+│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83                                 │
+│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A                                 │
+│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │
+└─────────────┴──────────────────────────────────────────────────────────────────┘
+```
+
+Пример с `iv`:
+
+Запрос:
+
+``` sql
+SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test;
+```
+
+Результат:
+
+``` text
+┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐
+│ aes-256-cfb128 │                                                             │
+│ aes-256-cfb128 │ 7FB039F7                                                    │
+│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F                      │
+└────────────────┴─────────────────────────────────────────────────────────────┘
+```
+
+## decrypt {#decrypt}
+
+Функция поддерживает расшифровку данных следующими режимами:
+
+- aes-128-ecb, aes-192-ecb, aes-256-ecb
+- aes-128-cbc, aes-192-cbc, aes-256-cbc
+- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
+- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
+- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
+- aes-128-ofb, aes-192-ofb, aes-256-ofb
+- aes-128-gcm, aes-192-gcm, aes-256-gcm
+
+**Синтаксис**
+
+```sql
+decrypt('mode', 'ciphertext', 'key' [, iv, aad])
+```
+
+**Параметры**
+
+- `mode` — режим шифрования. [String](../../sql-reference/data-types/string.md#string).
+- `ciphertext` — зашифрованный текст, который будет расшифрован. [String](../../sql-reference/data-types/string.md#string).
+- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string).
+- `iv` — инициализирующий вектор.
Обязателен для `-gcm` режимов, для остальных режимов необязателен. [String](../../sql-reference/data-types/string.md#string).
+- `aad` — дополнительные аутентифицированные данные. Текст не будет расшифрован, если это значение неверно. Работает только с `-gcm` режимами. Для остальных вызовет исключение. [String](../../sql-reference/data-types/string.md#string).
+
+**Возвращаемое значение**
+
+- Расшифрованная строка. [String](../../sql-reference/data-types/string.md#string).
+
+**Примеры**
+
+Создадим такую таблицу:
+
+Запрос:
+
+``` sql
+CREATE TABLE encryption_test
+(
+    input String,
+    key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
+    iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
+    key32 String DEFAULT substring(key, 1, 32),
+    key24 String DEFAULT substring(key, 1, 24),
+    key16 String DEFAULT substring(key, 1, 16)
+) Engine = Memory;
+```
+
+Вставим эти данные:
+
+Запрос:
+
+``` sql
+INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
+```
+
+Запрос:
+
+``` sql
+SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test;
+```
+
+Результат:
+
+```text
+┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐
+│ aes-128-ecb │                                                                      │
+│ aes-128-ecb │ text                                                                 │
+│ aes-128-ecb │ What Is ClickHouse?                                                  │
+└─────────────┴──────────────────────────────────────────────────────────────────────┘
+```
+
+## aes_decrypt_mysql {#aes_decrypt_mysql}
+
+Совместима с шифрованием MySQL и может расшифровать данные, зашифрованные функцией [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt).
+
+Функция поддерживает расшифровку данных следующими режимами:
+
+- aes-128-ecb, aes-192-ecb, aes-256-ecb
+- aes-128-cbc, aes-192-cbc, aes-256-cbc
+- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
+- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
+- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
+- aes-128-ofb, aes-192-ofb, aes-256-ofb
+
+**Синтаксис**
+
+```sql
+aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
+```
+
+**Параметры**
+
+- `mode` — режим шифрования. [String](../../sql-reference/data-types/string.md#string).
+- `ciphertext` — зашифрованный текст, который будет расшифрован. [String](../../sql-reference/data-types/string.md#string).
+- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string).
+- `iv` — инициализирующий вектор. Необязателен. [String](../../sql-reference/data-types/string.md#string).
+
+**Возвращаемое значение**
+
+- Расшифрованная строка. [String](../../sql-reference/data-types/string.md#string).
+
+**Примеры**
+
+Создадим такую таблицу:
+
+Запрос:
+
+``` sql
+CREATE TABLE encryption_test
+(
+    input String,
+    key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
+    iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
+    key32 String DEFAULT substring(key, 1, 32),
+    key24 String DEFAULT substring(key, 1, 24),
+    key16 String DEFAULT substring(key, 1, 16)
+) Engine = Memory;
+```
+
+Вставим эти данные:
+
+Запрос:
+
+``` sql
+INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
+```
+
+Запрос:
+
+``` sql
+SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test;
+```
+
+Результат:
+
+``` text
+┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐
+│ aes-128-cbc │                                                                                      │
+│ aes-128-cbc │ text                                                                                 │
+│ aes-128-cbc │ What Is ClickHouse?                                                                  │
+└─────────────┴──────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+[Original article](https://clickhouse.tech/docs/ru/sql-reference/functions/encryption_functions/)
diff --git a/docs/ru/sql-reference/functions/in-functions.md b/docs/ru/sql-reference/functions/in-functions.md
index e137187a36b..b732f67303b 100644
--- a/docs/ru/sql-reference/functions/in-functions.md
+++ b/docs/ru/sql-reference/functions/in-functions.md
@@ -9,16 +9,4 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u
 
 Смотрите раздел [Операторы IN](../operators/in.md#select-in-operators).
 
-## tuple(x, y, …), оператор (x, y, …) {#tuplex-y-operator-x-y}
-
-Функция, позволяющая сгруппировать несколько столбцов.
-Для столбцов, имеющих типы T1, T2, … возвращает кортеж типа Tuple(T1, T2, …), содержащий эти столбцы. Выполнение функции ничего не стоит.
-Кортежи обычно используются как промежуточное значение в качестве аргумента операторов IN, или для создания списка формальных параметров лямбда-функций. Кортежи не могут быть записаны в таблицу.
-
-## tupleElement(tuple, n), оператор x.N {#tupleelementtuple-n-operator-x-n}
-
-Функция, позволяющая достать столбец из кортежа.
-N - индекс столбца начиная с 1. N должно быть константой. N должно быть целым строго положительным числом не большим размера кортежа.
-Выполнение функции ничего не стоит.
-
 [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/in_functions/)
diff --git a/docs/ru/sql-reference/functions/introspection.md b/docs/ru/sql-reference/functions/introspection.md
index 9f4f2ebd1e9..00dd660bc16 100644
--- a/docs/ru/sql-reference/functions/introspection.md
+++ b/docs/ru/sql-reference/functions/introspection.md
@@ -306,3 +306,68 @@ execute_native_thread_routine
 start_thread
 clone
 ```
+
+## tid {#tid}
+
+Возвращает id потока, в котором обрабатывается текущий [Block](https://clickhouse.tech/docs/ru/development/architecture/#block).
+
+**Синтаксис**
+
+``` sql
+tid()
+```
+
+**Возвращаемое значение**
+
+- Id текущего потока. [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT tid();
+```
+
+Результат:
+
+``` text
+┌─tid()─┐
+│  3878 │
+└───────┘
+```
+
+## logTrace {#logtrace}
+
+Выводит сообщение в лог сервера для каждого [Block](https://clickhouse.tech/docs/ru/development/architecture/#block).
+
+**Синтаксис**
+
+``` sql
+logTrace('message')
+```
+
+**Параметры**
+
+- `message` — сообщение, которое отправляется в серверный лог.
[String](../../sql-reference/data-types/string.md#string).
+
+**Возвращаемое значение**
+
+- Всегда возвращает 0.
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT logTrace('logTrace message');
+```
+
+Результат:
+
+``` text
+┌─logTrace('logTrace message')─┐
+│                            0 │
+└──────────────────────────────┘
+```
+
+[Original article](https://clickhouse.tech/docs/en/query_language/functions/introspection/)
\ No newline at end of file
diff --git a/docs/ru/sql-reference/functions/tuple-functions.md b/docs/ru/sql-reference/functions/tuple-functions.md
new file mode 100644
index 00000000000..f88886ec6f1
--- /dev/null
+++ b/docs/ru/sql-reference/functions/tuple-functions.md
@@ -0,0 +1,114 @@
+---
+toc_priority: 68
+toc_title: Функции для работы с кортежами
+---
+
+# Функции для работы с кортежами {#tuple-functions}
+
+## tuple {#tuple}
+
+Функция, позволяющая сгруппировать несколько столбцов.
+Для столбцов, имеющих типы T1, T2, … возвращает кортеж типа Tuple(T1, T2, …), содержащий эти столбцы. Выполнение функции ничего не стоит.
+Кортежи обычно используются как промежуточное значение в качестве аргумента операторов IN, или для создания списка формальных параметров лямбда-функций. Кортежи не могут быть записаны в таблицу.
+
+С помощью функции реализуется оператор `(x, y, …)`.
+
+**Синтаксис**
+
+``` sql
+tuple(x, y, …)
+```
+
+## tupleElement {#tupleelement}
+
+Функция, позволяющая достать столбец из кортежа.
+N - индекс столбца начиная с 1. N должно быть константой. N должно быть целым строго положительным числом не большим размера кортежа.
+Выполнение функции ничего не стоит.
+
+С помощью функции реализуется оператор `x.N`. Небольшой набросок совместного использования `tuple` и `tupleElement` приведён ниже.
+
+**Синтаксис**
+
+``` sql
+tupleElement(tuple, n)
+```
+
+## untuple {#untuple}
+
+Выполняет синтаксическую подстановку элементов [кортежа](../../sql-reference/data-types/tuple.md#tuplet1-t2) в месте вызова.
+
+**Синтаксис**
+
+``` sql
+untuple(x)
+```
+
+Чтобы пропустить некоторые столбцы в результате запроса, вы можете использовать выражение `EXCEPT`.
+
+**Параметры**
+
+- `x` - функция `tuple`, столбец или кортеж элементов. [Tuple](../../sql-reference/data-types/tuple.md).
+
+**Возвращаемое значение**
+
+- Нет.
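Минимальный набросок совместного использования `tuple`, оператора `x.N` и `tupleElement` (значения условные):

``` sql
SELECT
    tuple(1, 'a') AS t,                    -- кортеж типа Tuple(UInt8, String)
    t.1 AS first_element,                  -- оператор x.N
    tupleElement(t, 2) AS second_element;  -- то же самое в виде функции
```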
+
+**Примеры**
+
+Входная таблица:
+
+``` text
+┌─key─┬─v1─┬─v2─┬─v3─┬─v4─┬─v5─┬─v6────────┐
+│   1 │ 10 │ 20 │ 40 │ 30 │ 15 │ (33,'ab') │
+│   2 │ 25 │ 65 │ 70 │ 40 │  6 │ (44,'cd') │
+│   3 │ 57 │ 30 │ 20 │ 10 │  5 │ (55,'ef') │
+│   4 │ 55 │ 12 │  7 │ 80 │ 90 │ (66,'gh') │
+│   5 │ 30 │ 50 │ 70 │ 25 │ 55 │ (77,'kl') │
+└─────┴────┴────┴────┴────┴────┴───────────┘
+```
+
+Пример использования столбца типа `Tuple` в качестве параметра функции `untuple`:
+
+Запрос:
+
+``` sql
+SELECT untuple(v6) FROM kv;
+```
+
+Результат:
+
+``` text
+┌─_ut_1─┬─_ut_2─┐
+│    33 │ ab    │
+│    44 │ cd    │
+│    55 │ ef    │
+│    66 │ gh    │
+│    77 │ kl    │
+└───────┴───────┘
+```
+
+Пример использования выражения `EXCEPT`:
+
+Запрос:
+
+``` sql
+SELECT untuple((* EXCEPT (v2, v3),)) FROM kv;
+```
+
+Результат:
+
+``` text
+┌─key─┬─v1─┬─v4─┬─v5─┬─v6────────┐
+│   1 │ 10 │ 30 │ 15 │ (33,'ab') │
+│   2 │ 25 │ 40 │  6 │ (44,'cd') │
+│   3 │ 57 │ 10 │  5 │ (55,'ef') │
+│   4 │ 55 │ 80 │ 90 │ (66,'gh') │
+│   5 │ 30 │ 25 │ 55 │ (77,'kl') │
+└─────┴────┴────┴────┴───────────┘
+```
+
+**Смотрите также**
+
+- [Tuple](../../sql-reference/data-types/tuple.md)
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/functions/tuple-functions/)
diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md
index 3733e570f10..4a314bd22d8 100644
--- a/docs/ru/sql-reference/functions/type-conversion-functions.md
+++ b/docs/ru/sql-reference/functions/type-conversion-functions.md
@@ -319,6 +319,62 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut
 
 Функция принимает число или дату или дату-с-временем и возвращает строку, содержащую байты, представляющие соответствующее значение в host order (little endian). При этом, отбрасываются нулевые байты с конца. Например, значение 255 типа UInt32 будет строкой длины 1 байт.
 
+## reinterpretAsUUID {#reinterpretasuuid}
+
+Функция принимает шестнадцатибайтную строку и интерпретирует ее байты в network order (big-endian). Если строка имеет недостаточную длину, то функция работает так, как будто строка дополнена необходимым количеством нулевых байт с конца. Если строка длиннее, чем шестнадцать байт, то игнорируются лишние байты с конца.
+
+**Синтаксис**
+
+``` sql
+reinterpretAsUUID(fixed_string)
+```
+
+**Параметры**
+
+- `fixed_string` — строка с big-endian порядком байтов. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring).
+
+**Возвращаемое значение**
+
+- Значение типа [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type).
+
+**Примеры**
+
+Интерпретация строки как UUID.
+
+Запрос:
+
+``` sql
+SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))
+```
+
+Результат:
+
+``` text
+┌─reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))─┐
+│ 08090a0b-0c0d-0e0f-0001-020304050607                                  │
+└───────────────────────────────────────────────────────────────────────┘
+```
+
+Переход в UUID и обратно.
+
+Запрос:
+
+``` sql
+WITH
+    generateUUIDv4() AS uuid,
+    identity(lower(hex(reverse(reinterpretAsString(uuid))))) AS str,
+    reinterpretAsUUID(reverse(unhex(str))) AS uuid2
+SELECT uuid = uuid2;
+```
+
+Результат:
+
+``` text
+┌─equals(uuid, uuid2)─┐
+│                   1 │
+└─────────────────────┘
+```
+
 ## CAST(x, T) {#type_conversion_function-cast}
 
 Преобразует x в тип данных t.
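Небольшой иллюстративный пример приведения типов (значения условные):

``` sql
SELECT
    CAST('2020-11-11', 'Date') AS date,  -- строка -> Date
    CAST(255, 'String') AS str,          -- число -> String
    CAST('1', 'UInt8') AS num;           -- строка -> UInt8
```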
diff --git a/docs/ru/sql-reference/statements/alter/partition.md b/docs/ru/sql-reference/statements/alter/partition.md index 5c4a23428ad..df0123c63f1 100644 --- a/docs/ru/sql-reference/statements/alter/partition.md +++ b/docs/ru/sql-reference/statements/alter/partition.md @@ -19,10 +19,10 @@ toc_title: PARTITION - [FETCH PARTITION](#alter_fetch-partition) — скачать партицию с другого сервера; - [MOVE PARTITION\|PART](#alter_move-partition) — переместить партицию/кускок на другой диск или том. -## DETACH PARTITION {#alter_detach-partition} +## DETACH PARTITION\|PART {#alter_detach-partition} ``` sql -ALTER TABLE table_name DETACH PARTITION partition_expr +ALTER TABLE table_name DETACH PARTITION|PART partition_expr ``` Перемещает заданную партицию в директорию `detached`. Сервер не будет знать об этой партиции до тех пор, пока вы не выполните запрос [ATTACH](#alter_attach-partition). @@ -30,7 +30,8 @@ ALTER TABLE table_name DETACH PARTITION partition_expr Пример: ``` sql -ALTER TABLE visits DETACH PARTITION 201901 +ALTER TABLE mt DETACH PARTITION '2020-11-21'; +ALTER TABLE mt DETACH PART 'all_2_2_0'; ``` Подробнее о том, как корректно задать имя партиции, см. в разделе [Как задавать имя партиции в запросах ALTER](#alter-how-to-specify-part-expr). @@ -39,10 +40,10 @@ ALTER TABLE visits DETACH PARTITION 201901 Запрос реплицируется — данные будут перенесены в директорию `detached` и забыты на всех репликах. Обратите внимание, запрос может быть отправлен только на реплику-лидер. Чтобы узнать, является ли реплика лидером, выполните запрос `SELECT` к системной таблице [system.replicas](../../../operations/system-tables/replicas.md#system_tables-replicas). Либо можно выполнить запрос `DETACH` на всех репликах — тогда на всех репликах, кроме реплики-лидера, запрос вернет ошибку. -## DROP PARTITION {#alter_drop-partition} +## DROP PARTITION\|PART {#alter_drop-partition} ``` sql -ALTER TABLE table_name DROP PARTITION partition_expr +ALTER TABLE table_name DROP PARTITION|PART partition_expr ``` Удаляет партицию. Партиция помечается как неактивная и будет полностью удалена примерно через 10 минут. @@ -51,6 +52,13 @@ ALTER TABLE table_name DROP PARTITION partition_expr Запрос реплицируется — данные будут удалены на всех репликах. +Пример: + +``` sql +ALTER TABLE mt DROP PARTITION '2020-11-21'; +ALTER TABLE mt DROP PART 'all_4_4_0'; +``` + ## DROP DETACHED PARTITION\|PART {#alter_drop-detached} ``` sql diff --git a/docs/ru/sql-reference/statements/create/dictionary.md b/docs/ru/sql-reference/statements/create/dictionary.md index a20dc812e02..3134a89483b 100644 --- a/docs/ru/sql-reference/statements/create/dictionary.md +++ b/docs/ru/sql-reference/statements/create/dictionary.md @@ -16,7 +16,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... 
paramN valueN]))
 LAYOUT(LAYOUT_NAME([param_name param_value]))
-LIFETIME([MIN val1] MAX val2)
+LIFETIME({MIN min_val MAX max_val | max_val})
 ```
 
 Создаёт [внешний словарь](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) с заданной [структурой](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [источником](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [способом размещения в памяти](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) и [периодом обновления](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
@@ -27,5 +27,5 @@ LIFETIME([MIN val1] MAX val2)
 
 Смотрите [Внешние словари](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
 
-[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/dictionary)
+[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/dictionary)
\ No newline at end of file
diff --git a/docs/ru/sql-reference/statements/select/from.md b/docs/ru/sql-reference/statements/select/from.md
index 491bbfe892b..8facf140118 100644
--- a/docs/ru/sql-reference/statements/select/from.md
+++ b/docs/ru/sql-reference/statements/select/from.md
@@ -27,9 +27,9 @@ toc_title: FROM
 
 ### Недостатки {#drawbacks}
 
-Запросы, которые используют `FINAL` выполняются не так быстро, как аналогичные запросы без него, потому что:
+Запросы, которые используют `FINAL`, выполняются немного медленнее, чем аналогичные запросы без него, потому что:
 
-- Запрос выполняется в одном потоке, и данные мёржатся во время выполнения запроса.
+- Данные мёржатся во время выполнения запроса.
 - Запросы с модификатором `FINAL` читают столбцы первичного ключа в дополнение к столбцам, используемым в запросе.
 
 **В большинстве случаев избегайте использования `FINAL`.** Общий подход заключается в использовании агрегирующих запросов, которые предполагают, что фоновые процессы движков семейства `MergeTree` ещё не случились (например, сами отбрасывают дубликаты). {## TODO: examples ##}
diff --git a/docs/ru/sql-reference/statements/select/group-by.md b/docs/ru/sql-reference/statements/select/group-by.md
index a0454ef1d91..0c8a29d0c26 100644
--- a/docs/ru/sql-reference/statements/select/group-by.md
+++ b/docs/ru/sql-reference/statements/select/group-by.md
@@ -43,6 +43,153 @@ toc_title: GROUP BY
 
 Если в `GROUP BY` передать несколько ключей, то в результате мы получим все комбинации выборки, как если бы `NULL` был конкретным значением.
 
+## Модификатор WITH ROLLUP {#with-rollup-modifier}
+
+Модификатор `WITH ROLLUP` применяется для подсчета подытогов для ключевых выражений. При этом учитывается порядок следования ключевых выражений в списке `GROUP BY`. Подытоги подсчитываются в обратном порядке: сначала для последнего ключевого выражения в списке, потом для предпоследнего и так далее вплоть до самого первого ключевого выражения.
+
+Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым строки уже сгруппированы, указывается значение `0` или пустая строка.
+
+!!! note "Примечание"
+    Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов.
+
+**Пример**
+
+Рассмотрим таблицу t:
+
+```text
+┌─year─┬─month─┬─day─┐
+│ 2019 │     1 │   5 │
+│ 2019 │     1 │  15 │
+│ 2020 │     1 │   5 │
+│ 2020 │     1 │  15 │
+│ 2020 │    10 │   5 │
+│ 2020 │    10 │  15 │
+└──────┴───────┴─────┘
+```
+
+Запрос:
+
+```sql
+SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
+```
+
+Поскольку секция `GROUP BY` содержит три ключевых выражения, результат состоит из четырех таблиц с подытогами, которые как бы "сворачиваются" справа налево:
+
+- `GROUP BY year, month, day`;
+- `GROUP BY year, month` (а колонка `day` заполнена нулями);
+- `GROUP BY year` (теперь обе колонки `month, day` заполнены нулями);
+- и общий итог (все три колонки с ключевыми выражениями заполнены нулями).
+
+```text
+┌─year─┬─month─┬─day─┬─count()─┐
+│ 2020 │    10 │  15 │       1 │
+│ 2020 │     1 │   5 │       1 │
+│ 2019 │     1 │   5 │       1 │
+│ 2020 │     1 │  15 │       1 │
+│ 2019 │     1 │  15 │       1 │
+│ 2020 │    10 │   5 │       1 │
+└──────┴───────┴─────┴─────────┘
+┌─year─┬─month─┬─day─┬─count()─┐
+│ 2019 │     1 │   0 │       2 │
+│ 2020 │     1 │   0 │       2 │
+│ 2020 │    10 │   0 │       2 │
+└──────┴───────┴─────┴─────────┘
+┌─year─┬─month─┬─day─┬─count()─┐
+│ 2019 │     0 │   0 │       2 │
+│ 2020 │     0 │   0 │       4 │
+└──────┴───────┴─────┴─────────┘
+┌─year─┬─month─┬─day─┬─count()─┐
+│    0 │     0 │   0 │       6 │
+└──────┴───────┴─────┴─────────┘
+```
+
+## Модификатор WITH CUBE {#with-cube-modifier}
+
+Модификатор `WITH CUBE` применяется для расчета подытогов по всем комбинациям группировки ключевых выражений в списке `GROUP BY`.
+
+Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым выполняется группировка, указывается значение `0` или пустая строка.
+
+!!! note "Примечание"
+    Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов.
+
+**Пример**
+
+Рассмотрим таблицу t:
+
+```text
+┌─year─┬─month─┬─day─┐
+│ 2019 │     1 │   5 │
+│ 2019 │     1 │  15 │
+│ 2020 │     1 │   5 │
+│ 2020 │     1 │  15 │
+│ 2020 │    10 │   5 │
+│ 2020 │    10 │  15 │
+└──────┴───────┴─────┘
+```
+
+Запрос:
+
+```sql
+SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE;
+```
+
+Поскольку секция `GROUP BY` содержит три ключевых выражения, результат состоит из восьми таблиц с подытогами — по таблице для каждой комбинации ключевых выражений:
+
+- `GROUP BY year, month, day`
+- `GROUP BY year, month`
+- `GROUP BY year, day`
+- `GROUP BY year`
+- `GROUP BY month, day`
+- `GROUP BY month`
+- `GROUP BY day`
+- и общий итог.
+
+Колонки, которые не участвуют в `GROUP BY`, заполнены нулями.
+ +```text +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2020 │ 10 │ 15 │ 1 │ +│ 2020 │ 1 │ 5 │ 1 │ +│ 2019 │ 1 │ 5 │ 1 │ +│ 2020 │ 1 │ 15 │ 1 │ +│ 2019 │ 1 │ 15 │ 1 │ +│ 2020 │ 10 │ 5 │ 1 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 1 │ 0 │ 2 │ +│ 2020 │ 1 │ 0 │ 2 │ +│ 2020 │ 10 │ 0 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2020 │ 0 │ 5 │ 2 │ +│ 2019 │ 0 │ 5 │ 1 │ +│ 2020 │ 0 │ 15 │ 2 │ +│ 2019 │ 0 │ 15 │ 1 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 0 │ 0 │ 2 │ +│ 2020 │ 0 │ 0 │ 4 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 1 │ 5 │ 2 │ +│ 0 │ 10 │ 15 │ 1 │ +│ 0 │ 10 │ 5 │ 1 │ +│ 0 │ 1 │ 15 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 1 │ 0 │ 4 │ +│ 0 │ 10 │ 0 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 0 │ 5 │ 3 │ +│ 0 │ 0 │ 15 │ 3 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 0 │ 0 │ 6 │ +└──────┴───────┴─────┴─────────┘ +``` + + ## Модификатор WITH TOTALS {#with-totals-modifier} Если указан модификатор `WITH TOTALS`, то будет посчитана ещё одна строчка, в которой в столбцах-ключах будут содержаться значения по умолчанию (нули, пустые строки), а в столбцах агрегатных функций - значения, посчитанные по всем строкам («тотальные» значения). @@ -86,8 +233,6 @@ SELECT FROM hits ``` -Но, в отличие от стандартного SQL, если в таблице нет строк (вообще нет или после фильтрации с помощью WHERE), в качестве результата возвращается пустой результат, а не результат из одной строки, содержащий «начальные» значения агрегатных функций. - В отличие от MySQL (и в соответствии со стандартом SQL), вы не можете получить какое-нибудь значение некоторого столбца, не входящего в ключ или агрегатную функцию (за исключением константных выражений). Для обхода этого вы можете воспользоваться агрегатной функцией any (получить первое попавшееся значение) или min/max. Пример: @@ -103,10 +248,6 @@ GROUP BY domain GROUP BY вычисляет для каждого встретившегося различного значения ключей, набор значений агрегатных функций. -Не поддерживается GROUP BY по столбцам-массивам. - -Не поддерживается указание констант в качестве аргументов агрегатных функций. Пример: `sum(1)`. Вместо этого, вы можете избавиться от констант. Пример: `count()`. - ## Детали реализации {#implementation-details} Агрегация является одной из наиболее важных возможностей столбцовых СУБД, и поэтому её реализация является одной из наиболее сильно оптимизированных частей ClickHouse. По умолчанию агрегирование выполняется в памяти с помощью хэш-таблицы. Она имеет более 40 специализаций, которые выбираются автоматически в зависимости от типов данных ключа группировки. 
diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md
index f5fe2788370..c2e05f05079 100644
--- a/docs/ru/sql-reference/statements/select/index.md
+++ b/docs/ru/sql-reference/statements/select/index.md
@@ -18,7 +18,7 @@ SELECT [DISTINCT] expr_list
 [GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON <expr_list>)|(USING <column_list>)
 [PREWHERE expr]
 [WHERE expr]
-[GROUP BY expr_list] [WITH TOTALS]
+[GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS]
 [HAVING expr]
 [ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
 [LIMIT [offset_value, ]n BY columns]
diff --git a/docs/ru/sql-reference/statements/select/order-by.md b/docs/ru/sql-reference/statements/select/order-by.md
index 0476c5da5af..ea0f40b2dc0 100644
--- a/docs/ru/sql-reference/statements/select/order-by.md
+++ b/docs/ru/sql-reference/statements/select/order-by.md
@@ -214,3 +214,85 @@ ORDER BY
 │ 1970-03-12 │ 1970-01-08 │ original │
 └────────────┴────────────┴──────────┘
 ```
+
+## Секция OFFSET FETCH {#offset-fetch}
+
+`OFFSET` и `FETCH` позволяют извлекать данные по частям. Они указывают строки, которые вы хотите получить в результате запроса.
+
+``` sql
+OFFSET offset_row_count {ROW | ROWS} [FETCH {FIRST | NEXT} fetch_row_count {ROW | ROWS} {ONLY | WITH TIES}]
+```
+
+`offset_row_count` или `fetch_row_count` может быть числом или литеральной константой. Если вы не используете `fetch_row_count`, то его значение равно 1.
+
+`OFFSET` указывает количество строк, которые необходимо пропустить перед началом возврата строк из запроса.
+
+`FETCH` указывает максимальное количество строк, которые могут быть получены в результате запроса.
+
+Опция `ONLY` используется для возврата строк, которые следуют сразу же за строками, пропущенными секцией `OFFSET`. В этом случае `FETCH` — это альтернатива [LIMIT](../../../sql-reference/statements/select/limit.md). Например, следующий запрос
+
+``` sql
+SELECT * FROM test_fetch ORDER BY a OFFSET 1 ROW FETCH FIRST 3 ROWS ONLY;
+```
+
+идентичен запросу
+
+``` sql
+SELECT * FROM test_fetch ORDER BY a LIMIT 3 OFFSET 1;
+```
+
+Опция `WITH TIES` используется для возврата дополнительных строк, которые привязываются к последней в результате запроса. Например, если `fetch_row_count` имеет значение 5 и существуют еще 2 строки с такими же значениями столбцов, указанных в `ORDER BY`, что и у пятой строки результата, то финальный набор будет содержать 7 строк.
+
+!!! note "Примечание"
+    Секция `OFFSET` должна находиться перед секцией `FETCH`, если обе присутствуют.
+ +### Примеры {#examples} + +Входная таблица: + +``` text +┌─a─┬─b─┐ +│ 1 │ 1 │ +│ 2 │ 1 │ +│ 3 │ 4 │ +│ 1 │ 3 │ +│ 5 │ 4 │ +│ 0 │ 6 │ +│ 5 │ 7 │ +└───┴───┘ +``` + +Использование опции `ONLY`: + +``` sql +SELECT * FROM test_fetch ORDER BY a OFFSET 3 ROW FETCH FIRST 3 ROWS ONLY; +``` + +Результат: + +``` text +┌─a─┬─b─┐ +│ 2 │ 1 │ +│ 3 │ 4 │ +│ 5 │ 4 │ +└───┴───┘ +``` + +Использование опции `WITH TIES`: + +``` sql +SELECT * FROM test_fetch ORDER BY a OFFSET 3 ROW FETCH FIRST 3 ROWS WITH TIES; +``` + +Результат: + +``` text +┌─a─┬─b─┐ +│ 2 │ 1 │ +│ 3 │ 4 │ +│ 5 │ 4 │ +│ 5 │ 7 │ +└───┴───┘ +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/order-by/) diff --git a/docs/ru/sql-reference/table-functions/null.md b/docs/ru/sql-reference/table-functions/null.md new file mode 100644 index 00000000000..8e0173733f8 --- /dev/null +++ b/docs/ru/sql-reference/table-functions/null.md @@ -0,0 +1,43 @@ +--- +toc_priority: 53 +toc_title: null функция +--- + +# null {#null-function} + +Создает временную таблицу указанной структуры с движком [Null](../../engines/table-engines/special/null.md). В соответствии со свойствами движка, данные в таблице игнорируются, а сама таблица удаляется сразу после выполнения запроса. Функция используется для удобства написания тестов и демонстрационных примеров. + +**Синтаксис** + +``` sql +null('structure') +``` + +**Параметр** + +- `structure` — список колонок и их типов. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +Временная таблица указанной структуры с движком `Null`. + +**Пример** + +Один запрос с функцией `null`: + +``` sql +INSERT INTO function null('x UInt64') SELECT * FROM numbers_mt(1000000000); +``` +заменяет три запроса: + +```sql +CREATE TABLE t (x UInt64) ENGINE = Null; +INSERT INTO t SELECT * FROM numbers_mt(1000000000); +DROP TABLE IF EXISTS t; +``` + +См. также: + +- [Движок таблиц Null](../../engines/table-engines/special/null.md) + +[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/null/) diff --git a/docs/ru/whats-new/extended-roadmap.md b/docs/ru/whats-new/extended-roadmap.md index 57a29ce90ad..aff8e1cbcfb 100644 --- a/docs/ru/whats-new/extended-roadmap.md +++ b/docs/ru/whats-new/extended-roadmap.md @@ -15,8 +15,6 @@ Задача «normalized z-Order curve» в перспективе может быть полезна для БК и Метрики, так как позволяет смешивать OrderID и PageID и избежать дублирования данных. В задаче также вводится способ индексации путём обращения функции нескольких аргументов на интервале, что имеет смысл для дальнейшего развития. -[Андрей Чулков](https://github.com/achulkov2), ВШЭ. - ### 1.2. + Wait-free каталог баз данных {#wait-free-katalog-baz-dannykh} Q2. Делает [Александр Токмаков](https://github.com/tavplubix), первый рабочий вариант в декабре 2019. Нужно для DataLens и Яндекс.Метрики. @@ -292,7 +290,8 @@ Upd. Иван Блинков сделал эту задачу путём зам ### 4.1. Уменьшение числа потоков при распределённых запросах {#umenshenie-chisla-potokov-pri-raspredelionnykh-zaprosakh} -Весна 2020. Upd. Есть прототип. Upd. Он не работает. Upd. Человек отказался от задачи, теперь сроки не определены. +Upd. Есть прототип. Upd. Он не работает. Upd. Человек отказался от задачи, теперь сроки не определены. +Upd. Павел Круглов, весна 2021. ### 4.2. Спекулятивное выполнение запросов на нескольких репликах {#spekuliativnoe-vypolnenie-zaprosov-na-neskolkikh-replikakh} @@ -306,6 +305,8 @@ Upd. Иван Блинков сделал эту задачу путём зам Upd. 
Сейчас обсуждается, как сделать другую задачу вместо этой. +Павел Круглов, весна 2021. + ### 4.3. Ограничение числа одновременных скачиваний с реплик {#ogranichenie-chisla-odnovremennykh-skachivanii-s-replik} Изначально делал Олег Алексеенков, но пока решение не готово, хотя там не так уж много доделывать. @@ -320,9 +321,10 @@ Upd. Сейчас обсуждается, как сделать другую з ### 4.7. Ленивая загрузка множеств для IN и JOIN с помощью k/v запросов {#lenivaia-zagruzka-mnozhestv-dlia-in-i-join-s-pomoshchiu-kv-zaprosov} -### 4.8. Разделить background pool для fetch и merge {#razdelit-background-pool-dlia-fetch-i-merge} +### 4.8. + Разделить background pool для fetch и merge {#razdelit-background-pool-dlia-fetch-i-merge} -В очереди. Исправить проблему, что восстанавливающаяся реплика перестаёт мержить. Частично компенсируется 4.3. +Исправить проблему, что восстанавливающаяся реплика перестаёт мержить. Частично компенсируется 4.3. +Ура, готово! Сделал Александр Сапин. ## 5. Операции {#operatsii} @@ -381,6 +383,7 @@ Upd. Появилась вторая версия LTS - 20.3. ### 6.5. Эксперименты с LLVM X-Ray {#eksperimenty-s-llvm-x-ray} Требует 2.2. +Перенос на 2021 или отмена. ### 6.6. + Стек трейс для любых исключений {#stek-treis-dlia-liubykh-iskliuchenii} @@ -401,6 +404,8 @@ Upd. В разработке. ### 6.10. Сбор общих системных метрик {#sbor-obshchikh-sistemnykh-metrik} +Перенос на весну 2021. + ## 7. Сопровождение разработки {#soprovozhdenie-razrabotki} @@ -461,7 +466,7 @@ UBSan включен в функциональных тестах, но не в ### 7.12. Показывать тестовое покрытие нового кода в PR {#pokazyvat-testovoe-pokrytie-novogo-koda-v-pr} Пока есть просто показ тестового покрытия всего кода. -Отложено. +Отложено на весну 2021. ### 7.13. + Включение аналога -Weverything в gcc {#vkliuchenie-analoga-weverything-v-gcc} @@ -512,6 +517,7 @@ Upd. Минимальная подсветка добавлена, а все о Поводом использования libressl послужило желание нашего хорошего друга из известной компании несколько лет назад. Но сейчас ситуация состоит в том, что openssl продолжает развиваться, а libressl не особо, и можно спокойно менять обратно. Нужно для Яндекс.Облака для поддержки TLS 1.3. +Теперь нужно заменить OpenSSL на BoringSSL. ### 7.16. + tzdata внутри бинарника {#tzdata-vnutri-binarnika} @@ -612,7 +618,7 @@ Upd. Эльдар Заитов добавляет OSS Fuzz. Upd. Сделаны randomString, randomFixedString. Upd. Сделаны fuzzBits. -### 7.24. Fuzzing лексера и парсера запросов; кодеков и форматов {#fuzzing-leksera-i-parsera-zaprosov-kodekov-i-formatov} +### 7.24. + Fuzzing лексера и парсера запросов; кодеков и форматов {#fuzzing-leksera-i-parsera-zaprosov-kodekov-i-formatov} Продолжение 7.23. @@ -656,6 +662,7 @@ Upd. В Аркадии частично работает небольшая ча ### 7.30. Возможность переключения бинарных файлов на продакшене без выкладки пакетов {#vozmozhnost-perekliucheniia-binarnykh-failov-na-prodakshene-bez-vykladki-paketov} Низкий приоритет. +Сделали файл clickhouse.old. ### 7.31. Зеркалирование нагрузки между серверами {#zerkalirovanie-nagruzki-mezhdu-serverami} @@ -737,7 +744,7 @@ Upd. Задача взята в работу. ### 8.6. Kerberos аутентификация для HDFS и Kafka {#kerberos-autentifikatsiia-dlia-hdfs-i-kafka} Андрей Коняев, ArenaData. Он куда-то пропал. -Upd. В процессе работа для Kafka. +Для Kafka готово, для HDFS в процессе. ### 8.7. + Исправление мелочи HDFS на очень старых ядрах Linux {#ispravlenie-melochi-hdfs-na-ochen-starykh-iadrakh-linux} @@ -1024,14 +1031,14 @@ Upd. Сделано хранение прав. До готового к испо [Виталий Баранов](https://github.com/vitlibar). 
Финальная стадия разработки, рабочая версия в декабре 2019. Q1. Сделано управление правами полностью, но не реализовано их хранение, см. 12.1. -### 12.3. Подключение справочника пользователей и прав доступа из LDAP {#podkliuchenie-spravochnika-polzovatelei-i-prav-dostupa-iz-ldap} +### 12.3. + Подключение справочника пользователей и прав доступа из LDAP {#podkliuchenie-spravochnika-polzovatelei-i-prav-dostupa-iz-ldap} Аутентификация через LDAP - Денис Глазачев. [Виталий Баранов](https://github.com/vitlibar) и Денис Глазачев, Altinity. Требует 12.1. Q3. Upd. Pull request на финальной стадии. -### 12.4. Подключение IDM системы Яндекса как справочника пользователей и прав доступа {#podkliuchenie-idm-sistemy-iandeksa-kak-spravochnika-polzovatelei-i-prav-dostupa} +### 12.4. - Подключение IDM системы Яндекса как справочника пользователей и прав доступа {#podkliuchenie-idm-sistemy-iandeksa-kak-spravochnika-polzovatelei-i-prav-dostupa} Пока низкий приоритет. Нужно для Метрики. Требует 12.3. Отложено. @@ -1051,7 +1058,7 @@ Upd. Есть pull request. ### 13.1. Overcommit запросов по памяти и вытеснение {#overcommit-zaprosov-po-pamiati-i-vytesnenie} -Требует 2.1. Способ реализации обсуждается. Александр Казаков. +Требует 2.1. Способ реализации обсуждается. ### 13.2. Общий конвейер выполнения на сервер {#obshchii-konveier-vypolneniia-na-server} @@ -1059,8 +1066,6 @@ Upd. Есть pull request. ### 13.3. Пулы ресурсов {#puly-resursov} -Александр Казаков. - Требует 13.2 или сможем сделать более неудобную реализацию раньше. Обсуждается вариант неудобной реализации. Пока средний приоритет, целимся на Q1/Q2. Вариант реализации выбрал Александр Казаков. @@ -1068,6 +1073,7 @@ Upd. Не уследили, и задачу стали обсуждать мен Upd. Задачу смотрит Александр Казаков. Upd. Задача взята в работу. Upd. Задача как будто взята в работу. +Upd. Задачу не сделал. ## 14. Диалект SQL {#dialekt-sql} @@ -1082,19 +1088,18 @@ Upd. Задача как будто взята в работу. ### 14.3. Поддержка подстановок для множеств в правой части IN {#podderzhka-podstanovok-dlia-mnozhestv-v-pravoi-chasti-in} -### 14.4. Поддержка подстановок для идентификаторов (имён) в SQL запросе {#podderzhka-podstanovok-dlia-identifikatorov-imion-v-sql-zaprose} +### 14.4. + Поддержка подстановок для идентификаторов (имён) в SQL запросе {#podderzhka-podstanovok-dlia-identifikatorov-imion-v-sql-zaprose} -zhang2014 -Задача на паузе. +Amos Bird сделал. ### 14.5. + Поддержка задания множества как массива в правой части секции IN {#podderzhka-zadaniia-mnozhestva-kak-massiva-v-pravoi-chasti-sektsii-in} Василий Немков, Altinity, делал эту задачу, но забросил её в пользу других задач. В результате, сейчас доделывает Антон Попов. -### 14.6. Глобальный scope для WITH {#globalnyi-scope-dlia-with} +### 14.6. + Глобальный scope для WITH {#globalnyi-scope-dlia-with} -В обсуждении. Amos Bird. +Amos Bird сделал. ### 14.7. Nullable для WITH ROLLUP, WITH CUBE, WITH TOTALS {#nullable-dlia-with-rollup-with-cube-with-totals} @@ -1148,13 +1153,13 @@ Upd. Есть pull request. Готово. ### 14.17. + Ввести понятие stateful функций {#vvesti-poniatie-stateful-funktsii} -zhang2014. Для runningDifference, neighbour - их учёт в оптимизаторе запросов. В интерфейсе уже сделано. Надо проверить, что учитывается в нужных местах (например, что работает predicate pushdown сквозь ORDER BY, если таких функций нет). +Александр Кузьменков. -### 14.18. UNION DISTINCT и возможность включить его по-умолчанию {#union-distinct-i-vozmozhnost-vkliuchit-ego-po-umolchaniiu} +### 14.18. 
+ UNION DISTINCT и возможность включить его по-умолчанию {#union-distinct-i-vozmozhnost-vkliuchit-ego-po-umolchaniiu} -Для BI систем. +Для BI систем. flynn ucasFL. ### 14.19. + Совместимость парсера типов данных с SQL {#sovmestimost-parsera-tipov-dannykh-s-sql} @@ -1278,7 +1283,7 @@ Upd. Есть pull request. Исправление фундаментальной проблемы - есть PR. Фундаментальная проблема решена. -### 18.2. Агрегатные функции для статистических тестов {#agregatnye-funktsii-dlia-statisticheskikh-testov} +### 18.2. + Агрегатные функции для статистических тестов {#agregatnye-funktsii-dlia-statisticheskikh-testov} Артём Цыганов, Руденский Константин Игоревич, Семёнов Денис, ВШЭ. @@ -1286,6 +1291,7 @@ Upd. Есть pull request. Сделали прототип двух тестов, есть pull request. Также есть pull request для корелляции рангов. Upd. Помержили корелляцию рангов, но ещё не помержили сравнение t-test, u-test. +Upd. Всё доделал Никита Михайлов. ### 18.3. Инфраструктура для тренировки моделей в ClickHouse {#infrastruktura-dlia-trenirovki-modelei-v-clickhouse} @@ -1295,7 +1301,7 @@ Upd. Помержили корелляцию рангов, но ещё не по ## 19. Улучшение работы кластера {#uluchshenie-raboty-klastera} -### 19.1. Параллельные кворумные вставки без линеаризуемости {#parallelnye-kvorumnye-vstavki-bez-linearizuemosti} +### 19.1. + Параллельные кворумные вставки без линеаризуемости {#parallelnye-kvorumnye-vstavki-bez-linearizuemosti} Upd. В работе, ожидается в начале октября. @@ -1361,6 +1367,8 @@ Upd. Задача в разработке. ### 20.2. Поддержка DELETE путём преобразования множества ключей в множество row_numbers на реплике, столбца флагов и индекса по диапазонам {#podderzhka-delete-putiom-preobrazovaniia-mnozhestva-kliuchei-v-mnozhestvo-row-numbers-na-replike-stolbtsa-flagov-i-indeksa-po-diapazonam} +Задача назначена на 2021. + ### 20.3. Поддержка ленивых DELETE путём запоминания выражений и преобразования к множеству ключей в фоне {#podderzhka-lenivykh-delete-putiom-zapominaniia-vyrazhenii-i-preobrazovaniia-k-mnozhestvu-kliuchei-v-fone} ### 20.4. Поддержка UPDATE с помощью преобразования в DELETE и вставок {#podderzhka-update-s-pomoshchiu-preobrazovaniia-v-delete-i-vstavok} @@ -1413,6 +1421,7 @@ ucasFL, в разработке. Готово. [Achimbab](https://github.com/achimbab). Есть pull request. Но это не совсем то. Upd. В обсуждении. +Upd. Назначено на 2021. ### 21.8. Взаимная интеграция аллокатора и кэша {#vzaimnaia-integratsiia-allokatora-i-kesha} @@ -1427,6 +1436,7 @@ Upd. В обсуждении. Upd. Есть нерабочий прототип, скорее всего будет отложено. Upd. Отложено до осени. Upd. Отложено до. +Upd. Отложено. ### 21.8.1. Отдельный аллокатор для кэшей с ASLR {#otdelnyi-allokator-dlia-keshei-s-aslr} @@ -1517,7 +1527,7 @@ Upd. Сделаны самые существенные из предложен Для сортировки по кортежам используется обычная сортировка с компаратором, который в цикле по элементам кортежа делает виртуальные вызовы `IColumn::compareAt`. Это неоптимально - как из-за короткого цикла по неизвестному в compile-time количеству элементов, так и из-за виртуальных вызовов. Чтобы обойтись без виртуальных вызовов, есть метод `IColumn::getPermutation`. Он используется в случае сортировки по одному столбцу. Есть вариант, что в случае сортировки по кортежу, что-то похожее тоже можно применить… например, сделать метод `updatePermutation`, принимающий аргументы offset и limit, и допереставляющий перестановку в диапазоне значений, в которых предыдущий столбец имел равные значения. -3. RadixSort для сортировки. +\+ 3. RadixSort для сортировки. 
Один наш знакомый начал делать задачу по попытке использования RadixSort для сортировки столбцов. Был сделан вариант indirect сортировки (для `getPermutation`), но не оптимизирован до конца - есть лишние ненужные перекладывания элементов. Для того, чтобы его оптимизировать, придётся добавить немного шаблонной магии (на последнем шаге что-то не копировать, вместо перекладывания индексов - складывать их в готовое место). Также этот человек добавил метод MSD Radix Sort для реализации radix partial sort. Но даже не проверил производительность. @@ -1527,7 +1537,9 @@ Upd. Сделаны самые существенные из предложен Виртуальный метод `compareAt` возвращает -1, 0, 1. Но алгоритмы сортировки сравнениями обычно рассчитаны на `operator<` и не могут получить преимущества от three-way comparison. А можно ли написать так, чтобы преимущество было? -5. pdq partial sort +\+ 5. pdq partial sort + +Upd. Данила Кутенин решил эту задачу ультимативно, используя Floyd–Rivest алгоритм. Хороший алгоритм сортировки сравнениями `pdqsort` не имеет варианта partial sort. Заметим, что на практике, почти все сортировки в запросах ClickHouse являются partial_sort, так как `ORDER BY` почти всегда идёт с `LIMIT`. Кстати, Данила Кутенин уже попробовал это и показал, что в тривиальном случае преимущества нет. Но не очевидно, что нельзя сделать лучше. @@ -1619,6 +1631,7 @@ Upd. Добавили таймауты. Altinity. Я не в курсе, какой статус. +Там предлагают очень сложное решение вместо простого. ### 22.16. + Исправление низкой производительности кодека DoubleDelta {#ispravlenie-nizkoi-proizvoditelnosti-kodeka-doubledelta} @@ -1656,15 +1669,15 @@ Upd. Готово. Нужно для Метрики. Алексей Миловидов. -### 22.25. Избавиться от библиотеки btrie {#izbavitsia-ot-biblioteki-btrie} +### 22.25. + Избавиться от библиотеки btrie {#izbavitsia-ot-biblioteki-btrie} -Алексей Миловидов. Низкий приоритет. +Владимир Черкасов сделал эту задачу. ### 22.26. Плохая производительность quantileTDigest {#plokhaia-proizvoditelnost-quantiletdigest} [#2668](https://github.com/ClickHouse/ClickHouse/issues/2668) -Алексей Миловидов или будет переназначено. +Павел Круглов и Илья Щербак (ВК). ### 22.27. Проверить несколько PR, которые были закрыты zhang2014 и sundy-li {#proverit-neskolko-pr-kotorye-byli-zakryty-zhang2014-i-sundy-li} @@ -1766,7 +1779,7 @@ Upd. Отменено. Виталий Баранов. Отложено, после бэкапов. -### 24.5. Поддержка функций шифрования для отдельных значений {#podderzhka-funktsii-shifrovaniia-dlia-otdelnykh-znachenii} +### 24.5. + Поддержка функций шифрования для отдельных значений {#podderzhka-funktsii-shifrovaniia-dlia-otdelnykh-znachenii} Смотрите также 24.5. @@ -1775,6 +1788,7 @@ Upd. Отменено. Делает Василий Немков, Altinity Есть pull request в процессе ревью, исправляем проблемы производительности. +Сейчас в состоянии, что уже добавлено в продакшен, но производительность всё ещё низкая (тех долг). ### 24.6. Userspace RAID {#userspace-raid} @@ -1825,7 +1839,7 @@ RAID позволяет одновременно увеличить надёжн Upd. Есть pull request. В стадии ревью. Готово. -### 24.10. Поддержка типов half/bfloat16/unum {#podderzhka-tipov-halfbfloat16unum} +### 24.10. - Поддержка типов half/bfloat16/unum {#podderzhka-tipov-halfbfloat16unum} [#7657](https://github.com/ClickHouse/ClickHouse/issues/7657) @@ -1833,6 +1847,7 @@ Upd. Есть pull request. В стадии ревью. Готово. Есть pull request на промежуточной стадии. Отложено. +Отменено. ### 24.11. User Defined Functions {#user-defined-functions} @@ -1882,10 +1897,12 @@ Upd. 
Прототип bitonic sort помержен, но целесообраз Требует 2.1. Upd. Есть два прототипа от внешних контрибьюторов. +Александр Кузьменков. ### 24.15. Поддержка полуструктурированных данных {#podderzhka-polustrukturirovannykh-dannykh} Требует 1.14 и 2.10. +Антон Попов. ### 24.16. Улучшение эвристики слияний {#uluchshenie-evristiki-sliianii} @@ -1915,6 +1932,7 @@ Upd. Есть pull request - в большинстве случаев однов ### 24.21. Реализация в ClickHouse протокола распределённого консенсуса {#realizatsiia-v-clickhouse-protokola-raspredelionnogo-konsensusa} Имеет смысл только после 19.2. +Александр Сапин. ### 24.22. Вывод типов по блоку данных. Вывод формата данных по примеру {#vyvod-tipov-po-bloku-dannykh-vyvod-formata-dannykh-po-primeru} @@ -1955,13 +1973,14 @@ ClickHouse также может использоваться для быстр Михаил Филитов, ВШЭ. Upd. Есть pull request. Нужно ещё чистить код библиотеки. -### 24.26. Поддержка open tracing или аналогов {#podderzhka-open-tracing-ili-analogov} +### 24.26. + Поддержка open tracing или аналогов {#podderzhka-open-tracing-ili-analogov} [#5182](https://github.com/ClickHouse/ClickHouse/issues/5182) Александр Кожихов, ВШЭ и Яндекс.YT. Upd. Есть pull request с прототипом. Upd. Александ Кузьменков взял задачу в работу. +Сделано. ### 24.27. Реализация алгоритмов min-hash, sim-hash для нечёткого поиска полудубликатов {#realizatsiia-algoritmov-min-hash-sim-hash-dlia-nechiotkogo-poiska-poludublikatov} @@ -1995,7 +2014,7 @@ Amos Bird, но его решение слишком громоздкое и п Перепиcывание в JOIN. Не раньше 21.11, 21.12, 21.9. Низкий приоритет. Отложено. -### 24.32. Поддержка GRPC {#podderzhka-grpc} +### 24.32. + Поддержка GRPC {#podderzhka-grpc} Мария Конькова, ВШЭ и Яндекс. Также смотрите 24.29. @@ -2009,6 +2028,7 @@ Amos Bird, но его решение слишком громоздкое и п Задача в работе, есть pull request. [#10136](https://github.com/ClickHouse/ClickHouse/pull/10136) Upd. Задачу взял в работу Виталий Баранов. +Сделано. ## 25. DevRel {#devrel} @@ -2067,13 +2087,14 @@ Upd. Задачу взял в работу Виталий Баранов. Алексей Миловидов и все подготовленные докладчики. Upd. Участвуем. -### 25.14. Конференции в России: все HighLoad, возможно CodeFest, DUMP или UWDC, возможно C++ Russia {#konferentsii-v-rossii-vse-highload-vozmozhno-codefest-dump-ili-uwdc-vozmozhno-c-russia} +### 25.14. + Конференции в России: все HighLoad, возможно CodeFest, DUMP или UWDC, возможно C++ Russia {#konferentsii-v-rossii-vse-highload-vozmozhno-codefest-dump-ili-uwdc-vozmozhno-c-russia} Алексей Миловидов и все подготовленные докладчики. Upd. Есть Saint HighLoad online. Upd. Есть C++ Russia. CodeFest, DUMP, UWDC отменились. Upd. Добавились Highload Fwdays, Матемаркетинг. +Upd. Добавились подкасты C++ Russia. ### 25.15. Конференции зарубежные: Percona, DataOps, попытка попасть на более крупные {#konferentsii-zarubezhnye-percona-dataops-popytka-popast-na-bolee-krupnye} @@ -2096,6 +2117,7 @@ DataOps отменилась. Есть минимальный прототип. Сделал Илья Яцишин. Этот прототип не позволяет делиться ссылками на результаты запросов. Upd. На финальной стадии инструмент для экспериментирования с разными версиями ClickHouse. +Upd. По факту, задача считается не сделанной (готово только 99%, не 100%). ### 25.17. Взаимодействие с ВУЗами: ВШЭ, УрФУ, ICT Beijing {#vzaimodeistvie-s-vuzami-vshe-urfu-ict-beijing} @@ -2103,6 +2125,7 @@ Upd. На финальной стадии инструмент для экспе Благодаря Robert Hodges добавлен CMU. Upd. Взаимодействие с ВШЭ 2019/2020 успешно выполнено. Upd. Идёт подготовка к 2020/2021. +Upd. 
Уже взяли несколько десятков человек на 2020/2021. ### 25.18. - Лекция в ШАД {#lektsiia-v-shad} diff --git a/docs/tools/build.py b/docs/tools/build.py index bcbf3ac27cd..45d74423fa8 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -202,7 +202,11 @@ def build(args): if __name__ == '__main__': os.chdir(os.path.join(os.path.dirname(__file__), '..')) - website_dir = os.path.join('..', 'website') + + # A root path to ClickHouse source code. + src_dir = '..' + + website_dir = os.path.join(src_dir, 'website') arg_parser = argparse.ArgumentParser() arg_parser.add_argument('--lang', default='en,es,fr,ru,zh,ja,tr,fa') @@ -210,6 +214,7 @@ if __name__ == '__main__': arg_parser.add_argument('--docs-dir', default='.') arg_parser.add_argument('--theme-dir', default=website_dir) arg_parser.add_argument('--website-dir', default=website_dir) + arg_parser.add_argument('--src-dir', default=src_dir) arg_parser.add_argument('--blog-dir', default=os.path.join(website_dir, 'blog')) arg_parser.add_argument('--output-dir', default='build') arg_parser.add_argument('--enable-stable-releases', action='store_true') diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 3560b6ad40a..4106100bfa3 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -21,7 +21,7 @@ mkdocs-htmlproofer-plugin==0.0.3 mkdocs-macros-plugin==0.4.20 nltk==3.5 nose==1.3.7 -protobuf==3.13.0 +protobuf==3.14.0 numpy==1.19.2 Pygments==2.5.2 pymdown-extensions==8.0 diff --git a/docs/tools/website.py b/docs/tools/website.py index a658b0cfc34..4cce69bd869 100644 --- a/docs/tools/website.py +++ b/docs/tools/website.py @@ -145,13 +145,19 @@ def build_website(args): 'public', 'node_modules', 'templates', - 'locale' + 'locale', + '.gitkeep' ) ) + + # This file can be requested to check for available ClickHouse releases. + shutil.copy2( + os.path.join(args.src_dir, 'utils', 'list-versions', 'version_date.tsv'), + os.path.join(args.output_dir, 'data', 'version_date.tsv')) + shutil.copy2( os.path.join(args.website_dir, 'js', 'embedd.min.js'), - os.path.join(args.output_dir, 'js', 'embedd.min.js') - ) + os.path.join(args.output_dir, 'js', 'embedd.min.js')) for root, _, filenames in os.walk(args.output_dir): for filename in filenames: diff --git a/docs/tr/development/contrib.md b/docs/tr/development/contrib.md index 63cc289ec9b..f56cf2a625b 100644 --- a/docs/tr/development/contrib.md +++ b/docs/tr/development/contrib.md @@ -19,7 +19,6 @@ toc_title: "Kullan\u0131lan \xDC\xE7\xFCnc\xFC Taraf K\xFCt\xFCphaneleri" | googletest | [BSD 3-Clause Lisansı](https://github.com/google/googletest/blob/master/LICENSE) | | h33 | [Apache Lic 2.0ense 2.0](https://github.com/uber/h3/blob/master/LICENSE) | | hyperscan | [BSD 3-Clause Lisansı](https://github.com/intel/hyperscan/blob/master/LICENSE) | -| libbtrie | [BSD 2-Clause Lisansı](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) | | libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) | | libdivide | [Zlib Lisansı](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) | | libgsasl | [LGPL v2. 
1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) | diff --git a/docs/tr/sql-reference/aggregate-functions/reference.md b/docs/tr/sql-reference/aggregate-functions/reference.md index 32706b325a9..b606d395cc7 100644 --- a/docs/tr/sql-reference/aggregate-functions/reference.md +++ b/docs/tr/sql-reference/aggregate-functions/reference.md @@ -464,69 +464,6 @@ The kurtosis of the given distribution. Type — [Float64](../../sql-reference/d SELECT kurtSamp(value) FROM series_with_value_column ``` -## timeSeriesGroupSum(uıd, zaman damgası, değer) {#agg-function-timeseriesgroupsum} - -`timeSeriesGroupSum` örnek zaman damgası değil hizalama farklı zaman serileri toplayabilir. -İki örnek zaman damgası arasında doğrusal enterpolasyon kullanacak ve daha sonra zaman serilerini birlikte toplayacaktır. - -- `uid` zaman serisi benzersiz kimliği mi, `UInt64`. -- `timestamp` milisaniye veya mikrosaniye desteklemek için Int64 türüdür. -- `value` metr .iktir. - -İşlev, tuples dizisini döndürür `(timestamp, aggregated_value)` çiftliler. - -Bu işlevi kullanmadan önce emin olun `timestamp` artan düzende. - -Örnek: - -``` text -┌─uid─┬─timestamp─┬─value─┐ -│ 1 │ 2 │ 0.2 │ -│ 1 │ 7 │ 0.7 │ -│ 1 │ 12 │ 1.2 │ -│ 1 │ 17 │ 1.7 │ -│ 1 │ 25 │ 2.5 │ -│ 2 │ 3 │ 0.6 │ -│ 2 │ 8 │ 1.6 │ -│ 2 │ 12 │ 2.4 │ -│ 2 │ 18 │ 3.6 │ -│ 2 │ 24 │ 4.8 │ -└─────┴───────────┴───────┘ -``` - -``` sql -CREATE TABLE time_series( - uid UInt64, - timestamp Int64, - value Float64 -) ENGINE = Memory; -INSERT INTO time_series VALUES - (1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5), - (2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8); - -SELECT timeSeriesGroupSum(uid, timestamp, value) -FROM ( - SELECT * FROM time_series order by timestamp ASC -); -``` - -Ve sonuç olacak: - -``` text -[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)] -``` - -## timeSeriesGroupRateSum(uıd, ts, val) {#agg-function-timeseriesgroupratesum} - -Benzer şekilde `timeSeriesGroupSum`, `timeSeriesGroupRateSum` zaman serilerinin oranını hesaplar ve daha sonra toplam oranları birlikte hesaplar. -Ayrıca, bu işlevi kullanmadan önce zaman damgası yükseliş sırasına göre olmalıdır. - -Bu fonksiyon dataun ver theiye uygulanması `timeSeriesGroupSum` örnek, aşağıdaki sonucu alırsınız: - -``` text -[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)] -``` - ## avg (x) {#agg_function-avg} Ortalama hesaplar. diff --git a/docs/tr/sql-reference/statements/create.md b/docs/tr/sql-reference/statements/create.md index 79bdb45f9e4..78390564880 100644 --- a/docs/tr/sql-reference/statements/create.md +++ b/docs/tr/sql-reference/statements/create.md @@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` Oluşturuyor [dış sözlük](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) verilen ile [yapılı](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [kaynaklı](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [düzen](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) ve [ömür](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). 
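An editorial aside on the dictionary `LIFETIME` grammar corrected in this patch (here in the Turkish `create.md`, and again later in the Chinese `create.md`): the new form `LIFETIME({MIN min_val MAX max_val | max_val})` documents that either a bare upper bound or an explicit range is accepted. A minimal sketch under stated assumptions follows; the dictionary name, source table, and connection parameters are hypothetical and are not part of this patch.

``` sql
-- Hypothetical dictionary illustrating the documented LIFETIME grammar.
CREATE DICTIONARY hypothetical_dict
(
    id UInt64,
    value String
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' PASSWORD '' DB 'default' TABLE 'hypothetical_source'))
LAYOUT(FLAT())
LIFETIME(MIN 300 MAX 360)  -- reloads are spread randomly across the 300..360 second window

-- The grammar also allows a single value, e.g. LIFETIME(360).
```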
diff --git a/docs/zh/commercial/support.md b/docs/zh/commercial/support.md index e8462fc962e..f543338d4e6 100644 --- a/docs/zh/commercial/support.md +++ b/docs/zh/commercial/support.md @@ -2,17 +2,17 @@ machine_translated: true machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 toc_priority: 3 -toc_title: "\u788C\u83BD\u7984Support:" +toc_title: "\u5546\u4e1a\u652f\u6301" --- # ClickHouse商业支持服务提供商 {#clickhouse-commercial-support-service-providers} !!! info "信息" - 如果您已经推出ClickHouse商业支持服务,请随时 [打开拉取请求](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/commercial/support.md) 将其添加到以下列表。 + 如果您已经推出ClickHouse商业支持服务,请随时 [提交一个 pull-request](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/commercial/support.md) 将其添加到以下列表。 -## 敏锐性 {#altinity} +## Altinity {#altinity} -隆隆隆隆路虏脢..陇.貌.垄拢卢虏禄and陇.貌路.隆拢脳枚脢虏 隆隆隆隆路虏脢..陇.貌.垄拢卢虏禄.陇 访问 [www.altinity.com](https://www.altinity.com/) 欲了解更多信息. + Altinity 自从 2017 年开始已经为企业提供 ClickHouse 支持服务。Altinity 的客户范围包含百强企业到初创企业。访问 [www.altinity.com](https://www.altinity.com/) 了解更多信息。 ## Mafiree {#mafiree} diff --git a/docs/zh/development/build-osx.md b/docs/zh/development/build-osx.md index 24923f75207..c7c386da97e 100644 --- a/docs/zh/development/build-osx.md +++ b/docs/zh/development/build-osx.md @@ -43,7 +43,7 @@ $ cd .. 为此,请创建以下文件: -/图书馆/LaunchDaemons/限制.maxfilesplist: +/资源库/LaunchDaemons/limit.maxfiles.plist: ``` xml diff --git a/docs/zh/development/contrib.md b/docs/zh/development/contrib.md index 0129ee62ce7..8e8efc3c04e 100644 --- a/docs/zh/development/contrib.md +++ b/docs/zh/development/contrib.md @@ -11,7 +11,6 @@ | FastMemcpy | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libmemcpy/impl/LICENSE) | | googletest | [BSD3-条款许可](https://github.com/google/googletest/blob/master/LICENSE) | | 超扫描 | [BSD3-条款许可](https://github.com/intel/hyperscan/blob/master/LICENSE) | -| libbtrie | [BSD2-条款许可](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) | | libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) | | libdivide | [Zlib许可证](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) | | libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) | diff --git a/docs/zh/engines/table-engines/log-family/index.md b/docs/zh/engines/table-engines/log-family/index.md index 64378a73daa..c45a68b2aae 100644 --- a/docs/zh/engines/table-engines/log-family/index.md +++ b/docs/zh/engines/table-engines/log-family/index.md @@ -46,6 +46,6 @@ toc_priority: 29 `Log` 引擎为表中的每一列使用不同的文件。`StripeLog` 将所有的数据存储在一个文件中。因此 `StripeLog` 引擎在操作系统中使用更少的描述符,但是 `Log` 引擎提供更高的读性能。 -`TingLog` 引擎是该系列中最简单的引擎并且提供了最少的功能和最低的性能。`TingLog` 引擎不支持并行读取和并发数据访问,并将每一列存储在不同的文件中。它比其余两种支持并行读取的引擎的读取速度更慢,并且使用了和 `Log` 引擎同样多的描述符。你可以在简单的低负载的情景下使用它。 +`TinyLog` 引擎是该系列中最简单的引擎并且提供了最少的功能和最低的性能。`TinyLog` 引擎不支持并行读取和并发数据访问,并将每一列存储在不同的文件中。它比其余两种支持并行读取的引擎的读取速度更慢,并且使用了和 `Log` 引擎同样多的描述符。你可以在简单的低负载的情景下使用它。 [来源文章](https://clickhouse.tech/docs/en/operations/table_engines/log_family/) diff --git a/docs/zh/engines/table-engines/log-family/log.md b/docs/zh/engines/table-engines/log-family/log.md index 4a3855c4bfa..5d3ee38d745 100644 --- a/docs/zh/engines/table-engines/log-family/log.md +++ b/docs/zh/engines/table-engines/log-family/log.md @@ -1,5 +1,5 @@ -# 日志 {#log} +# Log {#log} -日志与 TinyLog 的不同之处在于,«标记» 
的小文件与列文件存在一起。这些标记写在每个数据块上,并且包含偏移量,这些偏移量指示从哪里开始读取文件以便跳过指定的行数。这使得可以在多个线程中读取表数据。对于并发数据访问,可以同时执行读取操作,而写入操作则阻塞读取和其它写入。Log 引擎不支持索引。同样,如果写入表失败,则该表将被破坏,并且从该表读取将返回错误。Log 引擎适用于临时数据,write-once 表以及测试或演示目的。 +`Log` 与 `TinyLog` 的不同之处在于,«标记» 的小文件与列文件存在一起。这些标记写在每个数据块上,并且包含偏移量,这些偏移量指示从哪里开始读取文件以便跳过指定的行数。这使得可以在多个线程中读取表数据。对于并发数据访问,可以同时执行读取操作,而写入操作则阻塞读取和其它写入。`Log`引擎不支持索引。同样,如果写入表失败,则该表将被破坏,并且从该表读取将返回错误。`Log`引擎适用于临时数据,write-once 表以及测试或演示目的。 [原始文章](https://clickhouse.tech/docs/zh/operations/table_engines/log/) diff --git a/docs/zh/getting-started/example-datasets/amplab-benchmark.md b/docs/zh/getting-started/example-datasets/amplab-benchmark.md index 11a1c34b91e..22f2735968b 100644 --- a/docs/zh/getting-started/example-datasets/amplab-benchmark.md +++ b/docs/zh/getting-started/example-datasets/amplab-benchmark.md @@ -1,8 +1,13 @@ -# AMPLab大数据基准测试 {#amplab-da-shu-ju-ji-zhun-ce-shi} +--- +toc_priority: 19 +toc_title: AMPLab Big Data Benchmark +--- + +# AMPLab Big Data Benchmark {#amplab-big-data-benchmark} 参考 https://amplab.cs.berkeley.edu/benchmark/ -需要您在https://aws.amazon.com注册一个免费的账号。注册时需要您提供信用卡、邮箱、电话等信息。之后可以在https://console.aws.amazon.com/iam/home?nc2=h_m_sc#security_credential获取新的访问密钥 +需要您在[Amazon](https://aws.amazon.com)注册一个免费的账号。注册时需要您提供信用卡、邮箱、电话等信息。之后可以在[Amazon AWS Console](https://console.aws.amazon.com/iam/home?nc2=h_m_sc#security_credential)获取新的访问密钥 在控制台运行以下命令: diff --git a/docs/zh/getting-started/example-datasets/criteo.md b/docs/zh/getting-started/example-datasets/criteo.md index 8135634f9c4..40ca1b4a781 100644 --- a/docs/zh/getting-started/example-datasets/criteo.md +++ b/docs/zh/getting-started/example-datasets/criteo.md @@ -1,6 +1,11 @@ -# Criteo TB级别点击日志 {#criteo-tbji-bie-dian-ji-ri-zhi} +--- +toc_priority: 18 +toc_title: Terabyte Click Logs from Criteo +--- -可以从http://labs.criteo.com/downloads/download-terabyte-click-logs/上下载数据 +# Terabyte of Click Logs from Criteo {#criteo-tbji-bie-dian-ji-ri-zhi} + +可以从 http://labs.criteo.com/downloads/download-terabyte-click-logs/ 上下载数据 创建原始数据对应的表结构: diff --git a/docs/zh/getting-started/example-datasets/index.md b/docs/zh/getting-started/example-datasets/index.md index 15f4d483312..acd554ca4da 100644 --- a/docs/zh/getting-started/example-datasets/index.md +++ b/docs/zh/getting-started/example-datasets/index.md @@ -6,15 +6,16 @@ toc_title: "\u5BFC\u8A00" # 示例数据集 {#example-datasets} -本节介绍如何获取示例数据集并将其导入ClickHouse。 +本节介绍如何获取示例数据集并将其导入ClickHouse。对于某些数据集,还可以使用示例查询。 + 对于某些数据集示例查询也可用。 -- [脱敏的Yandex.Metrica数据集](metrica.md) -- [星型基准测试](star-schema.md) -- [维基访问数据](wikistat.md) -- [Criteo TB级别点击日志](criteo.md) -- [AMPLab大数据基准测试](amplab-benchmark.md) -- [纽约出租车数据](nyc-taxi.md) -- [航班飞行数据](ontime.md) +- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md) +- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md) +- [WikiStat](../../getting-started/example-datasets/wikistat.md) +- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md) +- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md) +- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md) +- [OnTime](../../getting-started/example-datasets/ontime.md) [原始文章](https://clickhouse.tech/docs/en/getting_started/example_datasets) diff --git a/docs/zh/getting-started/example-datasets/metrica.md b/docs/zh/getting-started/example-datasets/metrica.md index 22dccc26dcb..353a24ce0cb 100644 --- a/docs/zh/getting-started/example-datasets/metrica.md +++ 
b/docs/zh/getting-started/example-datasets/metrica.md
@@ -1,17 +1,17 @@
---
-toc_priority: 21
-toc_title: "Yandex\u6885\u7279\u91CC\u5361\u6570\u636E"
+toc_priority: 15
+toc_title: Yandex.Metrica Data
---

-# 脱敏的Yandex.Metrica数据集 {#anonymized-yandex-metrica-data}
+# Anonymized Yandex.Metrica Data {#anonymized-yandex-metrica-data}

-Dataset由两个表组成,其中包含有关命中的匿名数据 (`hits_v1`)和访问 (`visits_v1`)的Yandex的。梅特里卡 你可以阅读更多关于Yandex的。梅特里卡 [ClickHouse历史](../../introduction/history.md) 科。
+数据集由两个表组成,包含关于Yandex.Metrica的hits(`hits_v1`)和visits(`visits_v1`)的匿名数据。你可以在[ClickHouse历史](../../introduction/history.md)的Yandex.Metrica部分阅读更多相关信息。

-数据集由两个表组成,其中任何一个都可以作为压缩表下载 `tsv.xz` 文件或作为准备的分区。 除此之外,该扩展版本 `hits` 包含1亿行的表可作为TSV在https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz 并作为准备的分区在https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz.
+数据集由两个表组成,它们中的任何一个都可以下载为压缩的`tsv.xz`文件或预处理好的分区。除此之外,扩展版的`hits`表包含1亿行数据,其TSV文件位于https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz,预处理好的分区位于https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz。

## 从准备好的分区获取表 {#obtaining-tables-from-prepared-partitions}

-下载和导入点击表:
+下载和导入`hits`表:

``` bash
curl -O https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar
@@ -21,7 +21,7 @@ sudo service clickhouse-server restart
clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1"
```

-下载和导入访问:
+下载和导入`visits`表:

``` bash
curl -O https://clickhouse-datasets.s3.yandex.net/visits/partitions/visits_v1.tar
@@ -31,9 +31,9 @@ sudo service clickhouse-server restart
clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
```

-## 从压缩TSV文件获取表 {#obtaining-tables-from-compressed-tsv-file}
+## 从TSV压缩文件获取表 {#obtaining-tables-from-compressed-tsv-file}

-从压缩的TSV文件下载并导入命中:
+从TSV压缩文件下载并导入`hits`:

``` bash
curl https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv
@@ -47,7 +47,7 @@ clickhouse-client --query "OPTIMIZE TABLE datasets.hits_v1 FINAL"
clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1"
```

-从压缩tsv文件下载和导入访问:
+从压缩tsv文件下载和导入`visits`:

``` bash
curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv
@@ -63,6 +63,6 @@ clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"

## 查询示例 {#example-queries}

-[点击教程](../../getting-started/tutorial.md) 是基于Yandex的。Metrica数据集和开始使用此数据集的推荐方式是通过教程。
+[使用教程](../../getting-started/tutorial.md)基于Yandex.Metrica数据集,通过该教程上手也是开始使用此数据集的推荐方式。

-查询这些表的其他示例可以在 [有状态测试](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) ClickHouse的(它们被命名为 `test.hists` 和 `test.visits` 那里)。
+可以在ClickHouse的[stateful tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful)中找到对这些表的查询的其他示例(它们被命名为`test.hits`和`test.visits`)。
diff --git a/docs/zh/getting-started/example-datasets/nyc-taxi.md b/docs/zh/getting-started/example-datasets/nyc-taxi.md
index fa146d1ca38..c6b41e9d396 100644
--- a/docs/zh/getting-started/example-datasets/nyc-taxi.md
+++ b/docs/zh/getting-started/example-datasets/nyc-taxi.md
@@ -1,15 +1,20 @@
-# 纽约市出租车数据 {#niu-yue-shi-chu-zu-che-shu-ju}
+---
+toc_priority: 20
+toc_title: New York Taxi Data
+---
+
+# 纽约出租车数据 {#niu-yue-shi-chu-zu-che-shu-ju}

纽约市出租车数据有以下两个方式获取:

-从原始数据导入
-下载预处理好的分区数据
+- 从原始数据导入
+- 下载处理好的数据

## 怎样导入原始数据 {#zen-yang-dao-ru-yuan-shi-shu-ju}
-可以参考https://github.com/toddwschneider/nyc-taxi-data和http://tech.marksblogg.com/billion-nyc-taxi-rides-redshift.html中的关于数据集结构描述与数据下载指令说明。
+可以参考 https://github.com/toddwschneider/nyc-taxi-data 和 http://tech.marksblogg.com/billion-nyc-taxi-rides-redshift.html 中的关于数据集结构描述与数据下载指令说明。

-数据集包含227GB的CSV文件。这大约需要一个小时的下载时间(1Gbit带宽下,并行下载大概是一半时间)。
+数据集包含227GB的CSV文件。在1Gbit的带宽下,下载大约需要一个小时(从s3.amazonaws.com并行下载至少可以缩减一半时间)。
 下载时注意损坏的文件。可以检查文件大小并重新下载损坏的文件。

有些文件中包含一些无效的行,您可以使用如下语句修复他们:

@@ -21,7 +26,7 @@ mv data/yellow_tripdata_2010-02.csv_ data/yellow_tripdata_2010-02.csv
mv data/yellow_tripdata_2010-03.csv_ data/yellow_tripdata_2010-03.csv
```

-然后您必须在PostgreSQL中预处理这些数据。这将创建多边形中的点(以匹配在地图中纽约市中范围),然后通过使用JOIN查询将数据关联组合到一个规范的表中。为了完成这部分操作,您需要安装PostgreSQL的同时安装PostGIS插件。
+然后必须在PostgreSQL中对数据进行预处理。这将创建多边形中选择的点(将地图上的点与纽约市的行政区相匹配),并使用连接将所有数据合并到一个非规范化的平面表中。为此,您需要安装支持PostGIS的PostgreSQL。

运行`initialize_database.sh`时要小心,并手动重新检查是否正确创建了所有表。

@@ -114,7 +119,7 @@ COPY
) TO '/opt/milovidov/nyc-taxi-data/trips.tsv';
```

-数据快照的创建速度约为每秒50 MB。 在创建快照时,PostgreSQL以每秒约28 MB的速度从磁盘读取数据。
+数据快照的创建速度约为每秒50MB。 在创建快照时,PostgreSQL以每秒约28MB的速度从磁盘读取数据。

这大约需要5个小时。 最终生成的TSV文件为590612904969 bytes。

在ClickHouse中创建临时表:

@@ -186,11 +191,11 @@ real 75m56.214s

数据的读取速度为112-140 Mb/秒。
通过这种方式将数据加载到Log表中需要76分钟。

-这个表中的数据需要使用142 GB的磁盘空间.
+这个表中的数据需要使用142GB的磁盘空间。

(也可以直接使用`COPY ... TO PROGRAM`从Postgres中导入数据)

-由于数据中与天气相关的所有数据(precipitation……average_wind_speed)都填充了NULL。 所以,我们将从最终数据集中删除它们
+数据中所有与天气相关的字段(precipitation……average_wind_speed)都填充了NULL。 所以,我们将从最终数据集中删除它们。

首先,我们使用单台服务器创建表,后面我们将在多台节点上创建这些表。

@@ -259,7 +264,7 @@ FROM trips
```

这需要3030秒,速度约为每秒428,000行。
-要加快速度,可以使用`Log`引擎替换'MergeTree\`引擎来创建表。 在这种情况下,下载速度超过200秒。
+要加快速度,可以使用`Log`引擎替换`MergeTree`引擎来创建表。 在这种情况下,下载速度超过200秒。

这个表需要使用126GB的磁盘空间。

@@ -286,8 +291,7 @@ $ clickhouse-client --query "select count(*) from datasets.trips_mergetree"
```

!!! info "信息"
-    如果要运行下面的SQL查询,必须使用完整的表名,
-`datasets.trips_mergetree`。
+    如果要运行下面的SQL查询,必须使用完整的表名,`datasets.trips_mergetree`。

## 单台服务器运行结果 {#dan-tai-fu-wu-qi-yun-xing-jie-guo}

@@ -328,9 +332,9 @@ ORDER BY year, count(*) DESC

我们使用的是如下配置的服务器:

-两个英特尔(R)至强(R)CPU E5-2650v2@2.60GHz,总共有16个物理内核,128GiB RAM,硬件RAID-5上的8X6TB HD
+两个`Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz`,总共有16个物理内核,128GiB RAM,8X6TB HD,RAID-5

-执行时间是取三次运行中最好的值,但是从第二次查询开始,查询就讲从文件系统的缓存中读取数据。同时在每次读取和处理后不在进行缓存。
+执行时间是取三次运行中最好的值,但是从第二次查询开始,查询就将从文件系统的缓存中读取数据。同时在每次读取和处理后不再进行缓存。

在三台服务器中创建表结构:

@@ -356,12 +360,12 @@ INSERT INTO trips_mergetree_x3 SELECT * FROM trips_mergetree

在三台服务器集群中运行的结果:

-Q1:0.212秒.
+Q1: 0.212秒。
Q2:0.438秒。
Q3:0.733秒。
-Q4:1.241秒.
+Q4: 1.241秒。
-不出意料,查询是线性扩展的。 +这并不奇怪,因为查询是线性扩展的。 我们同时在140台服务器的集群中运行的结果: @@ -371,7 +375,7 @@ Q3:0.051秒。 Q4:0.072秒。 在这种情况下,查询处理时间首先由网络延迟确定。 -我们使用位于芬兰的Yandex数据中心中的客户端去位于俄罗斯的集群上运行查询,这增加了大约20毫秒的延迟。 +我们使用位于芬兰Yandex数据中心的客户机在俄罗斯的一个集群上运行查询,这增加了大约20毫秒的延迟。 ## 总结 {#zong-jie} diff --git a/docs/zh/getting-started/example-datasets/ontime.md b/docs/zh/getting-started/example-datasets/ontime.md index 51749d9013b..4c21eee51a2 100644 --- a/docs/zh/getting-started/example-datasets/ontime.md +++ b/docs/zh/getting-started/example-datasets/ontime.md @@ -1,9 +1,14 @@ -# 航班飞行数据 {#hang-ban-fei-xing-shu-ju} +--- +toc_priority: 21 +toc_title: OnTime +--- + +# OnTime {#ontime} 航班飞行数据有以下两个方式获取: - 从原始数据导入 -- 下载预处理好的分区数据 +- 下载预处理好的数据 ## 从原始数据导入 {#cong-yuan-shi-shu-ju-dao-ru} @@ -19,7 +24,7 @@ done done ``` -(引用 https://github.com/Percona-Lab/ontime-airline-performance/blob/master/download.sh ) +(参考 https://github.com/Percona-Lab/ontime-airline-performance/blob/master/download.sh ) 创建表结构: @@ -157,8 +162,7 @@ $ clickhouse-client --query "select count(*) from datasets.ontime" ``` !!! info "信息" - 如果要运行下面的SQL查询,必须使用完整的表名, -`datasets.ontime`。 + 如果要运行下面的SQL查询,必须使用完整的表名,`datasets.ontime`。 ## 查询: {#cha-xun} @@ -356,7 +360,7 @@ ORDER by rate DESC LIMIT 1000; ``` -奖金: +Bonus: ``` sql SELECT avg(cnt) @@ -402,3 +406,5 @@ LIMIT 10; - https://www.percona.com/blog/2014/04/21/using-apache-hadoop-and-impala-together-with-mysql-for-data-analysis/ - https://www.percona.com/blog/2016/01/07/apache-spark-with-air-ontime-performance-data/ - http://nickmakos.blogspot.ru/2012/08/analyzing-air-traffic-performance-with.html + +[原始文章](https://clickhouse.tech/docs/en/getting_started/example_datasets/ontime/) diff --git a/docs/zh/getting-started/example-datasets/star-schema.md b/docs/zh/getting-started/example-datasets/star-schema.md index 71dd58160a6..fcb6e90c694 100644 --- a/docs/zh/getting-started/example-datasets/star-schema.md +++ b/docs/zh/getting-started/example-datasets/star-schema.md @@ -1,4 +1,9 @@ -# 星型基准测试 {#star-schema-benchmark} +--- +toc_priority: 16 +toc_title: Star Schema Benchmark +--- + +# Star Schema Benchmark {#star-schema-benchmark} 编译 dbgen: @@ -10,6 +15,9 @@ $ make 开始生成数据: +!!! 
warning "注意"
+    使用`-s 100`,dbgen将生成6亿行数据(67GB);如果使用`-s 1000`,它会生成60亿行数据(这需要很多时间)
+
``` bash
$ ./dbgen -s 1000 -T c
$ ./dbgen -s 1000 -T l
@@ -18,7 +26,7 @@ $ ./dbgen -s 1000 -T s
$ ./dbgen -s 1000 -T d
```

-在ClickHouse中创建表结构:
+在ClickHouse中创建数据表:

``` sql
CREATE TABLE customer
@@ -92,7 +100,7 @@ $ clickhouse-client --query "INSERT INTO supplier FORMAT CSV" < supplier.tbl
$ clickhouse-client --query "INSERT INTO lineorder FORMAT CSV" < lineorder.tbl
```

-将«星型模型»转换为非规范化的«平面模型»:
+将`star schema`转换为`flat schema`:

``` sql
SET max_memory_usage = 20000000000, allow_experimental_multiple_joins_emulation = 1;
diff --git a/docs/zh/getting-started/example-datasets/wikistat.md b/docs/zh/getting-started/example-datasets/wikistat.md
index 2986f90ef9f..4320d6b4926 100644
--- a/docs/zh/getting-started/example-datasets/wikistat.md
+++ b/docs/zh/getting-started/example-datasets/wikistat.md
@@ -1,4 +1,9 @@
-# 维基访问数据 {#wei-ji-fang-wen-shu-ju}
+---
+toc_priority: 17
+toc_title: WikiStat
+---
+
+# WikiStat {#wikistat}

参考: http://dumps.wikimedia.org/other/pagecounts-raw/

diff --git a/docs/zh/getting-started/index.md b/docs/zh/getting-started/index.md
index ac70394c785..ac6074eb72f 100644
--- a/docs/zh/getting-started/index.md
+++ b/docs/zh/getting-started/index.md
@@ -1,3 +1,10 @@
+---
+machine_translated: true
+machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
+toc_folder_title: "\u5BFC\u8A00"
+toc_priority: 2
+---
+
# 入门 {#ru-men}

如果您是ClickHouse的新手,并希望亲身体验它的性能,首先您需要通过 [安装过程](install.md).

diff --git a/docs/zh/getting-started/install.md b/docs/zh/getting-started/install.md
index 84791197ab6..51d6ed198fa 100644
--- a/docs/zh/getting-started/install.md
+++ b/docs/zh/getting-started/install.md
@@ -1,34 +1,46 @@
+---
+toc_priority: 11
+toc_title: 安装部署
+---
+
# 安装 {#clickhouse-an-zhuang}

## 系统要求 {#xi-tong-yao-qiu}

ClickHouse可以在任何具有x86_64,AArch64或PowerPC64LE CPU架构的Linux,FreeBSD或Mac OS X上运行。

-虽然预构建的二进制文件通常是为x86  _64编译并利用SSE 4.2指令集,但除非另有说明,否则使用支持它的CPU将成为额外的系统要求。这是检查当前CPU是否支持SSE 4.2的命令:
+官方预构建的二进制文件通常针对x86_64进行编译,并利用`SSE 4.2`指令集,因此,除非另有说明,对`SSE 4.2`的支持将成为额外的系统需求。下面是检查当前CPU是否支持SSE 4.2的命令:

``` bash
$ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported"
```

-要在不支持SSE 4.2或具有AArch64或PowerPC64LE体系结构的处理器上运行ClickHouse,您应该[通过源构建ClickHouse](#from-sources)进行适当的配置调整。
+要在不支持`SSE 4.2`的处理器或`AArch64`、`PowerPC64LE`架构的处理器上运行ClickHouse,您应该通过适当的配置调整从[源代码构建ClickHouse](#from-sources)。

-## 可用的安装选项 {#install-from-deb-packages}
+## 可用安装包 {#install-from-deb-packages}

-建议为Debian或Ubuntu使用官方的预编译`deb`软件包。 运行以下命令以安装软件包:
+### `DEB`安装包

-然后运行:
+建议使用Debian或Ubuntu的官方预编译`deb`软件包。运行以下命令来安装包:

``` bash
{% include 'install/deb.sh' %}
```

-你也可以从这里手动下载安装包:https://repo.clickhouse.tech/deb/stable/main/。
+如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。

-如果你想使用最新的测试版本,请使用`testing`替换`stable`。
+你也可以从这里手动下载安装包:[下载](https://repo.clickhouse.tech/deb/stable/main/)。

-### 来自RPM包 {#from-rpm-packages}
+安装包列表:

-Yandex ClickHouse团队建议使用官方预编译的`rpm`软件包,用于CentOS,RedHat和所有其他基于rpm的Linux发行版。
+- `clickhouse-common-static` — ClickHouse编译的二进制文件。
+- `clickhouse-server` — 创建`clickhouse-server`软链接,并安装默认配置服务。
+- `clickhouse-client` — 创建`clickhouse-client`客户端工具软链接,并安装客户端配置文件。
+- `clickhouse-common-static-dbg` — 带有调试信息的ClickHouse二进制文件。
+
+### `RPM`安装包 {#from-rpm-packages}
+
+推荐使用CentOS、RedHat和所有其他基于rpm的Linux发行版的官方预编译`rpm`包。

首先,您需要添加官方存储库:

@@ -38,84 +50,120 @@ sudo rpm --import https://repo.clickhouse.tech/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.tech/rpm/stable/x86_64
```

-如果您想使用最新版本,请将`stable`替换为`testing`(建议您在测试环境中使用)。
+如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。`prestable`有时也可用。

-然后运行这些命令以实际安装包:
+然后运行命令安装:

``` bash
sudo yum install clickhouse-server clickhouse-client
```

-您也可以从此处手动下载和安装软件包:https://repo.clickhouse.tech/rpm/stable/x86_64。
+你也可以从这里手动下载安装包:[下载](https://repo.clickhouse.tech/rpm/stable/x86_64)。

-### 来自Docker {#from-docker-image}
+### `Tgz`安装包 {#from-tgz-archives}

-要在Docker中运行ClickHouse,请遵循[码头工人中心](https://hub.docker.com/r/yandex/clickhouse-server/)上的指南。那些图像使用官方的`deb`包。
+如果您的操作系统不支持安装`deb`或`rpm`包,建议使用官方预编译的`tgz`软件包。
+
+所需的版本可以通过`curl`或`wget`从存储库`https://repo.clickhouse.tech/tgz/`下载。
+
+下载后,解压缩资源文件并使用安装脚本进行安装。以下是一个最新版本的安装示例:
+
+``` bash
+export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1`
+curl -O https://repo.clickhouse.tech/tgz/clickhouse-common-static-$LATEST_VERSION.tgz
+curl -O https://repo.clickhouse.tech/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
+curl -O https://repo.clickhouse.tech/tgz/clickhouse-server-$LATEST_VERSION.tgz
+curl -O https://repo.clickhouse.tech/tgz/clickhouse-client-$LATEST_VERSION.tgz
+
+tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
+sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh
+
+tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz
+sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh
+
+tar -xzvf clickhouse-server-$LATEST_VERSION.tgz
+sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh
+sudo /etc/init.d/clickhouse-server start
+
+tar -xzvf clickhouse-client-$LATEST_VERSION.tgz
+sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh
+```
+
+对于生产环境,建议使用最新的`stable`版本。你可以在GitHub页面https://github.com/ClickHouse/ClickHouse/tags找到它们,这些版本以`-stable`后缀标记。
+
+### `Docker`安装包 {#from-docker-image}
+
+要在Docker中运行ClickHouse,请遵循[Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/)上的指南。这些镜像内部使用官方的`deb`安装包。
+
+### 其他环境安装包 {#from-other}
+
+对于非Linux操作系统和AArch64 CPU架构,ClickHouse将会以`master`分支的最新提交进行编译提供(它将会有几小时的延迟)。
+
+- [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse`
+- [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse`
+- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse`
+
+下载后,您可以使用`clickhouse client`连接服务,或者使用`clickhouse local`模式处理数据,不过您必须要额外在GitHub下载[server](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.xml)和[users](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/users.xml)配置文件。
+
+不建议在生产环境中使用这些构建版本,因为它们没有经过充分的测试,但是您可以自行承担这样做的风险。而且它们只提供了ClickHouse功能的一个子集。

### 使用源码安装 {#from-sources}

-具体编译方式可以参考build.md。
+要手动编译ClickHouse,请遵循[Linux](../development/build.md)或[Mac OS X](../development/build-osx.md)说明。

-你可以编译并安装它们。
-你也可以直接使用而不进行安装。
+您可以编译并安装它们,也可以不安装而直接使用这些程序。通过手动构建,您可以取消对`SSE 4.2`的要求,或者为AArch64 CPU进行构建。

-``` text
-Client: programs/clickhouse-client
-Server: programs/clickhouse-server
-```
+      Client: programs/clickhouse-client
+      Server: programs/clickhouse-server

-在服务器中为数据创建如下目录:
+您需要创建一个数据和元数据文件夹,并为所需的用户`chown`授权。它们的路径可以在服务器配置(`src/programs/server/config.xml`)中改变,默认情况下它们是:

-``` text
-/opt/clickhouse/data/default/
-/opt/clickhouse/metadata/default/
-```
+      /opt/clickhouse/data/default/
+      /opt/clickhouse/metadata/default/

-(它们可以在server config中配置。)
-为需要的用户运行’chown’ - -日志的路径可以在server config (src/programs/server/config.xml)中配置。 +在Gentoo上,你可以使用`emerge clickhouse`从源代码安装ClickHouse。 ## 启动 {#qi-dong} -可以运行如下命令在后台启动服务: +如果没有`service`,可以运行如下命令在后台启动服务: ``` bash -sudo service clickhouse-server start +$ sudo /etc/init.d/clickhouse-server start ``` -可以在`/var/log/clickhouse-server/`目录中查看日志。 +日志文件将输出在`/var/log/clickhouse-server/`文件夹。 -如果服务没有启动,请检查配置文件 `/etc/clickhouse-server/config.xml`。 +如果服务器没有启动,检查`/etc/clickhouse-server/config.xml`中的配置。 -你也可以在控制台中直接启动服务: +您也可以手动从控制台启动服务器: -``` bash -clickhouse-server --config-file=/etc/clickhouse-server/config.xml +```bash +$ clickhouse-server --config-file=/etc/clickhouse-server/config.xml ``` -在这种情况下,日志将被打印到控制台中,这在开发过程中很方便。 -如果配置文件在当前目录中,你可以不指定’–config-file’参数。它默认使用’./config.xml’。 +在这种情况下,日志将被打印到控制台,这在开发过程中很方便。 -你可以使用命令行客户端连接到服务: +如果配置文件在当前目录中,则不需要指定`——config-file`参数。默认情况下,它的路径为`./config.xml`。 + +ClickHouse支持访问限制设置。它们位于`users.xml`文件(与`config.xml`同级目录)。 +默认情况下,允许`default`用户从任何地方访问,不需要密码。可查看`user/default/networks`。 +更多信息,请参见[Configuration Files](../operations/configuration-files.md)。 + +启动服务后,您可以使用命令行客户端连接到它: ``` bash -clickhouse-client +$ clickhouse-client ``` -默认情况下它使用’default’用户无密码的与localhost:9000服务建立连接。 -客户端也可以用于连接远程服务,例如: +默认情况下,使用`default`用户并不携带密码连接到`localhost:9000`。还可以使用`--host`参数连接到指定服务器。 + +终端必须使用UTF-8编码。 +更多信息,请参阅[Command-line client](../interfaces/cli.md)。 + +示例: -``` bash -clickhouse-client --host=example.com ``` - -有关更多信息,请参考«Command-line client»部分。 - -检查系统是否工作: - -``` bash -milovidov@hostname:~/work/metrica/src/src/Client$ ./clickhouse-client +$ ./clickhouse-client ClickHouse client version 0.0.18749. Connecting to localhost:9000. Connected to ClickHouse server version 0.0.18749. @@ -135,6 +183,6 @@ SELECT 1 **恭喜,系统已经工作了!** -为了继续进行实验,你可以尝试下载测试数据集。 +为了继续进行实验,你可以尝试下载测试数据集或查看[教程](https://clickhouse.tech/tutorial.html)。 [原始文章](https://clickhouse.tech/docs/en/getting_started/install/) diff --git a/docs/zh/getting-started/playground.md b/docs/zh/getting-started/playground.md index 192203c6fe6..3eac3905f23 100644 --- a/docs/zh/getting-started/playground.md +++ b/docs/zh/getting-started/playground.md @@ -21,15 +21,15 @@ toc_title: "\u266A\u64CD\u573A\u266A" ClickHouse体验还有如下: [ClickHouse管理服务](https://cloud.yandex.com/services/managed-clickhouse) -实例托管 [Yandex云](https://cloud.yandex.com/). -更多信息 [云提供商](../commercial/cloud.md). +实例托管 [Yandex云](https://cloud.yandex.com/)。 +更多信息 [云提供商](../commercial/cloud.md)。 ClickHouse体验平台界面实际上是通过ClickHouse [HTTP API](../interfaces/http.md)接口实现的. 体验平台后端只是一个ClickHouse集群,没有任何额外的服务器端应用程序。 体验平台也同样提供了ClickHouse HTTPS服务端口。 -您可以使用任何HTTP客户端向体验平台进行查询,例如 [curl](https://curl.haxx.se) 或 [wget](https://www.gnu.org/software/wget/),或使用以下方式建立连接 [JDBC](../interfaces/jdbc.md) 或 [ODBC](../interfaces/odbc.md) 司机 -有关支持ClickHouse的软件产品的更多信息,请访问 [这里](../interfaces/index.md). +您可以使用任何HTTP客户端向体验平台进行查询,例如 [curl](https://curl.haxx.se) 或 [wget](https://www.gnu.org/software/wget/),或使用以下方式建立连接 [JDBC](../interfaces/jdbc.md) 或 [ODBC](../interfaces/odbc.md) 驱动。 +有关支持ClickHouse的软件产品的更多信息,请访问 [这里](../interfaces/index.md)。 | 参数 | 值 | |:---------|:--------------------------------------| diff --git a/docs/zh/getting-started/tutorial.md b/docs/zh/getting-started/tutorial.md index 07f595b4354..93f368bc2dc 100644 --- a/docs/zh/getting-started/tutorial.md +++ b/docs/zh/getting-started/tutorial.md @@ -1,19 +1,19 @@ --- toc_priority: 12 -toc_title: "\u6559\u7A0B" +toc_title: 使用教程 --- -# 点击教程 {#clickhouse-tutorial} +# ClickHouse教程 {#clickhouse-tutorial} -## 从本教程中可以期待什么? 
{#what-to-expect-from-this-tutorial}
+## 从本教程中可以获得什么? {#what-to-expect-from-this-tutorial}

-通过本教程,您将学习如何设置一个简单的ClickHouse集群。 它会很小,但却是容错和可扩展的。 然后,我们将使用其中一个示例数据集来填充数据并执行一些演示查询。
+通过学习本教程,您将了解如何设置一个简单的ClickHouse集群。它会很小,但是可以容错和扩展。然后,我们将使用其中一个示例数据集来填充数据并执行一些演示查询。

## 单节点设置 {#single-node-setup}

-为了推迟分布式环境的复杂性,我们将首先在单个服务器或虚拟机上部署ClickHouse。 ClickHouse通常是从[deb](install.md#install-from-deb-packages) 或 [rpm](install.md#from-rpm-packages) 包安装,但对于不支持它们的操作系统也有 [替代方法](install.md#from-docker-image) 。
+为了延迟演示分布式环境的复杂性,我们将首先在单个服务器或虚拟机上部署ClickHouse。ClickHouse通常是从[deb](install.md#install-from-deb-packages)或[rpm](install.md#from-rpm-packages)包安装,但对于不支持它们的操作系统也有[其他方法](install.md#from-docker-image)。

-例如,您选择了从 `deb` 包安装,执行:
+例如,您选择`deb`安装包,执行:

``` bash
{% include 'install/deb.sh' %}
@@ -21,13 +21,13 @@ toc_title: "\u6559\u7A0B"

在我们安装的软件中包含这些包:

-- `clickhouse-client` 包,包含 [clickhouse-client](../interfaces/cli.md) 应用程序,它是交互式ClickHouse控制台客户端。
+- `clickhouse-client` 包,包含[clickhouse-client](../interfaces/cli.md)客户端,它是交互式ClickHouse控制台客户端。
- `clickhouse-common` 包,包含一个ClickHouse可执行文件。
- `clickhouse-server` 包,包含要作为服务端运行的ClickHouse配置文件。

-服务端配置文件位于 `/etc/clickhouse-server/`。 在进一步讨论之前,请注意 `config.xml`文件中的`<path>` 元素. Path决定了数据存储的位置,因此该位置应该位于磁盘容量较大的卷上;默认值为 `/var/lib/clickhouse/`。 如果你想调整配置,考虑到它可能会在未来的软件包更新中被重写,直接编辑`config.xml` 文件并不方便。 推荐的方法是在[配置文件](../operations/configuration-files.md)目录创建文件,作为config.xml文件的"补丁",用以复写配置元素。
+服务器配置文件位于`/etc/clickhouse-server/`。在继续之前,请注意`config.xml`中的`<path>`元素。它决定了数据存储的位置,因此它应该位于磁盘容量较大的卷上;默认值是`/var/lib/clickhouse/`。如果你想调整配置,考虑到它可能会在将来的包更新中被重写,直接编辑`config.xml`并不方便。建议的方法是在[config.d文件夹](../operations/configuration-files.md)中创建文件,用以覆写config.xml中的配置元素。

-你可能已经注意到了, `clickhouse-server` 安装后不会自动启动。 它也不会在更新后自动重新启动。 您启动服务端的方式取决于您的初始系统,通常情况下是这样:
+你可能已经注意到了,`clickhouse-server`安装后不会自动启动。 它也不会在更新后自动重新启动。 您启动服务端的方式取决于您的初始系统,通常情况下是这样:

``` bash
sudo service clickhouse-server start
@@ -39,9 +39,9 @@ sudo service clickhouse-server start
sudo /etc/init.d/clickhouse-server start
```

-服务端日志的默认位置是 `/var/log/clickhouse-server/`。当服务端在日志中记录 `Ready for connections` 消息,即表示服务端已准备好处理客户端连接。
+服务端日志的默认位置是`/var/log/clickhouse-server/`。当服务端在日志中记录`Ready for connections`消息,即表示服务端已准备好处理客户端连接。

-一旦 `clickhouse-server` 启动并运行,我们可以利用 `clickhouse-client` 连接到服务端,并运行一些测试查询,如 `SELECT "Hello, world!";`.
+一旦`clickhouse-server`启动并运行,我们可以利用`clickhouse-client`连接到服务端,并运行一些测试查询,如`SELECT "Hello, world!";`.
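A hedged aside to the connection test just described: a few sanity checks one might type into `clickhouse-client` once the server reports `Ready for connections`. These are illustrative only, and note that ClickHouse SQL uses single quotes for string literals, so the single-quoted spelling is the safe one:

``` sql
-- Minimal sanity checks after connecting with clickhouse-client.
SELECT 1;                 -- cheapest possible "is the server alive" probe
SELECT version();         -- confirms which server build answered
SELECT 'Hello, world!';   -- string literals take single quotes in ClickHouse SQL
```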
@@ -80,7 +80,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv

## 导入示例数据集 {#import-sample-dataset}

-现在是时候用一些示例数据填充我们的ClickHouse服务端。 在本教程中,我们将使用Yandex.Metrica的匿名数据,它是在ClickHouse成为开源之前作为生产环境运行的第一个服务(关于这一点的更多内容请参阅[ClickHouse历史](../introduction/history.md))。有 [多种导入Yandex.Metrica数据集的的方法](example-datasets/metrica.md),为了本教程,我们将使用最现实的一个。
+现在是时候用一些示例数据填充我们的ClickHouse服务端。 在本教程中,我们将使用Yandex.Metrica的匿名数据,它是在ClickHouse成为开源之前作为生产环境运行的第一个服务(关于这一点的更多内容请参阅[ClickHouse历史](../introduction/history.md))。有[多种导入Yandex.Metrica数据集的方法](example-datasets/metrica.md),在本教程中,我们将使用最贴近实际的一种。

### 下载并提取表数据 {#download-and-extract-table-data}

@@ -93,17 +93,17 @@ curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unx

### 创建表 {#create-tables}

-与大多数数据库管理系统一样,ClickHouse在逻辑上将表分组为数据库。包含一个 `default` 数据库,但我们将创建一个新的数据库 `tutorial`:
+与大多数数据库管理系统一样,ClickHouse在逻辑上将表分组为数据库。包含一个`default`数据库,但我们将创建一个新的数据库`tutorial`:

``` bash
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS tutorial"
```

-与创建数据库相比,创建表的语法要复杂得多(请参阅 [参考资料](../sql-reference/statements/create.md). 一般 `CREATE TABLE` 声明必须指定三个关键的事情:
+与创建数据库相比,创建表的语法要复杂得多(请参阅[参考资料](../sql-reference/statements/create.md))。一般`CREATE TABLE`声明必须指定三个关键的事情:

1. 要创建的表的名称。
2. 表结构,例如:列名和对应的[数据类型](../sql-reference/data-types/index.md)。
3. [表引擎](../engines/table-engines/index.md)及其设置,这决定了对此表的查询操作是如何在物理层面执行的所有细节。

Yandex.Metrica是一个网络分析服务,样本数据集不包括其全部功能,因此只有两个表可以创建:

@@ -455,11 +455,11 @@ SETTINGS index_granularity = 8192

您可以使用`clickhouse-client`的交互模式执行这些查询(只需在终端中启动它,而不需要提前指定查询)。或者如果你愿意,可以尝试一些[替代接口](../interfaces/index.md)。

-正如我们所看到的, `hits_v1` 使用 [基本的MergeTree引擎](../engines/table-engines/mergetree-family/mergetree.md),而 `visits_v1` 使用 [折叠树](../engines/table-engines/mergetree-family/collapsingmergetree.md) 变体。
+正如我们所看到的, `hits_v1`使用 [MergeTree引擎](../engines/table-engines/mergetree-family/mergetree.md),而`visits_v1`使用 [Collapsing](../engines/table-engines/mergetree-family/collapsingmergetree.md)引擎。

### 导入数据 {#import-data}

-数据导入到ClickHouse是通过 [INSERT INTO](../sql-reference/statements/insert-into.md) 查询像许多其他SQL数据库。 然而,数据通常是在一个提供 [支持的序列化格式](../interfaces/formats.md) 而不是 `VALUES` 子句(也支持)。
+数据导入到ClickHouse是通过[INSERT INTO](../sql-reference/statements/insert-into.md)查询完成的,与许多其他SQL数据库类似。然而,数据通常以某种[支持的序列化格式](../interfaces/formats.md)提供,而不是使用`VALUES`子句(虽然也支持)。

我们之前下载的文件是以制表符分隔的格式,所以这里是如何通过控制台客户端导入它们:

``` bash
clickhouse-client --query "INSERT INTO tutorial.hits_v1 FORMAT TSV" --max_insert_block_size=100000 < hits_v1.tsv
clickhouse-client --query "INSERT INTO tutorial.visits_v1 FORMAT TSV" --max_insert_block_size=100000 < visits_v1.tsv
```

-ClickHouse有很多 [要调整的设置](../operations/settings/index.md) 在控制台客户端中指定它们的一种方法是通过参数,就像我们看到上面语句中的 `--max_insert_block_size`。找出可用的设置、含义及其默认值的最简单方法是查询 `system.settings` 表:
+ClickHouse有很多[要调整的设置](../operations/settings/index.md)。在控制台客户端中指定它们的一种方法是通过参数,就像上面语句中的`--max_insert_block_size`。找出可用的设置、含义及其默认值的最简单方法是查询`system.settings`表:

``` sql
SELECT name, value, changed, description
@@ -479,14 +479,14 @@ FORMAT TSV

max_insert_block_size    1048576    0    "The maximum block size for insertion, if we control the creation of blocks for insertion."
```

-您也可以 [OPTIMIZE](../sql-reference/statements/misc.md#misc_operations-optimize) 导入后的表。 使用MergeTree-family引擎配置的表总是在后台合并数据部分以优化数据存储(或至少检查是否有意义)。 这些查询强制表引擎立即进行存储优化,而不是稍后一段时间执行:
+您也可以对导入后的表执行[OPTIMIZE](../sql-reference/statements/misc.md#misc_operations-optimize)操作。使用MergeTree-family引擎配置的表总是在后台合并数据部分以优化数据存储(或至少检查是否有意义)。 这些查询强制表引擎立即进行存储优化,而不是稍后一段时间执行:

``` bash
clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL"
clickhouse-client --query "OPTIMIZE TABLE tutorial.visits_v1 FINAL"
```

-这些查询开始一个I/O和CPU密集型操作,所以如果表一直接收到新数据,最好不要管它,让合并在后台运行。
+这些查询会启动I/O和CPU密集型操作,所以如果表一直接收到新数据,最好不要管它,让合并在后台运行。

现在我们可以检查表导入是否成功:

@@ -524,9 +524,9 @@ ClickHouse集群是一个同质集群。 设置步骤:

1. 在群集的所有机器上安装ClickHouse服务端
2. 在配置文件中设置群集配置
3. 在每个实例上创建本地表
-4. 创建一个 [分布式表](../engines/table-engines/special/distributed.md)
+4. 创建一个[分布式表](../engines/table-engines/special/distributed.md)

-[分布式表](../engines/table-engines/special/distributed.md) 实际上是一种 "视图",映射到ClickHouse集群的本地表。 从分布式表中执行 **SELECT** 查询会使用集群所有分片的资源。 您可以为多个集群指定configs,并创建多个分布式表,为不同的集群提供视图。
+[分布式表](../engines/table-engines/special/distributed.md)实际上是一种`view`,映射到ClickHouse集群的本地表。 从分布式表中执行**SELECT**查询会使用集群所有分片的资源。 您可以为多个集群指定configs,并创建多个分布式表,为不同的集群提供视图。

具有三个分片,每个分片一个副本的集群的示例配置:

@@ -555,7 +555,7 @@ ClickHouse集群是一个同质集群。 设置步骤:

```

-为了进一步演示,让我们使用和创建 `hits_v1` 表相同的 `CREATE TABLE` 语句创建一个新的本地表,但表名不同:
+为了进一步演示,让我们使用和创建`hits_v1`表相同的`CREATE TABLE`语句创建一个新的本地表,但表名不同:

``` sql
CREATE TABLE tutorial.hits_local (...) ENGINE = MergeTree() ...
@@ -568,9 +568,9 @@ CREATE TABLE tutorial.hits_all AS tutorial.hits_local
ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand());
```

-常见的做法是在集群的所有计算机上创建类似的分布式表。 它允许在群集的任何计算机上运行分布式查询。 还有一个替代选项可以使用以下方法为给定的SELECT查询创建临时分布式表 [远程](../sql-reference/table-functions/remote.md) 表功能。
+常见的做法是在集群的所有计算机上创建类似的分布式表。 它允许在群集的任何计算机上运行分布式查询。 还有一个替代选项:使用[remote](../sql-reference/table-functions/remote.md)表函数为给定的SELECT查询创建临时分布式表。

-让我们运行 [INSERT SELECT](../sql-reference/statements/insert-into.md) 将该表传播到多个服务器。
+让我们运行[INSERT SELECT](../sql-reference/statements/insert-into.md)将该表传播到多个服务器。

``` sql
INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
@@ -609,10 +609,10 @@ INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;

```

-启用本机复制 [Zookeeper](http://zookeeper.apache.org/) 是必需的。 ClickHouse负责所有副本的数据一致性,并在失败后自动运行恢复过程。 建议将ZooKeeper集群部署在单独的服务器上(其中没有其他进程,包括运行的ClickHouse)。
+要启用本机复制,[Zookeeper](http://zookeeper.apache.org/)是必需的。 ClickHouse负责所有副本的数据一致性,并在失败后自动运行恢复过程。建议将ZooKeeper集群部署在单独的服务器上(其中没有其他进程,包括运行的ClickHouse)。

-!!! note "注"
-    ZooKeeper不是一个严格的要求:在某些简单的情况下,您可以通过将数据写入应用程序代码中的所有副本来复制数据。 这种方法是 **不** 建议的,在这种情况下,ClickHouse将无法保证所有副本上的数据一致性。 因此需要由您的应用来保证这一点。
+!!! note "注意"
+    ZooKeeper不是一个严格的要求:在某些简单的情况下,您可以通过将数据写入应用程序代码中的所有副本来复制数据。 这种方法是**不**建议的,在这种情况下,ClickHouse将无法保证所有副本上的数据一致性。 因此需要由您的应用来保证这一点。

ZooKeeper位置在配置文件中指定:

@@ -653,12 +653,12 @@ ENGINE = ReplicatedMergeTree(
...
``` -在这里,我们使用 [ReplicatedMergeTree](../engines/table-engines/mergetree-family/replication.md) 表引擎。 在参数中,我们指定包含分片和副本标识符的ZooKeeper路径。 +在这里,我们使用[ReplicatedMergeTree](../engines/table-engines/mergetree-family/replication.md)表引擎。 在参数中,我们指定包含分片和副本标识符的ZooKeeper路径。 ``` sql INSERT INTO tutorial.hits_replica SELECT * FROM tutorial.hits_local; ``` -复制在多主机模式下运行。 数据可以加载到任何副本中,然后系统会自动将其与其他实例同步。 复制是异步的,因此在给定时刻,并非所有副本都可能包含最近插入的数据。 至少应有一个副本允许数据摄取。 其他人将同步数据和修复一致性,一旦他们将再次变得活跃。 请注意,这种方法允许最近插入的数据丢失的可能性很低。 +复制在多主机模式下运行。数据可以加载到任何副本中,然后系统自动将其与其他实例同步。复制是异步的,因此在给定时刻,并非所有副本都可能包含最近插入的数据。至少应该有一个副本允许数据摄入。另一些则会在重新激活后同步数据并修复一致性。请注意,这种方法允许最近插入的数据丢失的可能性很低。 [原始文章](https://clickhouse.tech/docs/en/getting_started/tutorial/) diff --git a/docs/zh/index.md b/docs/zh/index.md index 5294dc6c8c7..2bef22f3de4 100644 --- a/docs/zh/index.md +++ b/docs/zh/index.md @@ -4,53 +4,50 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) 在传统的行式数据库系统中,数据按如下顺序存储: -| row | watchID | JavaEnable | title | GoodEvent | EventTime | -|-----|-------------|------------|------------|-----------|---------------------| -| #0 | 89354350662 | 1 | 投资者关系 | 1 | 2016-05-18 05:19:20 | -| #1 | 90329509958 | 0 | 联系我们 | 1 | 2016-05-18 08:10:20 | -| #2 | 89953706054 | 1 | 任务 | 1 | 2016-05-18 07:38:00 | -| #N | … | … | … | … | … | +| Row | WatchID | JavaEnable | Title | GoodEvent | EventTime | +|-----|-------------|------------|--------------------|-----------|---------------------| +| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | +| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | +| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | +| #N | … | … | … | … | … | 处于同一行中的数据总是被物理的存储在一起。 -常见的行式数据库系统有: MySQL、Postgres和MS SQL Server。 -{: .灰色 } +常见的行式数据库系统有:`MySQL`、`Postgres`和`MS SQL Server`。 在列式数据库系统中,数据按如下的顺序存储: -| row: | #0 | #1 | #2 | #N | +| Row: | #0 | #1 | #2 | #N | |-------------|---------------------|---------------------|---------------------|-----| -| watchID: | 89354350662 | 90329509958 | 89953706054 | … | +| WatchID: | 89354350662 | 90329509958 | 89953706054 | … | | JavaEnable: | 1 | 0 | 1 | … | -| title: | 投资者关系 | 联系我们 | 任务 | … | +| Title: | Investor Relations | Contact us | Mission | … | | GoodEvent: | 1 | 1 | 1 | … | -| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … | +| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … | -该示例中只展示了数据在列式数据库中数据的排列方式。 -对于存储而言,列式数据库总是将同一列的数据存储在一起,不同列的数据也总是分开存储。 +这些示例只显示了数据的排列顺序。来自不同列的值被单独存储,来自同一列的数据被存储在一起。 常见的列式数据库有: Vertica、 Paraccel (Actian Matrix,Amazon Redshift)、 Sybase IQ、 Exasol、 Infobright、 InfiniDB、 MonetDB (VectorWise, Actian Vector)、 LucidDB、 SAP HANA、 Google Dremel、 Google PowerDrill、 Druid、 kdb+。 -{: .灰色 } -不同的数据存储方式适用不同的业务场景,数据访问的场景包括:进行了何种查询、多久查询一次以及各类查询的比例; 每种查询读取多少数据————行、列和字节;读取数据和写入数据之间的关系;使用的数据集大小以及如何使用本地的数据集;是否使用事务,以及它们是如何进行隔离的;数据的复制机制与数据的完整性要求;每种类型的查询要求的延迟与吞吐量等等。 +不同的数据存储方式适用不同的业务场景,数据访问的场景包括:进行了何种查询、多久查询一次以及各类查询的比例;每种类型的查询(行、列和字节)读取多少数据;读取数据和更新之间的关系;使用的数据集大小以及如何使用本地的数据集;是否使用事务,以及它们是如何进行隔离的;数据的复制机制与数据的完整性要求;每种类型的查询要求的延迟与吞吐量等等。 -系统负载越高,依据使用场景进行定制化就越重要,并且定制将会变的越精细。没有一个系统能够同时适用所有明显不同的业务场景。如果系统适用于广泛的场景,在负载高的情况下,要兼顾所有的场景,那么将不得不做出选择。是要平衡还是要效率? +系统负载越高,依据使用场景进行定制化就越重要,并且定制将会变的越精细。没有一个系统能够同时适用所有不同的业务场景。如果系统适用于广泛的场景,在负载高的情况下,要兼顾所有的场景,那么将不得不做出选择。是要平衡还是要效率? 
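To ground the row-versus-column discussion above, here is a sketch of the query shape the OLAP material has in mind. The `hits` table and its `JavaEnable`/`EventTime` columns follow the example table shown earlier in this file and are illustrative assumptions, not part of this patch:

``` sql
-- Typical OLAP shape: scan many rows, but touch only 2 of a wide table's columns.
-- A column store reads just the JavaEnable and EventTime column files;
-- a row store would have to read every column of every row it scans.
SELECT
    toDate(EventTime) AS day,
    avg(JavaEnable) AS java_share
FROM hits
GROUP BY day
ORDER BY day;
```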
## OLAP场景的关键特征 {#olapchang-jing-de-guan-jian-te-zheng}

-- 大多数是读请求
-- 数据总是以相当大的批(\> 1000 rows)进行写入
-- 不修改已添加的数据
-- 每次查询都从数据库中读取大量的行,但是同时又仅需要少量的列
+- 绝大多数是读请求
+- 数据以相当大的批次(\> 1000行)更新,而不是单行更新;或者根本没有更新。
+- 已添加到数据库的数据不能修改。
+- 对于读取,从数据库中提取相当多的行,但只提取列的一小部分。
- 宽表,即每个表包含着大量的列
-- 较少的查询(通常每台服务器每秒数百个查询或更少)
+- 查询相对较少(通常每台服务器每秒查询数百次或更少)
- 对于简单查询,允许延迟大约50毫秒
-- 列中的数据相对较小: 数字和短字符串(例如,每个URL 60个字节)
-- 处理单个查询时需要高吞吐量(每个服务器每秒高达数十亿行)
+- 列中的数据相对较小:数字和短字符串(例如,每个URL 60个字节)
+- 处理单个查询时需要高吞吐量(每台服务器每秒可达数十亿行)
- 事务不是必须的
- 对数据一致性要求低
-- 每一个查询除了一个大表外都很小
-- 查询结果明显小于源数据,换句话说,数据被过滤或聚合后能够被盛放在单台服务器的内存中
+- 每个查询有一个大表,除了它以外,其他的表都很小。
+- 查询结果明显小于源数据。换句话说,数据经过过滤或聚合,因此结果适合于单个服务器的RAM中。

很容易可以看出,OLAP场景与其他通常业务场景(例如,OLTP或K/V)有很大的不同, 因此想要使用OLTP或Key-Value数据库去高效的处理分析查询场景,并不是非常完美的适用方案。例如,使用OLAP数据库去处理分析请求通常要优于使用MongoDB或Redis去处理分析请求。

diff --git a/docs/zh/introduction/adopters.md b/docs/zh/introduction/adopters.md
index 38b9ca690e3..fc7dfa4efeb 100644
--- a/docs/zh/introduction/adopters.md
+++ b/docs/zh/introduction/adopters.md
@@ -1,6 +1,6 @@
---
-toc_priority: 8
-toc_title: "\u91C7\u7528\u8005"
+toc_priority: 5
+toc_title: "ClickHouse用户"
---

# ClickHouse用户 {#clickhouse-adopters}

diff --git a/docs/zh/introduction/distinctive-features.md b/docs/zh/introduction/distinctive-features.md
index 7396008f3b9..e9a506f2481 100644
--- a/docs/zh/introduction/distinctive-features.md
+++ b/docs/zh/introduction/distinctive-features.md
@@ -1,3 +1,8 @@
+---
+toc_priority: 2
+toc_title: ClickHouse的特性
+---
+
# ClickHouse的特性 {#clickhouse-de-te-xing}

## 真正的列式数据库管理系统 {#zhen-zheng-de-lie-shi-shu-ju-ku-guan-li-xi-tong}

@@ -12,9 +17,13 @@

在一些列式数据库管理系统中(例如:InfiniDB CE 和 MonetDB) 并没有使用数据压缩。但是, 若想达到比较优异的性能,数据压缩确实起到了至关重要的作用。

+除了在磁盘空间和CPU消耗之间进行不同权衡的高效通用压缩编解码器之外,ClickHouse还提供针对特定类型数据的[专用编解码器](../sql-reference/statements/create/table.md#create-query-specialized-codecs),这使得ClickHouse能够与更小的数据库(如时间序列数据库)竞争并超越它们。
+
## 数据的磁盘存储 {#shu-ju-de-ci-pan-cun-chu}

-许多的列式数据库(如 SAP HANA, Google PowerDrill)只能在内存中工作,这种方式会造成比实际更多的设备预算。ClickHouse被设计用于工作在传统磁盘上的系统,它提供每GB更低的存储成本,但如果有可以使用SSD和内存,它也会合理的利用这些资源。
+许多的列式数据库(如 SAP HANA, Google PowerDrill)只能在内存中工作,这种方式会造成比实际更多的设备预算。
+
+ClickHouse被设计用于工作在传统磁盘上的系统,它提供每GB更低的存储成本,但如果可以使用SSD和内存,它也会合理的利用这些资源。

## 多核心并行处理 {#duo-he-xin-bing-xing-chu-li}

@@ -27,9 +36,11 @@ ClickHouse会使用服务器上一切可用的资源,从而以最自然的方

## 支持SQL {#zhi-chi-sql}

-ClickHouse支持基于SQL的声明式查询语言,该语言大部分情况下是与SQL标准兼容的。
-支持的查询包括 GROUP BY,ORDER BY,IN,JOIN以及非相关子查询。
-不支持窗口函数和相关子查询。
+ClickHouse支持一种[基于SQL的声明式查询语言](../sql-reference/index.md),它在许多情况下与[ANSI SQL标准](../sql-reference/ansi.md)相同。
+
+支持的查询包括[GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), [FROM](../sql-reference/statements/select/from.md), [JOIN](../sql-reference/statements/select/join.md), [IN](../sql-reference/operators/in.md)以及非相关子查询。
+
+相关(依赖性)子查询和窗口函数暂不受支持,但将来会被实现。

## 向量引擎 {#xiang-liang-yin-qing}

@@ -55,12 +66,20 @@ ClickHouse提供各种各样在允许牺牲数据精度的情况下对查询进

2. 基于数据的部分样本进行近似查询。这时,仅会从磁盘检索少部分比例的数据。
3.
不使用全部的聚合条件,通过随机选择有限个数据聚合条件进行聚合。这在数据聚合条件满足某些分布条件下,在提供相当准确的聚合结果的同时降低了计算资源的使用。

+## Adaptive Join Algorithm {#adaptive-join-algorithm}
+
+ClickHouse支持多个表的自定义[JOIN](../sql-reference/statements/select/join.md),它更倾向于散列连接(hash join)算法;如果有多个大表,则回退到合并连接(merge join)算法。
+
## 支持数据复制和数据完整性 {#zhi-chi-shu-ju-fu-zhi-he-shu-ju-wan-zheng-xing}

ClickHouse使用异步的多主复制技术。当数据被写入任何一个可用副本后,系统会在后台将数据分发给其他副本,以保证系统在不同副本上保持相同的数据。在大多数情况下ClickHouse能在故障后自动恢复,在一些少数的复杂情况下需要手动恢复。

更多信息,参见 [数据复制](../engines/table-engines/mergetree-family/replication.md)。

+## 角色的访问控制 {#role-based-access-control}
+
+ClickHouse使用SQL查询实现用户帐户管理,并允许[角色的访问控制](../operations/access-rights.md),类似于ANSI SQL标准和流行的关系数据库管理系统。
+
# 限制 {#clickhouseke-xian-zhi}

1. 没有完整的事务支持。
diff --git a/docs/zh/introduction/history.md b/docs/zh/introduction/history.md
index 29c8c263f9f..265ade8785b 100644
--- a/docs/zh/introduction/history.md
+++ b/docs/zh/introduction/history.md
@@ -1,3 +1,8 @@
+---
+toc_priority: 4
+toc_title: ClickHouse历史
+---
+
# ClickHouse历史 {#clickhouseli-shi}

ClickHouse最初是为 [YandexMetrica](https://metrica.yandex.com/) [世界第二大Web分析平台](http://w3techs.com/technologies/overview/traffic_analysis/all) 而开发的。多年来一直作为该系统的核心组件被该系统持续使用着。目前为止,该系统在ClickHouse中有超过13万亿条记录,并且每天超过200多亿个事件被处理。它允许直接从原始数据中动态查询并生成报告。本文简要介绍了ClickHouse在其早期发展阶段的目标。

diff --git a/docs/zh/introduction/performance.md b/docs/zh/introduction/performance.md
index a5960cfa52e..0ae4b9b1e1e 100644
--- a/docs/zh/introduction/performance.md
+++ b/docs/zh/introduction/performance.md
@@ -1,3 +1,8 @@
+---
+toc_priority: 3
+toc_title: ClickHouse性能
+---
+
# 性能 {#performance}

根据Yandex的内部测试结果,ClickHouse表现出了比同类可比较产品更优的性能。你可以在 [这里](https://clickhouse.tech/benchmark/dbms/) 查看具体的测试结果。

diff --git a/docs/zh/operations/backup.md b/docs/zh/operations/backup.md
index 0ce138c764e..72491bb53ff 100644
--- a/docs/zh/operations/backup.md
+++ b/docs/zh/operations/backup.md
@@ -7,35 +7,37 @@ toc_title: "\u6570\u636E\u5907\u4EFD"

# 数据备份 {#data-backup}

-碌莽禄While: [复制](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [您不能使用类似MergeTree的引擎删除包含超过50Gb数据的表](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). 但是,这些保障措施并不涵盖所有可能的情况,可以规避。
+尽管[副本](../engines/table-engines/mergetree-family/replication.md)可以预防硬件错误带来的数据丢失,但是它不能防止人为操作的错误:意外删除数据、删除错误的表或者删除错误集群上的表,以及会导致数据处理错误或数据损坏的软件bug。这类意外可能会影响所有的副本。ClickHouse有内建的保障措施可以预防一些错误 — 例如,默认情况下[您不能使用类似MergeTree的引擎删除包含超过50Gb数据的表](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330)。但是,这些保障措施不能涵盖所有可能的情况,并且可以被规避。

-为了有效地减少可能的人为错误,您应该仔细准备备份和还原数据的策略 **提前**.
+为了有效地减少可能的人为错误,您应该**提前**仔细准备备份和还原数据的策略。

-每家公司都有不同的可用资源和业务需求,因此没有适合各种情况的ClickHouse备份和恢复通用解决方案。 什么适用于一千兆字节的数据可能不会为几十pb的工作。 有多种可能的方法有自己的优点和缺点,这将在下面讨论。 这是一个好主意,使用几种方法,而不是只是一个,以弥补其各种缺点。
+不同公司有不同的可用资源和业务需求,因此没有适合各种情况的ClickHouse备份和恢复通用解决方案。适用于1GB数据的方案可能并不适用于几十PB数据的情况。有多种各有优缺点的可行方法,将在下面讨论。好的做法是同时结合使用多种方法而不是仅使用一种,这样可以弥补不同方法各自的缺点。

!!!
note "注" 请记住,如果您备份了某些内容并且从未尝试过还原它,那么当您实际需要它时(或者至少需要比业务能够容忍的时间更长),恢复可能无法正常工作。 因此,无论您选择哪种备份方法,请确保自动还原过程,并定期在备用ClickHouse群集上练习。 ## 将源数据复制到其他地方 {#duplicating-source-data-somewhere-else} -通常被摄入到ClickHouse的数据是通过某种持久队列传递的,例如 [Apache Kafka](https://kafka.apache.org). 在这种情况下,可以配置一组额外的订阅服务器,这些订阅服务器将在写入ClickHouse时读取相同的数据流,并将其存储在冷存储中。 大多数公司已经有一些默认的推荐冷存储,可能是对象存储或分布式文件系统,如 [HDFS](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html). +通常被聚集到ClickHouse的数据是通过某种持久队列传递的,例如 [Apache Kafka](https://kafka.apache.org). 在这种情况下,可以配置一组额外的订阅服务器,这些订阅服务器将在写入ClickHouse时读取相同的数据流,并将其存储在冷存储中。 大多数公司已经有一些默认的推荐冷存储,可能是对象存储或分布式文件系统,如 [HDFS](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html). ## 文件系统快照 {#filesystem-snapshots} 某些本地文件系统提供快照功能(例如, [ZFS](https://en.wikipedia.org/wiki/ZFS)),但它们可能不是提供实时查询的最佳选择。 一个可能的解决方案是使用这种文件系统创建额外的副本,并将它们从 [分布](../engines/table-engines/special/distributed.md) 用于以下目的的表 `SELECT` 查询。 任何修改数据的查询都无法访问此类副本上的快照。 作为奖励,这些副本可能具有特殊的硬件配置,每个服务器附加更多的磁盘,这将是经济高效的。 -## ツ环板-ョツ嘉ッツ偲 {#clickhouse-copier} +## clickhouse-copier {#clickhouse-copier} -[ツ环板-ョツ嘉ッツ偲](utilities/clickhouse-copier.md) 是一个多功能工具,最初创建用于重新分片pb大小的表。 它还可用于备份和还原目的,因为它可以在ClickHouse表和集群之间可靠地复制数据。 +[clickhouse-copier](utilities/clickhouse-copier.md) 是一个多功能工具,最初创建用于重新分片pb大小的表。 因为它可以在ClickHouse表和集群之间可靠地复制数据,所以它还可用于备份和还原数据。 对于较小的数据量,一个简单的 `INSERT INTO ... SELECT ...` 到远程表也可以工作。 ## 部件操作 {#manipulations-with-parts} -ClickHouse允许使用 `ALTER TABLE ... FREEZE PARTITION ...` 查询以创建表分区的本地副本。 这是使用硬链接来实现 `/var/lib/clickhouse/shadow/` 文件夹中,所以它通常不会占用旧数据的额外磁盘空间。 创建的文件副本不由ClickHouse服务器处理,所以你可以把它们留在那里:你将有一个简单的备份,不需要任何额外的外部系统,但它仍然会容易出现硬件问题。 出于这个原因,最好将它们远程复制到另一个位置,然后删除本地副本。 分布式文件系统和对象存储仍然是一个不错的选择,但是具有足够大容量的正常附加文件服务器也可以工作(在这种情况下,传输将通过网络文件系统 [rsync](https://en.wikipedia.org/wiki/Rsync)). +ClickHouse允许使用 `ALTER TABLE ... FREEZE PARTITION ...` 查询以创建表分区的本地副本。 这是利用硬链接(hardlink)到 `/var/lib/clickhouse/shadow/` 文件夹中实现的,所以它通常不会占用旧数据的额外磁盘空间。 创建的文件副本不由ClickHouse服务器处理,所以你可以把它们留在那里:你将有一个简单的备份,不需要任何额外的外部系统,但它仍然会容易出现硬件问题。 出于这个原因,最好将它们远程复制到另一个位置,然后删除本地副本。 分布式文件系统和对象存储仍然是一个不错的选择,但是具有足够大容量的正常附加文件服务器也可以工作(在这种情况下,传输将通过网络文件系统 [rsync](https://en.wikipedia.org/wiki/Rsync)). + +数据可以使用 `ALTER TABLE ... ATTACH PARTITION ...` 从备份中恢复。 有关与分区操作相关的查询的详细信息,请参阅 [更改文档](../sql-reference/statements/alter.md#alter_manipulations-with-partitions). -第三方工具可用于自动化此方法: [ツ环板backupョツ嘉ッツ偲](https://github.com/AlexAkulov/clickhouse-backup). +第三方工具可用于自动化此方法: [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup). 
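A hedged sketch of the freeze-and-restore round trip described in this section and continued just below. The table name and the `toYYYYMM`-style partition value are illustrative assumptions, not taken from this patch:

``` sql
-- Snapshot one partition: creates hardlinks under /var/lib/clickhouse/shadow/,
-- so no data is physically copied.
ALTER TABLE datasets.hits_v1 FREEZE PARTITION 202011;

-- Restore path (after the frozen files have been copied into the table's
-- detached/ directory on the target server):
ALTER TABLE datasets.hits_v1 ATTACH PARTITION 202011;
```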
[原始文章](https://clickhouse.tech/docs/en/operations/backup/) diff --git a/docs/zh/operations/monitoring.md b/docs/zh/operations/monitoring.md index a5c30e46f4c..73896d3f8c1 100644 --- a/docs/zh/operations/monitoring.md +++ b/docs/zh/operations/monitoring.md @@ -33,10 +33,10 @@ ClickHouse 收集的指标项: - 服务用于计算的资源占用的各种指标。 - 关于查询处理的常见统计信息。 -可以在 [系统指标](system-tables/metrics.md#system_tables-metrics) ,[系统事件](system-tables/events.md#system_tables-events) 以及[系统异步指标](system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) 等系统表查看所有的指标项。 +可以在[系统指标](system-tables/metrics.md#system_tables-metrics),[系统事件](system-tables/events.md#system_tables-events)以及[系统异步指标](system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics)等系统表查看所有的指标项。 -可以配置ClickHouse 往 [石墨](https://github.com/graphite-project)导入指标。 参考 [石墨部分](server-configuration-parameters/settings.md#server_configuration_parameters-graphite) 配置文件。在配置指标导出之前,需要参考Graphite[官方教程](https://graphite.readthedocs.io/en/latest/install.html)搭建服务。 +可以配置ClickHouse向[Graphite](https://github.com/graphite-project)推送监控信息并导入指标。参考[Graphite监控](server-configuration-parameters/settings.md#server_configuration_parameters-graphite)配置文件。在配置指标导出之前,需要参考[Graphite官方教程](https://graphite.readthedocs.io/en/latest/install.html)搭建Graphite服务。 -此外,您可以通过HTTP API监视服务器可用性。 将HTTP GET请求发送到 `/ping`。 如果服务器可用,它将以 `200 OK` 响应。 +此外,您可以通过HTTP API监视服务器可用性。将HTTP GET请求发送到`/ping`。如果服务器可用,它将以 `200 OK` 响应。 -要监视服务器集群的配置,应设置[max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries)参数并使用HTTP资源`/replicas_status`。 如果副本可用,并且不延迟在其他副本之后,则对`/replicas_status`的请求将返回200 OK。 如果副本滞后,请求将返回 `503 HTTP_SERVICE_UNAVAILABLE`,包括有关待办事项大小的信息。 +要监视服务器集群的配置,应设置[max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries)参数并使用HTTP资源`/replicas_status`。 如果副本可用,并且不延迟在其他副本之后,则对`/replicas_status`的请求将返回`200 OK`。 如果副本滞后,请求将返回`503 HTTP_SERVICE_UNAVAILABLE`,包括有关待办事项大小的信息。 diff --git a/docs/zh/operations/tips.md b/docs/zh/operations/tips.md index 4a8cf2efb96..511e8a22644 100644 --- a/docs/zh/operations/tips.md +++ b/docs/zh/operations/tips.md @@ -11,9 +11,9 @@ 不要禁用超线程。 它有助于某些查询,但不适用于其他查询。 -## 涡轮增压 {#turbo-boost} +## 超频 {#turbo-boost} -强烈推荐涡轮增压。 它显着提高了典型负载的性能。 +强烈推荐超频(turbo-boost)。 它显着提高了典型负载的性能。 您可以使用 `turbostat` 要查看负载下的CPU的实际时钟速率。 ## CPU缩放调控器 {#cpu-scaling-governor} @@ -39,18 +39,18 @@ echo 'performance' | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_gover 始终禁用交换文件。 不这样做的唯一原因是,如果您使用的ClickHouse在您的个人笔记本电脑。 -## 巨大的页面 {#huge-pages} +## 大页(Huge Pages) {#huge-pages} -始终禁用透明巨大的页面。 它会干扰内存分alloc,从而导致显着的性能下降。 +始终禁用透明大页(transparent huge pages)。 它会干扰内存分alloc,从而导致显着的性能下降。 ``` bash echo 'never' | sudo tee /sys/kernel/mm/transparent_hugepage/enabled ``` -使用 `perf top` 观看内核中用于内存管理的时间。 -永久巨大的页面也不需要被分配。 +使用 `perf top` 观察内核中用于内存管理的时间。 +永久大页(permanent huge pages)也不需要被分配。 -## 存储子系统 {#storage-subsystem} +## 存储系统 {#storage-subsystem} 如果您的预算允许您使用SSD,请使用SSD。 如果没有,请使用硬盘。 SATA硬盘7200转就行了。 @@ -100,27 +100,27 @@ XFS也是合适的,但它还没有经过ClickHouse的彻底测试。 如果可能的话,至少使用一个10GB的网络。 1Gb也可以工作,但对于使用数十tb的数据修补副本或处理具有大量中间数据的分布式查询,情况会更糟。 -## 动物园管理员 {#zookeeper} +## Zookeeper {#zookeeper} 您可能已经将ZooKeeper用于其他目的。 您可以使用相同的zookeeper安装,如果它还没有超载。 -It’s best to use a fresh version of ZooKeeper – 3.4.9 or later. The version in stable Linux distributions may be outdated. +最好使用新版本的 Zookeeper – 3.4.9 或之后的版本. 
稳定 Linux 发行版中的 Zookeeper 版本可能是落后的。

-You should never use manually written scripts to transfer data between different ZooKeeper clusters, because the result will be incorrect for sequential nodes. Never use the «zkcopy» utility for the same reason: https://github.com/ksprojects/zkcopy/issues/15
+你永远不该使用自己手写的脚本在不同的 Zookeeper 集群之间转移数据,这可能会导致序列节点的数据不正确。出于同样的原因,永远不要使用 zkcopy 工具: https://github.com/ksprojects/zkcopy/issues/15

如果要将现有ZooKeeper集群分为两个,正确的方法是增加其副本的数量,然后将其重新配置为两个独立的集群。

-不要在与ClickHouse相同的服务器上运行ZooKeeper。 由于ZooKeeper对延迟非常敏感,ClickHouse可能会利用所有可用的系统资源。
+不要在与ClickHouse相同的服务器上运行ZooKeeper。 因为ZooKeeper对延迟非常敏感,而ClickHouse可能会占用所有可用的系统资源。

-使用默认设置,ZooKeeper是一个定时炸弹:
+默认设置下,ZooKeeper 就像是一个定时炸弹:

-> 使用默认配置时,ZooKeeper服务器不会从旧快照和日志中删除文件(请参阅autopurge),这是操作员的责任。
+当使用默认配置时,ZooKeeper服务不会从旧快照和日志中删除文件(请参阅autopurge),这是操作员的责任。

-必须拆除炸弹
+必须拆除炸弹。

-下面的ZooKeeper(3.5.1)配置在Yandex中使用。梅地卡生产环境截至2017年5月20日:
+下面的ZooKeeper(3.5.1)配置在 Yandex.Metrica 的生产环境中使用(截至2017年5月20日):

-动物园cfg:
+zoo.cfg:

``` bash
# http://hadoop.apache.org/zookeeper/docs/current/zookeeperAdmin.html
@@ -222,7 +222,7 @@ JAVA_OPTS="-Xms{{ '{{' }} cluster.get('xms','128M') {{ '}}' }} \
    -XX:+CMSParallelRemarkEnabled"
```

-盐初始化:
+Salt init:

    description "zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }} centralized coordination service"
diff --git a/docs/zh/operations/utilities/clickhouse-local.md b/docs/zh/operations/utilities/clickhouse-local.md
index 4e89961e198..3ff38c01651 100644
--- a/docs/zh/operations/utilities/clickhouse-local.md
+++ b/docs/zh/operations/utilities/clickhouse-local.md
@@ -3,18 +3,18 @@ toc_priority: 60
toc_title: clickhouse-local
---

-# ツ环板-ョツ嘉ッツ偲 {#clickhouse-local}
+# ClickHouse Local {#clickhouse-local}

-该 `clickhouse-local` 程序使您能够对本地文件执行快速处理,而无需部署和配置ClickHouse服务器。
+`clickhouse-local`模式使您能够对本地文件执行快速处理,而无需部署和配置ClickHouse服务器。

-接受表示表的数据并使用以下方式查询它们 [ツ环板ECTョツ嘉ッツ偲](../../operations/utilities/clickhouse-local.md).
+它接受表示表的数据,并使用[ClickHouse SQL语法](../../operations/utilities/clickhouse-local.md)查询这些数据。

-`clickhouse-local` 使用与ClickHouse server相同的核心,因此它支持大多数功能以及相同的格式和表引擎。
+`clickhouse-local`使用与ClickHouse Server相同的核心,因此它支持大多数功能以及相同的格式和表引擎。

-默认情况下 `clickhouse-local` 不能访问同一主机上的数据,但它支持使用以下方式加载服务器配置 `--config-file` 争论。
+默认情况下`clickhouse-local`不能访问同一主机上的数据,但它支持使用`--config-file`方式加载服务器配置。

!!! warning "警告"
-    不建议将生产服务器配置加载到 `clickhouse-local` 因为数据可以在人为错误的情况下被损坏。
+    不建议将生产服务器配置加载到`clickhouse-local`,因为数据可能在人为错误的情况下被损坏。

## 用途 {#usage}

``` bash
clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" -q "query"
```

参数:

- `-S`, `--structure` — table structure for input data.
- `-if`, `--input-format` — input format, `TSV` 默认情况下。
- `-f`, `--file` — path to data, `stdin` 默认情况下。
- `-q` `--query` — queries to execute with `;` 如delimeter。
- `-N`, `--table` — table name where to put output data, `table` 默认情况下。
- `-of`, `--format`, `--output-format` — output format, `TSV` 默认情况下。
- `--stacktrace` — whether to dump debug output in case of exception.
- `--verbose` — more details on query execution.
- `-s` — disables `stderr` 记录。
- `--config-file` — path to configuration file in same format as for ClickHouse server, by default the configuration empty.
- `--help` — arguments references for `clickhouse-local`.
+- `-S`, `--structure` — 输入数据的表结构。 +- `-if`, `--input-format` — 输入数据的格式,默认是`TSV`。 +- `-f`, `--file` — 数据路径,默认是`stdin`。 +- `-q`, `--query` — 要执行的SQL语句,使用`;`作为分隔符。 +- `-N`, `--table` — 数据输出的表名,默认是`table`。 +- `-of`, `--format`, `--output-format` — 输出数据的格式,默认是`TSV`。 +- `--stacktrace` — 是否在出现异常时输出栈信息。 +- `--verbose` — 输出查询执行的详细信息。 +- `-s` — 禁用`stderr`日志输出。 +- `--config-file` — 配置文件的路径,格式与ClickHouse服务器的配置文件相同,默认为空配置。 +- `--help` — 显示`clickhouse-local`的帮助信息。 -还有每个ClickHouse配置变量的参数,这些变量更常用,而不是 `--config-file`. +每个ClickHouse配置参数也都可以作为单独的命令行参数使用,以代替`--config-file`。 -## 例 {#examples} +## 示例 {#examples} ``` bash echo -e "1,2\n3,4" | clickhouse-local -S "a Int64, b Int64" -if "CSV" -q "SELECT * FROM table" @@ -49,7 +49,7 @@ Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec. 3 4 ``` -前面的例子是一样的: +下面的示例与上一个示例等价: ``` bash $ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin); SELECT a, b FROM table; DROP TABLE table" @@ -58,7 +58,22 @@ Read 2 rows, 32.00 B in 0.000 sec., 4987 rows/sec., 77.93 KiB/sec. 3 4 ``` -现在让我们为每个Unix用户输出内存用户: +你可以使用`stdin`或`--file`参数,也可以通过[`file`表函数](../../sql-reference/table-functions/file.md)打开任意数量的文件: + +```bash +$ echo 1 | tee 1.tsv +1 + +$ echo 2 | tee 2.tsv +2 + +$ clickhouse-local --query " + select * from file('1.tsv', TSV, 'a int') t1 + cross join file('2.tsv', TSV, 'b int') t2" +1 2 +``` + +现在让我们查询每个Unix用户的内存使用情况: ``` bash $ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' | clickhouse-local -S "user String, mem Float64" -q "SELECT user, round(sum(mem), 2) as memTotal FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty" diff --git a/docs/zh/sql-reference/aggregate-functions/reference.md b/docs/zh/sql-reference/aggregate-functions/reference.md index 8d1dcda4d83..cf7dddb9b7e 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference.md +++ b/docs/zh/sql-reference/aggregate-functions/reference.md @@ -462,69 +462,6 @@ kurtSamp(expr) SELECT kurtSamp(value) FROM series_with_value_column ``` -## timeSeriesGroupSum(uid,timestamp,value) {#agg-function-timeseriesgroupsum} - -`timeSeriesGroupSum` 可以聚合不同的时间序列,即采样时间戳不对齐。 -它将在两个采样时间戳之间使用线性插值,然后将时间序列和在一起。 - -- `uid` 是时间序列唯一id, `UInt64`. 
-- `timestamp` 是Int64型,以支持毫秒或微秒。 -- `value` 是指标。 - -函数返回元组数组 `(timestamp, aggregated_value)` 对。 - -在使用此功能之前,请确保 `timestamp` 按升序排列 - -示例: - -``` text -┌─uid─┬─timestamp─┬─value─┐ -│ 1 │ 2 │ 0.2 │ -│ 1 │ 7 │ 0.7 │ -│ 1 │ 12 │ 1.2 │ -│ 1 │ 17 │ 1.7 │ -│ 1 │ 25 │ 2.5 │ -│ 2 │ 3 │ 0.6 │ -│ 2 │ 8 │ 1.6 │ -│ 2 │ 12 │ 2.4 │ -│ 2 │ 18 │ 3.6 │ -│ 2 │ 24 │ 4.8 │ -└─────┴───────────┴───────┘ -``` - -``` sql -CREATE TABLE time_series( - uid UInt64, - timestamp Int64, - value Float64 -) ENGINE = Memory; -INSERT INTO time_series VALUES - (1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5), - (2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8); - -SELECT timeSeriesGroupSum(uid, timestamp, value) -FROM ( - SELECT * FROM time_series order by timestamp ASC -); -``` - -其结果将是: - -``` text -[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)] -``` - -## timeSeriesGroupRateSum(uid,ts,val) {#agg-function-timeseriesgroupratesum} - -同样 `timeSeriesGroupSum`, `timeSeriesGroupRateSum` 计算时间序列的速率,然后将速率总和在一起。 -此外,使用此函数之前,时间戳应该是上升顺序。 - -应用此功能从数据 `timeSeriesGroupSum` 例如,您将得到以下结果: - -``` text -[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)] -``` - ## avg(x) {#agg_function-avg} 计算平均值。 diff --git a/docs/zh/sql-reference/functions/bitmap-functions.md b/docs/zh/sql-reference/functions/bitmap-functions.md index d2018f5d9c1..5a6baf2f217 100644 --- a/docs/zh/sql-reference/functions/bitmap-functions.md +++ b/docs/zh/sql-reference/functions/bitmap-functions.md @@ -6,7 +6,7 @@ 我们使用RoaringBitmap实际存储位图对象,当基数小于或等于32时,它使用Set保存。当基数大于32时,它使用RoaringBitmap保存。这也是为什么低基数集的存储更快的原因。 -有关RoaringBitmap的更多信息,请参阅:[呻吟声](https://github.com/RoaringBitmap/CRoaring)。 +有关RoaringBitmap的更多信息,请参阅:[RoaringBitmap](https://github.com/RoaringBitmap/CRoaring)。 ## bitmapBuild {#bitmapbuild} diff --git a/docs/zh/sql-reference/statements/create.md b/docs/zh/sql-reference/statements/create.md index fa3cb8e5ea5..639af0841dc 100644 --- a/docs/zh/sql-reference/statements/create.md +++ b/docs/zh/sql-reference/statements/create.md @@ -259,5 +259,5 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 3577ee3df31..8f45bf53f53 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -43,13 +43,81 @@ else () ${ENABLE_CLICKHOUSE_ALL}) endif () +message(STATUS "ClickHouse modes:") + +if (NOT ENABLE_CLICKHOUSE_SERVER) + message(WARNING "ClickHouse server mode is not going to be built.") +else() + message(STATUS "Server mode: ON") +endif() + +if (NOT ENABLE_CLICKHOUSE_CLIENT) + message(WARNING "ClickHouse client mode is not going to be built. 
You won't be able to connect to the server and run + tests") +else() + message(STATUS "Client mode: ON") +endif() + +if (ENABLE_CLICKHOUSE_LOCAL) + message(STATUS "Local mode: ON") +else() + message(STATUS "Local mode: OFF") +endif() + +if (ENABLE_CLICKHOUSE_BENCHMARK) + message(STATUS "Benchmark mode: ON") +else() + message(STATUS "Benchmark mode: OFF") +endif() + +if (ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG) + message(STATUS "Extract from config mode: ON") +else() + message(STATUS "Extract from config mode: OFF") +endif() + +if (ENABLE_CLICKHOUSE_COMPRESSOR) + message(STATUS "Compressor mode: ON") +else() + message(STATUS "Compressor mode: OFF") +endif() + +if (ENABLE_CLICKHOUSE_COPIER) + message(STATUS "Copier mode: ON") +else() + message(STATUS "Copier mode: OFF") +endif() + +if (ENABLE_CLICKHOUSE_FORMAT) + message(STATUS "Format mode: ON") +else() + message(STATUS "Format mode: OFF") +endif() + +if (ENABLE_CLICKHOUSE_OBFUSCATOR) + message(STATUS "Obfuscator mode: ON") +else() + message(STATUS "Obfuscator mode: OFF") +endif() + +if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) + message(STATUS "ODBC bridge mode: ON") +else() + message(STATUS "ODBC bridge mode: OFF") +endif() + +if (ENABLE_CLICKHOUSE_INSTALL) + message(STATUS "ClickHouse install: ON") +else() + message(STATUS "ClickHouse install: OFF") +endif() + if(NOT (MAKE_STATIC_LIBRARIES OR SPLIT_SHARED_LIBRARIES)) set(CLICKHOUSE_ONE_SHARED ON) endif() configure_file (config_tools.h.in ${ConfigIncludePath}/config_tools.h) - macro(clickhouse_target_link_split_lib target name) if(NOT CLICKHOUSE_ONE_SHARED) target_link_libraries(${target} PRIVATE clickhouse-${name}-lib) @@ -227,9 +295,6 @@ else () install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-git-import DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-git-import) endif () - if(ENABLE_CLICKHOUSE_ODBC_BRIDGE) - list(APPEND CLICKHOUSE_BUNDLE clickhouse-odbc-bridge) - endif() install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 08ded9ed870..a383a06ade0 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -49,7 +49,6 @@ using Ports = std::vector; namespace ErrorCodes { extern const int CANNOT_BLOCK_SIGNAL; - extern const int BAD_ARGUMENTS; extern const int EMPTY_DATA_PASSED; } @@ -103,17 +102,7 @@ public: /// (example: when using stage = 'with_mergeable_state') registerAggregateFunctions(); - if (stage == "complete") - query_processing_stage = QueryProcessingStage::Complete; - else if (stage == "fetch_columns") - query_processing_stage = QueryProcessingStage::FetchColumns; - else if (stage == "with_mergeable_state") - query_processing_stage = QueryProcessingStage::WithMergeableState; - else if (stage == "with_mergeable_state_after_aggregation") - query_processing_stage = QueryProcessingStage::WithMergeableStateAfterAggregation; - else - throw Exception("Unknown query processing stage: " + stage, ErrorCodes::BAD_ARGUMENTS); - + query_processing_stage = QueryProcessingStage::fromString(stage); } void initialize(Poco::Util::Application & self [[maybe_unused]]) override diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 005fece3277..e4858eeda8b 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -75,6 +76,7 @@ #include #include #include +#include #include #include 
#include @@ -222,6 +224,7 @@ private: /// We will format query_id in interactive mode in various ways, the default is just to print Query id: ... std::vector> query_id_formats; + QueryProcessingStage::Enum query_processing_stage; void initialize(Poco::Util::Application & self) override { @@ -463,6 +466,7 @@ private: { UseSSL use_ssl; + registerFormats(); registerFunctions(); registerAggregateFunctions(); @@ -1156,13 +1160,13 @@ private: ASTPtr ast_to_process; try { - std::stringstream dump_before_fuzz; + WriteBufferFromOwnString dump_before_fuzz; fuzz_base->dumpTree(dump_before_fuzz); auto base_before_fuzz = fuzz_base->formatForErrorMessage(); ast_to_process = fuzz_base->clone(); - std::stringstream dump_of_cloned_ast; + WriteBufferFromOwnString dump_of_cloned_ast; ast_to_process->dumpTree(dump_of_cloned_ast); // Run the original query as well. @@ -1184,7 +1188,9 @@ private: fprintf(stderr, "dump of cloned ast:\n%s\n", dump_of_cloned_ast.str().c_str()); fprintf(stderr, "dump after fuzz:\n"); - fuzz_base->dumpTree(std::cerr); + WriteBufferFromOStream cerr_buf(std::cerr, 4096); + fuzz_base->dumpTree(cerr_buf); + cerr_buf.next(); fmt::print(stderr, "IAST::clone() is broken for some AST node. This is a bug. The original AST ('dump before fuzz') and its cloned copy ('dump of cloned AST') refer to the same nodes, which must never happen. This means that their parent node doesn't implement clone() correctly."); @@ -1439,7 +1445,7 @@ private: connection_parameters.timeouts, query_to_send, context.getCurrentQueryId(), - QueryProcessingStage::Complete, + query_processing_stage, &context.getSettingsRef(), &context.getClientInfo(), true); @@ -1480,7 +1486,7 @@ private: connection_parameters.timeouts, query_to_send, context.getCurrentQueryId(), - QueryProcessingStage::Complete, + query_processing_stage, &context.getSettingsRef(), &context.getClientInfo(), true); @@ -1527,7 +1533,9 @@ private: if (is_interactive) { std::cout << std::endl; - formatAST(*res, std::cout); + WriteBufferFromOStream res_buf(std::cout, 4096); + formatAST(*res, res_buf); + res_buf.next(); std::cout << std::endl << std::endl; } @@ -2302,6 +2310,7 @@ public: ("password", po::value()->implicit_value("\n", ""), "password") ("ask-password", "ask-password") ("quota_key", po::value(), "A string to differentiate quotas when the user have keyed quotas configured on server") + ("stage", po::value()->default_value("complete"), "Request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation") ("query_id", po::value(), "query_id") ("query,q", po::value(), "query") ("database,d", po::value(), "database") @@ -2329,6 +2338,7 @@ public: ("query-fuzzer-runs", po::value()->default_value(0), "query fuzzer runs") ("opentelemetry-traceparent", po::value(), "OpenTelemetry traceparent header as described by W3C Trace Context recommendation") ("opentelemetry-tracestate", po::value(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation") + ("history_file", po::value(), "path to history file") ; Settings cmd_settings; @@ -2425,6 +2435,8 @@ public: if (options.count("config-file") && options.count("config")) throw Exception("Two or more configuration files referenced in arguments", ErrorCodes::BAD_ARGUMENTS); + query_processing_stage = QueryProcessingStage::fromString(options["stage"].as()); + /// Save received data into the internal config. 
if (options.count("config-file")) config().setString("config-file", options["config-file"].as()); @@ -2485,6 +2497,8 @@ public: config().setInt("suggestion_limit", options["suggestion_limit"].as()); if (options.count("highlight")) config().setBool("highlight", options["highlight"].as()); + if (options.count("history_file")) + config().setString("history_file", options["history_file"].as()); if ((query_fuzzer_runs = options["query-fuzzer-runs"].as())) { @@ -2501,7 +2515,7 @@ public: { std::string traceparent = options["opentelemetry-traceparent"].as(); std::string error; - if (!context.getClientInfo().parseTraceparentHeader( + if (!context.getClientInfo().client_trace_context.parseTraceparentHeader( traceparent, error)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -2512,7 +2526,7 @@ public: if (options.count("opentelemetry-tracestate")) { - context.getClientInfo().opentelemetry_tracestate = + context.getClientInfo().client_trace_context.tracestate = options["opentelemetry-tracestate"].as(); } diff --git a/programs/client/QueryFuzzer.cpp b/programs/client/QueryFuzzer.cpp index 3fe6b8087f0..a8e32d87db5 100644 --- a/programs/client/QueryFuzzer.cpp +++ b/programs/client/QueryFuzzer.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -419,7 +420,9 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast) fuzz(ast); std::cout << std::endl; - formatAST(*ast, std::cout, false /*highlight*/); + WriteBufferFromOStream ast_buf(std::cout, 4096); + formatAST(*ast, ast_buf, false /*highlight*/); + ast_buf.next(); std::cout << std::endl << std::endl; } diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp index e85e7a21261..87083c2c27b 100644 --- a/programs/client/Suggest.cpp +++ b/programs/client/Suggest.cpp @@ -86,7 +86,7 @@ Suggest::Suggest() void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit) { - std::stringstream query; + std::stringstream query; // STYLE_CHECK_ALLOW_STD_STRING_STREAM query << "SELECT DISTINCT arrayJoin(extractAll(name, '[\\\\w_]{2,}')) AS res FROM (" "SELECT name FROM system.functions" " UNION ALL " diff --git a/programs/client/TestHint.h b/programs/client/TestHint.h index 641c3e0ccf0..65666f4304c 100644 --- a/programs/client/TestHint.h +++ b/programs/client/TestHint.h @@ -93,7 +93,7 @@ private: void parse(const String & hint) { - std::stringstream ss; + std::stringstream ss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM ss << hint; String item; diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index 4ee14b14119..2f19fc47fd2 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -62,6 +62,9 @@ decltype(auto) ClusterCopier::retry(T && func, UInt64 max_tries) { std::exception_ptr exception; + if (max_tries == 0) + throw Exception("Cannot perform zero retries", ErrorCodes::LOGICAL_ERROR); + for (UInt64 try_number = 1; try_number <= max_tries; ++try_number) { try @@ -162,7 +165,7 @@ void ClusterCopier::discoverShardPartitions(const ConnectionTimeouts & timeouts, if (!missing_partitions.empty()) { - std::stringstream ss; + WriteBufferFromOwnString ss; for (const String & missing_partition : missing_partitions) ss << " " << missing_partition; @@ -605,7 +608,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t settings_push.replication_alter_partitions_sync = 2; query_alter_ast_string += " ALTER TABLE " + getQuotedTable(original_table) + - " ATTACH PARTITION " + partition_name + + ((partition_name == 
"'all'") ? " ATTACH PARTITION ID " : " ATTACH PARTITION ") + partition_name + " FROM " + getQuotedTable(helping_table); LOG_DEBUG(log, "Executing ALTER query: {}", query_alter_ast_string); @@ -636,7 +639,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t if (!task_table.isReplicatedTable()) { query_deduplicate_ast_string += " OPTIMIZE TABLE " + getQuotedTable(original_table) + - " PARTITION " + partition_name + " DEDUPLICATE;"; + ((partition_name == "'all'") ? " PARTITION ID " : " PARTITION ") + partition_name + " DEDUPLICATE;"; LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: {}", query_alter_ast_string); @@ -807,7 +810,7 @@ bool ClusterCopier::tryDropPartitionPiece( DatabaseAndTableName helping_table = DatabaseAndTableName(original_table.first, original_table.second + "_piece_" + toString(current_piece_number)); String query = "ALTER TABLE " + getQuotedTable(helping_table); - query += " DROP PARTITION " + task_partition.name + ""; + query += ((task_partition.name == "'all'") ? " DROP PARTITION ID " : " DROP PARTITION ") + task_partition.name + ""; /// TODO: use this statement after servers will be updated up to 1.1.54310 // query += " DROP PARTITION ID '" + task_partition.name + "'"; @@ -1567,7 +1570,7 @@ void ClusterCopier::dropParticularPartitionPieceFromAllHelpingTables(const TaskT DatabaseAndTableName original_table = task_table.table_push; DatabaseAndTableName helping_table = DatabaseAndTableName(original_table.first, original_table.second + "_piece_" + toString(current_piece_number)); - String query = "ALTER TABLE " + getQuotedTable(helping_table) + " DROP PARTITION " + partition_name; + String query = "ALTER TABLE " + getQuotedTable(helping_table) + ((partition_name == "'all'") ? " DROP PARTITION ID " : " DROP PARTITION ") + partition_name; const ClusterPtr & cluster_push = task_table.cluster_push; Settings settings_push = task_cluster->settings_push; @@ -1670,14 +1673,24 @@ void ClusterCopier::createShardInternalTables(const ConnectionTimeouts & timeout std::set ClusterCopier::getShardPartitions(const ConnectionTimeouts & timeouts, TaskShard & task_shard) { + std::set res; + createShardInternalTables(timeouts, task_shard, false); TaskTable & task_table = task_shard.task_table; + const String & partition_name = queryToString(task_table.engine_push_partition_key_ast); + + if (partition_name == "'all'") + { + res.emplace("'all'"); + return res; + } + String query; { WriteBufferFromOwnString wb; - wb << "SELECT DISTINCT " << queryToString(task_table.engine_push_partition_key_ast) << " AS partition FROM" + wb << "SELECT DISTINCT " << partition_name << " AS partition FROM" << " " << getQuotedTable(task_shard.table_read_shard) << " ORDER BY partition DESC"; query = wb.str(); } @@ -1692,7 +1705,6 @@ std::set ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti local_context.setSettings(task_cluster->settings_pull); Block block = getBlockWithAllStreamData(InterpreterFactory::get(query_ast, local_context)->execute().getInputStream()); - std::set res; if (block) { ColumnWithTypeAndName & column = block.getByPosition(0); @@ -1803,7 +1815,7 @@ UInt64 ClusterCopier::executeQueryOnCluster( if (execution_mode == ClusterExecutionMode::ON_EACH_NODE) max_successful_executions_per_shard = 0; - std::atomic origin_replicas_number; + std::atomic origin_replicas_number = 0; /// We need to execute query on one replica at least auto do_for_shard = [&] (UInt64 shard_index, Settings shard_settings) diff --git a/programs/copier/ClusterCopierApp.cpp 
b/programs/copier/ClusterCopierApp.cpp index 08a7e50a9d7..c2946e12c34 100644 --- a/programs/copier/ClusterCopierApp.cpp +++ b/programs/copier/ClusterCopierApp.cpp @@ -1,6 +1,7 @@ #include "ClusterCopierApp.h" #include #include +#include #include @@ -122,6 +123,7 @@ void ClusterCopierApp::mainImpl() registerStorages(); registerDictionaries(); registerDisks(); + registerFormats(); static const std::string default_database = "_local"; DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared(default_database, *context)); diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp index 0f607ea5faf..ea2be469945 100644 --- a/programs/copier/Internals.cpp +++ b/programs/copier/Internals.cpp @@ -13,7 +13,7 @@ using ConfigurationPtr = Poco::AutoPtr; ConfigurationPtr getConfigurationFromXMLString(const std::string & xml_data) { - std::stringstream ss(xml_data); + std::stringstream ss(xml_data); // STYLE_CHECK_ALLOW_STD_STRING_STREAM Poco::XML::InputSource input_source{ss}; return {new Poco::Util::XMLConfiguration{&input_source}}; } diff --git a/programs/copier/TaskTableAndShard.h b/programs/copier/TaskTableAndShard.h index cc12fe556c7..4f5bfb443e6 100644 --- a/programs/copier/TaskTableAndShard.h +++ b/programs/copier/TaskTableAndShard.h @@ -394,12 +394,8 @@ inline ASTPtr TaskTable::rewriteReplicatedCreateQueryToPlain() inline String DB::TaskShard::getDescription() const { - std::stringstream ss; - ss << "N" << numberInCluster() - << " (having a replica " << getHostNameExample() - << ", pull table " + getQuotedTable(task_table.table_pull) - << " of cluster " + task_table.cluster_pull_name << ")"; - return ss.str(); + return fmt::format("N{} (having a replica {}, pull table {} of cluster {}", + numberInCluster(), getHostNameExample(), getQuotedTable(task_table.table_pull), task_table.cluster_pull_name); } inline String DB::TaskShard::getHostNameExample() const diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 01f952bf95e..986dc67a798 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -129,7 +130,9 @@ int mainEntryClickHouseFormat(int argc, char ** argv) ASTPtr res = parseQueryAndMovePosition(parser, pos, end, "query", multiple, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); if (!quiet) { - formatAST(*res, std::cout, hilite, oneline); + WriteBufferFromOStream res_buf(std::cout, 4096); + formatAST(*res, res_buf, hilite, oneline); + res_buf.next(); if (multiple) std::cout << "\n;\n"; std::cout << std::endl; diff --git a/programs/git-import/git-import.cpp b/programs/git-import/git-import.cpp index 7cdd77b4b7c..b6ea2c7baf4 100644 --- a/programs/git-import/git-import.cpp +++ b/programs/git-import/git-import.cpp @@ -680,7 +680,7 @@ void updateSnapshot(Snapshot & snapshot, const Commit & commit, CommitDiff & fil for (auto & elem : file_changes) { auto & file = elem.second.file_change; - if (file.path != file.old_path) + if (!file.old_path.empty() && file.path != file.old_path) snapshot[file.path] = snapshot[file.old_path]; } diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 8290118089c..9e3942e126d 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -70,7 +71,7 @@ namespace po = boost::program_options; namespace fs = std::filesystem; -auto executeScript(const std::string & command, bool throw_on_error = false) +static 
auto executeScript(const std::string & command, bool throw_on_error = false) { auto sh = ShellCommand::execute(command); WriteBufferFromFileDescriptor wb_stdout(STDOUT_FILENO); @@ -87,7 +88,7 @@ auto executeScript(const std::string & command, bool throw_on_error = false) return sh->tryWait(); } -bool ask(std::string question) +static bool ask(std::string question) { while (true) { @@ -104,6 +105,16 @@ bool ask(std::string question) } } +static bool filesEqual(std::string path1, std::string path2) +{ + MMapReadBufferFromFile in1(path1, 0); + MMapReadBufferFromFile in2(path2, 0); + + /// memcmp is faster than hashing and comparing hashes + return in1.buffer().size() == in2.buffer().size() + && 0 == memcmp(in1.buffer().begin(), in2.buffer().begin(), in1.buffer().size()); +} + int mainEntryClickHouseInstall(int argc, char ** argv) { @@ -143,57 +154,89 @@ int mainEntryClickHouseInstall(int argc, char ** argv) throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot obtain path to the binary from {}, file doesn't exist", binary_self_path.string()); + fs::path binary_self_canonical_path = fs::canonical(binary_self_path); + /// Copy binary to the destination directory. /// TODO An option to link instead of copy - useful for developers. - /// TODO Check if the binary is the same. - - size_t binary_size = fs::file_size(binary_self_path); fs::path prefix = fs::path(options["prefix"].as()); fs::path bin_dir = prefix / fs::path(options["binary-path"].as()); - size_t available_space = fs::space(bin_dir).available; - if (available_space < binary_size) - throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.", - bin_dir.string(), ReadableSize(binary_size), ReadableSize(available_space)); - fs::path main_bin_path = bin_dir / "clickhouse"; fs::path main_bin_tmp_path = bin_dir / "clickhouse.new"; fs::path main_bin_old_path = bin_dir / "clickhouse.old"; - fmt::print("Copying ClickHouse binary to {}\n", main_bin_tmp_path.string()); + size_t binary_size = fs::file_size(binary_self_path); - try + bool old_binary_exists = fs::exists(main_bin_path); + bool already_installed = false; + + /// Check if the binary is the same file (already installed). + if (old_binary_exists && binary_self_canonical_path == fs::canonical(main_bin_path)) { - ReadBufferFromFile in(binary_self_path.string()); - WriteBufferFromFile out(main_bin_tmp_path.string()); - copyData(in, out); - out.sync(); - - if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH)) - throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR); - - out.finalize(); + already_installed = true; + fmt::print("ClickHouse binary is already located at {}\n", main_bin_path.string()); } - catch (const Exception & e) + /// Check if binary has the same content. + else if (old_binary_exists && binary_size == fs::file_size(main_bin_path)) { - if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0) - std::cerr << "Install must be run as root: sudo ./clickhouse install\n"; - throw; + fmt::print("Found already existing ClickHouse binary at {} having the same size. 
Will check its contents.\n", + main_bin_path.string()); + + if (filesEqual(binary_self_path.string(), main_bin_path.string())) + { + already_installed = true; + fmt::print("ClickHouse binary is already located at {} and it has the same content as {}\n", + main_bin_path.string(), binary_self_canonical_path.string()); + } } - if (fs::exists(main_bin_path)) + if (already_installed) { - fmt::print("{} already exists, will rename existing binary to {} and put the new binary in place\n", - main_bin_path.string(), main_bin_old_path.string()); - - /// There is file exchange operation in Linux but it's not portable. - fs::rename(main_bin_path, main_bin_old_path); + if (0 != chmod(main_bin_path.string().c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH)) + throwFromErrno(fmt::format("Cannot chmod {}", main_bin_path.string()), ErrorCodes::SYSTEM_ERROR); } + else + { + size_t available_space = fs::space(bin_dir).available; + if (available_space < binary_size) + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.", + bin_dir.string(), ReadableSize(binary_size), ReadableSize(available_space)); - fmt::print("Renaming {} to {}.\n", main_bin_tmp_path.string(), main_bin_path.string()); - fs::rename(main_bin_tmp_path, main_bin_path); + fmt::print("Copying ClickHouse binary to {}\n", main_bin_tmp_path.string()); + + try + { + ReadBufferFromFile in(binary_self_path.string()); + WriteBufferFromFile out(main_bin_tmp_path.string()); + copyData(in, out); + out.sync(); + + if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH)) + throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR); + + out.finalize(); + } + catch (const Exception & e) + { + if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0) + std::cerr << "Install must be run as root: sudo ./clickhouse install\n"; + throw; + } + + if (old_binary_exists) + { + fmt::print("{} already exists, will rename existing binary to {} and put the new binary in place\n", + main_bin_path.string(), main_bin_old_path.string()); + + /// There is file exchange operation in Linux but it's not portable. + fs::rename(main_bin_path, main_bin_old_path); + } + + fmt::print("Renaming {} to {}.\n", main_bin_tmp_path.string(), main_bin_path.string()); + fs::rename(main_bin_tmp_path, main_bin_path); + } /// Create symlinks. 
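The install path above is now idempotent; a minimal way to exercise it — a sketch assuming the `clickhouse install` entry point and default paths as introduced in this patch, output omitted:

```bash
# First run copies the binary into the configured --prefix/--binary-path
# location; it has to be run as root.
sudo ./clickhouse install

# Re-running with the same binary now short-circuits: the installer treats
# the target as already installed when it resolves to the same canonical path,
# or when the sizes match and the mmap+memcmp comparison (filesEqual above)
# finds identical content; in that case it only re-applies the
# read/execute permissions instead of copying.
sudo ./clickhouse install
```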
@@ -329,14 +372,20 @@ int mainEntryClickHouseInstall(int argc, char ** argv) bool has_password_for_default_user = false; - if (!fs::exists(main_config_file)) + if (!fs::exists(config_d)) { fmt::print("Creating config directory {} that is used for tweaks of main server configuration.\n", config_d.string()); fs::create_directory(config_d); + } + if (!fs::exists(users_d)) + { fmt::print("Creating config directory {} that is used for tweaks of users configuration.\n", users_d.string()); fs::create_directory(users_d); + } + if (!fs::exists(main_config_file)) + { std::string_view main_config_content = getResource("config.xml"); if (main_config_content.empty()) { @@ -349,7 +398,30 @@ int mainEntryClickHouseInstall(int argc, char ** argv) out.sync(); out.finalize(); } + } + else + { + fmt::print("Config file {} already exists, will keep it and extract path info from it.\n", main_config_file.string()); + ConfigProcessor processor(main_config_file.string(), /* throw_on_bad_incl = */ false, /* log_to_console = */ false); + ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(processor.processConfig())); + + if (configuration->has("path")) + { + data_path = configuration->getString("path"); + fmt::print("{} has {} as data path.\n", main_config_file.string(), data_path); + } + + if (configuration->has("logger.log")) + { + log_path = fs::path(configuration->getString("logger.log")).remove_filename(); + fmt::print("{} has {} as log path.\n", main_config_file.string(), log_path); + } + } + + + if (!fs::exists(users_config_file)) + { std::string_view users_config_content = getResource("users.xml"); if (users_config_content.empty()) { @@ -365,38 +437,17 @@ int mainEntryClickHouseInstall(int argc, char ** argv) } else { - { - fmt::print("Config file {} already exists, will keep it and extract path info from it.\n", main_config_file.string()); - - ConfigProcessor processor(main_config_file.string(), /* throw_on_bad_incl = */ false, /* log_to_console = */ false); - ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(processor.processConfig())); - - if (configuration->has("path")) - { - data_path = configuration->getString("path"); - fmt::print("{} has {} as data path.\n", main_config_file.string(), data_path); - } - - if (configuration->has("logger.log")) - { - log_path = fs::path(configuration->getString("logger.log")).remove_filename(); - fmt::print("{} has {} as log path.\n", main_config_file.string(), log_path); - } - } + fmt::print("Users config file {} already exists, will keep it and extract users info from it.\n", users_config_file.string()); /// Check if password for default user already specified. 
+ ConfigProcessor processor(users_config_file.string(), /* throw_on_bad_incl = */ false, /* log_to_console = */ false); + ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(processor.processConfig())); - if (fs::exists(users_config_file)) + if (!configuration->getString("users.default.password", "").empty() + || !configuration->getString("users.default.password_sha256_hex", "").empty() + || !configuration->getString("users.default.password_double_sha1_hex", "").empty()) { - ConfigProcessor processor(users_config_file.string(), /* throw_on_bad_incl = */ false, /* log_to_console = */ false); - ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(processor.processConfig())); - - if (!configuration->getString("users.default.password", "").empty() - || configuration->getString("users.default.password_sha256_hex", "").empty() - || configuration->getString("users.default.password_double_sha1_hex", "").empty()) - { - has_password_for_default_user = true; - } + has_password_for_default_user = true; } } @@ -568,7 +619,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) " || echo \"Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary." " This is optional. Taskstats accounting will be disabled." " To enable taskstats accounting you may add the required capability later manually.\"", - "/tmp/test_setcap.sh", main_bin_path.string()); + "/tmp/test_setcap.sh", fs::canonical(main_bin_path).string()); fmt::print(" {}\n", command); executeScript(command); #endif @@ -589,10 +640,6 @@ int mainEntryClickHouseInstall(int argc, char ** argv) } } - std::string maybe_sudo; - if (getuid() != 0) - maybe_sudo = "sudo "; - std::string maybe_password; if (has_password_for_default_user) maybe_password = " --password"; @@ -600,10 +647,19 @@ int mainEntryClickHouseInstall(int argc, char ** argv) fmt::print( "\nClickHouse has been successfully installed.\n" "\nStart clickhouse-server with:\n" - " {}clickhouse start\n" + " sudo clickhouse start\n" "\nStart clickhouse-client with:\n" " clickhouse-client{}\n\n", - maybe_sudo, maybe_password); + maybe_password); + } + catch (const fs::filesystem_error &) + { + std::cerr << getCurrentExceptionMessage(false) << '\n'; + + if (getuid() != 0) + std::cerr << "\nRun with sudo.\n"; + + return getCurrentExceptionCode(); } catch (...) { @@ -775,17 +831,20 @@ namespace return pid; } - int stop(const fs::path & pid_file) + int stop(const fs::path & pid_file, bool force) { UInt64 pid = isRunning(pid_file); if (!pid) return 0; - if (0 == kill(pid, 15)) /// Terminate - fmt::print("Sent termination signal.\n", pid); + int signal = force ? SIGKILL : SIGTERM; + const char * signal_name = force ? 
"kill" : "terminate"; + + if (0 == kill(pid, signal)) + fmt::print("Sent {} signal to process with pid {}.\n", signal_name, pid); else - throwFromErrno("Cannot send termination signal", ErrorCodes::SYSTEM_ERROR); + throwFromErrno(fmt::format("Cannot send {} signal", signal_name), ErrorCodes::SYSTEM_ERROR); size_t try_num = 0; constexpr size_t num_tries = 60; @@ -861,6 +920,7 @@ int mainEntryClickHouseStop(int argc, char ** argv) desc.add_options() ("help,h", "produce help message") ("pid-path", po::value()->default_value("/var/run/clickhouse-server"), "directory for pid file") + ("force", po::value()->default_value(false), "Stop with KILL signal instead of TERM") ; po::variables_map options; @@ -879,7 +939,7 @@ int mainEntryClickHouseStop(int argc, char ** argv) { fs::path pid_file = fs::path(options["pid-path"].as()) / "clickhouse-server.pid"; - return stop(pid_file); + return stop(pid_file, options["force"].as()); } catch (...) { @@ -932,6 +992,7 @@ int mainEntryClickHouseRestart(int argc, char ** argv) ("config-path", po::value()->default_value("/etc/clickhouse-server"), "directory with configs") ("pid-path", po::value()->default_value("/var/run/clickhouse-server"), "directory for pid file") ("user", po::value()->default_value("clickhouse"), "clickhouse user") + ("force", po::value()->default_value(false), "Stop with KILL signal instead of TERM") ; po::variables_map options; @@ -954,7 +1015,7 @@ int mainEntryClickHouseRestart(int argc, char ** argv) fs::path config = fs::path(options["config-path"].as()) / "config.xml"; fs::path pid_file = fs::path(options["pid-path"].as()) / "clickhouse-server.pid"; - if (int res = stop(pid_file)) + if (int res = stop(pid_file, options["force"].as())) return res; return start(user, executable, config, pid_file); } diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index b9dde555788..15e71198eb1 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -224,6 +225,7 @@ try registerStorages(); registerDictionaries(); registerDisks(); + registerFormats(); /// Maybe useless if (config().has("macros")) @@ -420,7 +422,7 @@ static const char * minimal_default_user_xml = static ConfigurationPtr getConfigurationFromXMLString(const char * xml_data) { - std::stringstream ss{std::string{xml_data}}; + std::stringstream ss{std::string{xml_data}}; // STYLE_CHECK_ALLOW_STD_STRING_STREAM Poco::XML::InputSource input_source{ss}; return {new Poco::Util::XMLConfiguration{&input_source}}; } diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 756aab0a574..d0d7f201c68 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -1050,6 +1051,8 @@ try using namespace DB; namespace po = boost::program_options; + registerFormats(); + po::options_description description = createOptionsDescription("Options", getTerminalWidth()); description.add_options() ("help", "produce help message") diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 8cfa110adad..11864354619 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -10,19 +10,8 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES PingHandler.cpp SchemaAllowedHandler.cpp validateODBCConnectionString.cpp + odbc-bridge.cpp ) -set (CLICKHOUSE_ODBC_BRIDGE_LINK - PRIVATE - 
clickhouse_parsers - clickhouse_aggregate_functions - daemon - dbms - Poco::Data - PUBLIC - Poco::Data::ODBC -) - -clickhouse_program_add_library(odbc-bridge) if (OS_LINUX) # clickhouse-odbc-bridge is always a separate binary. @@ -30,10 +19,17 @@ if (OS_LINUX) set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic") endif () -add_executable(clickhouse-odbc-bridge odbc-bridge.cpp) -set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) +add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) -clickhouse_program_link_split_binary(odbc-bridge) +target_link_libraries(clickhouse-odbc-bridge PRIVATE + daemon + dbms + clickhouse_parsers + Poco::Data + Poco::Data::ODBC +) + +set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) if (USE_GDB_ADD_INDEX) add_custom_command(TARGET clickhouse-odbc-bridge POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} ../clickhouse-odbc-bridge COMMENT "Adding .gdb-index to clickhouse-odbc-bridge" VERBATIM) diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index 33723b4cc8e..9e2307715c5 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -113,16 +113,16 @@ void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & reques /// TODO Why not do SQLColumns instead? std::string name = schema_name.empty() ? backQuoteIfNeed(table_name) : backQuoteIfNeed(schema_name) + "." + backQuoteIfNeed(table_name); - std::stringstream ss; + WriteBufferFromOwnString buf; std::string input = "SELECT * FROM " + name + " WHERE 1 = 0"; ParserQueryWithOutput parser; ASTPtr select = parseQuery(parser, input.data(), input.data() + input.size(), "", context_settings.max_query_size, context_settings.max_parser_depth); - IAST::FormatSettings settings(ss, true); + IAST::FormatSettings settings(buf, true); settings.always_quote_identifiers = true; settings.identifier_quoting_style = getQuotingStyle(hdbc); select->format(settings); - std::string query = ss.str(); + std::string query = buf.str(); LOG_TRACE(log, "Inferring structure with query '{}'", query); diff --git a/programs/odbc-bridge/ODBCBlockOutputStream.cpp b/programs/odbc-bridge/ODBCBlockOutputStream.cpp index 82ca861ea67..4d8b9fa6bdf 100644 --- a/programs/odbc-bridge/ODBCBlockOutputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockOutputStream.cpp @@ -32,12 +32,12 @@ namespace for (const auto & column : columns) query.columns->children.emplace_back(std::make_shared(column.name)); - std::stringstream ss; - IAST::FormatSettings settings(ss, true); + WriteBufferFromOwnString buf; + IAST::FormatSettings settings(buf, true); settings.always_quote_identifiers = true; settings.identifier_quoting_style = quoting; query.IAST::format(settings); - return ss.str(); + return buf.str(); } std::string getQuestionMarks(size_t n) diff --git a/programs/odbc-bridge/ODBCBridge.cpp b/programs/odbc-bridge/ODBCBridge.cpp index 3f119fbf7ba..3b26e192a07 100644 --- a/programs/odbc-bridge/ODBCBridge.cpp +++ b/programs/odbc-bridge/ODBCBridge.cpp @@ -18,11 +18,13 @@ #include #include #include +#include #include #include #include #include + namespace DB { namespace ErrorCodes @@ -107,6 +109,14 @@ void ODBCBridge::defineOptions(Poco::Util::OptionSet & options) .argument("err-log-path") .binding("logger.errorlog")); + options.addOption(Poco::Util::Option("stdout-path", "", "stdout log path, default console") + .argument("stdout-path") + .binding("logger.stdout")); + + 
options.addOption(Poco::Util::Option("stderr-path", "", "stderr log path, default console") + .argument("stderr-path") + .binding("logger.stderr")); + using Me = std::decay_t; options.addOption(Poco::Util::Option("help", "", "produce this help message") .binding("help") @@ -125,6 +135,27 @@ void ODBCBridge::initialize(Application & self) config().setString("logger", "ODBCBridge"); + /// Redirect stdout, stderr to specified files. + /// Some libraries and sanitizers write to stderr in case of errors. + const auto stdout_path = config().getString("logger.stdout", ""); + if (!stdout_path.empty()) + { + if (!freopen(stdout_path.c_str(), "a+", stdout)) + throw Poco::OpenFileException("Cannot attach stdout to " + stdout_path); + + /// Disable buffering for stdout. + setbuf(stdout, nullptr); + } + const auto stderr_path = config().getString("logger.stderr", ""); + if (!stderr_path.empty()) + { + if (!freopen(stderr_path.c_str(), "a+", stderr)) + throw Poco::OpenFileException("Cannot attach stderr to " + stderr_path); + + /// Disable buffering for stderr. + setbuf(stderr, nullptr); + } + buildLoggers(config(), logger(), self.commandName()); BaseDaemon::logRevision(); @@ -160,6 +191,8 @@ int ODBCBridge::main(const std::vector & /*args*/) if (is_help) return Application::EXIT_OK; + registerFormats(); + LOG_INFO(log, "Starting up"); Poco::Net::ServerSocket socket; auto address = socketBindListen(socket, hostname, port, log); diff --git a/programs/odbc-bridge/tests/CMakeLists.txt b/programs/odbc-bridge/tests/CMakeLists.txt index 60e7afab969..3e0af8c940f 100644 --- a/programs/odbc-bridge/tests/CMakeLists.txt +++ b/programs/odbc-bridge/tests/CMakeLists.txt @@ -1,3 +1,2 @@ -add_executable (validate-odbc-connection-string validate-odbc-connection-string.cpp) -clickhouse_target_link_split_lib(validate-odbc-connection-string odbc-bridge) +add_executable (validate-odbc-connection-string validate-odbc-connection-string.cpp ../validateODBCConnectionString.cpp) target_link_libraries (validate-odbc-connection-string PRIVATE clickhouse_common_io) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index b85cb5e75f2..26339c5ad3f 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -63,6 +64,7 @@ #include #include #include +#include #if !defined(ARCADIA_BUILD) @@ -83,6 +85,11 @@ # include #endif +#if USE_GRPC +# include +#endif + + namespace CurrentMetrics { extern const Metric Revision; @@ -190,10 +197,10 @@ int Server::run() if (config().hasOption("help")) { Poco::Util::HelpFormatter help_formatter(Server::options()); - std::stringstream header; - header << commandName() << " [OPTION] [-- [ARG]...]\n"; - header << "positional arguments can be used to rewrite config.xml properties, for example, --http_port=8010"; - help_formatter.setHeader(header.str()); + auto header_str = fmt::format("{} [OPTION] [-- [ARG]...]\n" + "positional arguments can be used to rewrite config.xml properties, for example, --http_port=8010", + commandName()); + help_formatter.setHeader(header_str); help_formatter.format(std::cout); return 0; } @@ -266,6 +273,7 @@ int Server::main(const std::vector & /*args*/) registerStorages(); registerDictionaries(); registerDisks(); + registerFormats(); CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision()); CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); @@ -566,6 +574,8 @@ int Server::main(const 
std::vector & /*args*/) if (config->has("zookeeper")) global_context->reloadZooKeeperIfChanged(config); + global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config); + global_context->updateStorageConfiguration(*config); }, /* already_loaded = */ true); @@ -802,7 +812,7 @@ int Server::main(const std::vector & /*args*/) http_params->setTimeout(settings.http_receive_timeout); http_params->setKeepAliveTimeout(keep_alive_timeout); - std::vector> servers; + std::vector servers; std::vector listen_hosts = DB::getMultipleValuesFromConfig(config(), "", "listen_host"); @@ -1031,6 +1041,15 @@ int Server::main(const std::vector & /*args*/) LOG_INFO(log, "Listening for PostgreSQL compatibility protocol: " + address.toString()); }); +#if USE_GRPC + create_server("grpc_port", [&](UInt16 port) + { + Poco::Net::SocketAddress server_address(listen_host, port); + servers.emplace_back(std::make_unique(*this, make_socket_address(listen_host, port))); + LOG_INFO(log, "Listening for gRPC protocol: " + server_address.toString()); + }); +#endif + /// Prometheus (if defined and not setup yet with http_port) create_server("prometheus.port", [&](UInt16 port) { @@ -1052,7 +1071,7 @@ int Server::main(const std::vector & /*args*/) global_context->enableNamedSessions(); for (auto & server : servers) - server->start(); + server.start(); { String level_str = config().getString("text_log.level", ""); @@ -1084,8 +1103,8 @@ int Server::main(const std::vector & /*args*/) int current_connections = 0; for (auto & server : servers) { - server->stop(); - current_connections += server->currentConnections(); + server.stop(); + current_connections += server.currentConnections(); } if (current_connections) @@ -1105,7 +1124,7 @@ int Server::main(const std::vector & /*args*/) { current_connections = 0; for (auto & server : servers) - current_connections += server->currentConnections(); + current_connections += server.currentConnections(); if (!current_connections) break; sleep_current_ms += sleep_one_ms; diff --git a/programs/server/config.d/logging_no_rotate.xml b/programs/server/config.d/logging_no_rotate.xml new file mode 120000 index 00000000000..cd66c69b3ed --- /dev/null +++ b/programs/server/config.d/logging_no_rotate.xml @@ -0,0 +1 @@ +../../../tests/config/config.d/logging_no_rotate.xml \ No newline at end of file diff --git a/programs/server/config.xml b/programs/server/config.xml index e17b59671af..6b74a3e1e8a 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -11,6 +11,9 @@ trace /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.err.log + 1000M 10 @@ -131,6 +134,34 @@ 4096 3 + + + + true + + + + + + + + + + + + + + 100 @@ -675,7 +706,7 @@ *_dictionary.xml diff --git a/programs/server/play.html b/programs/server/play.html index 22eea0002ca..37869228c04 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -1,6 +1,7 @@ + ClickHouse Query @@ -286,6 +288,8 @@
 (Ctrl+Enter) + + 🌑🌞
@@ -299,50 +303,117 @@
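Taken together, the command-line additions in this patch can be exercised end-to-end. A sketch, assuming a locally running server and only the flag names and values introduced above:

```bash
# Ask the server to process a query only up to an intermediate stage
# (new --stage option; values accepted by QueryProcessingStage::fromString
# per this patch: complete, fetch_columns, with_mergeable_state,
# with_mergeable_state_after_aggregation).
clickhouse-client --stage with_mergeable_state \
    --query "SELECT count() FROM system.numbers LIMIT 1000000"

# Keep interactive history in a custom location (new --history_file option).
clickhouse-client --history_file /tmp/clickhouse-history

# Stop the server with SIGKILL instead of SIGTERM (new --force flag,
# default false; also honored by 'clickhouse restart').
sudo clickhouse stop --force
```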