diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
deleted file mode 100644
index 633dd47a2d5..00000000000
--- a/.github/workflows/codeql-analysis.yml
+++ /dev/null
@@ -1,32 +0,0 @@
-# See the example here: https://github.com/github/codeql-action
-
-name: "CodeQL Scanning"
-
-on:
-  schedule:
-    - cron: '0 19 * * *'
-jobs:
-  CodeQL-Build:
-
-    runs-on: self-hosted
-    timeout-minutes: 1440
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v2
-        with:
-          fetch-depth: 2
-          submodules: 'recursive'
-
-      - name: Initialize CodeQL
-        uses: github/codeql-action/init@v1
-
-        with:
-          languages: cpp
-
-      - run: sudo apt-get update && sudo apt-get install -y git cmake python ninja-build gcc-10 g++-10 && mkdir build
-      - run: cd build && CC=gcc-10 CXX=g++-10 cmake ..
-      - run: cd build && ninja
-
-      - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@v1
diff --git a/.gitignore b/.gitignore
index 1e9765dca9e..d33dbf0600d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -137,3 +137,9 @@ website/package-lock.json
 /prof
 
 *.iml
+
+# data store
+/programs/server/data
+/programs/server/metadata
+/programs/server/store
+
diff --git a/.gitmodules b/.gitmodules
index ecccf0633e2..7a2c5600e65 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -184,7 +184,7 @@
 	url = https://github.com/ClickHouse-Extras/krb5
 [submodule "contrib/cyrus-sasl"]
 	path = contrib/cyrus-sasl
-	url = https://github.com/cyrusimap/cyrus-sasl
+	url = https://github.com/ClickHouse-Extras/cyrus-sasl
 	branch = cyrus-sasl-2.1
 [submodule "contrib/croaring"]
 	path = contrib/croaring
@@ -220,4 +220,4 @@
 	url = https://github.com/ClickHouse-Extras/boringssl.git
 [submodule "contrib/NuRaft"]
 	path = contrib/NuRaft
-	url = https://github.com/eBay/NuRaft.git
+	url = https://github.com/ClickHouse-Extras/NuRaft.git
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b328dcf5c88..e2c777b3bcf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,180 @@
+## ClickHouse release 21.2
+
+### ClickHouse release v21.2.2.8-stable, 2021-02-07
+
+#### Backward Incompatible Change
+
+* Bitwise functions (`bitAnd`, `bitOr`, etc) are forbidden for floating point arguments. Now you have to do an explicit cast to integer. [#19853](https://github.com/ClickHouse/ClickHouse/pull/19853) ([Azat Khuzhin](https://github.com/azat)).
+* Forbid `lcm`/`gcd` for floats. [#19532](https://github.com/ClickHouse/ClickHouse/pull/19532) ([Azat Khuzhin](https://github.com/azat)).
+* Fix memory tracking for `OPTIMIZE TABLE`/merges; account query memory limits and sampling for `OPTIMIZE TABLE`/merges. [#18772](https://github.com/ClickHouse/ClickHouse/pull/18772) ([Azat Khuzhin](https://github.com/azat)).
+* Disallow floating point columns as partition keys, see [#18421](https://github.com/ClickHouse/ClickHouse/issues/18421#event-4147046255). [#18464](https://github.com/ClickHouse/ClickHouse/pull/18464) ([hexiaoting](https://github.com/hexiaoting)).
+* Excessive parentheses in type definitions are no longer supported, for example: `Array((UInt8))`.
+
+#### New Feature
+
+* Added `PostgreSQL` table engine (both select/insert, with support for multidimensional arrays), also as a table function. Added `PostgreSQL` dictionary source. Added `PostgreSQL` database engine. [#18554](https://github.com/ClickHouse/ClickHouse/pull/18554) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Data type `Nested` now supports arbitrary levels of nesting. 
Introduced subcolumns of complex types, such as `size0` in `Array`, `null` in `Nullable`, and names of `Tuple` elements, which can be read without reading the whole column. [#17310](https://github.com/ClickHouse/ClickHouse/pull/17310) ([Anton Popov](https://github.com/CurtizJ)).
+* Added `Nullable` support for `FlatDictionary`, `HashedDictionary`, `ComplexKeyHashedDictionary`, `DirectDictionary`, `ComplexKeyDirectDictionary`, `RangeHashedDictionary`. [#18236](https://github.com/ClickHouse/ClickHouse/pull/18236) ([Maksim Kita](https://github.com/kitaisreal)).
+* Adds a new table called `system.distributed_ddl_queue` that displays the queries in the DDL worker queue. [#17656](https://github.com/ClickHouse/ClickHouse/pull/17656) ([Bharat Nallan](https://github.com/bharatnc)).
+* Added support for mapping LDAP group names, and attribute values in general, to local roles for users from LDAP user directories. [#17211](https://github.com/ClickHouse/ClickHouse/pull/17211) ([Denis Glazachev](https://github.com/traceon)).
+* Support INSERT into the table function `cluster`, and, for both table functions `remote` and `cluster`, support distributing data across nodes by specifying a sharding key. Closes [#16752](https://github.com/ClickHouse/ClickHouse/issues/16752). [#18264](https://github.com/ClickHouse/ClickHouse/pull/18264) ([flynn](https://github.com/ucasFL)).
+* Add function `decodeXMLComponent` to decode XML character entities. Example: `SELECT decodeXMLComponent('Hello,&quot;world&quot;!')` [#17659](https://github.com/ClickHouse/ClickHouse/issues/17659). [#18542](https://github.com/ClickHouse/ClickHouse/pull/18542) ([nauta](https://github.com/nautaa)).
+* Added functions `parseDateTimeBestEffortUSOrZero`, `parseDateTimeBestEffortUSOrNull`. [#19712](https://github.com/ClickHouse/ClickHouse/pull/19712) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add `sign` math function. [#19527](https://github.com/ClickHouse/ClickHouse/pull/19527) ([flynn](https://github.com/ucasFL)).
+* Add information about used features (functions, table engines, etc) into `system.query_log`. [#18495](https://github.com/ClickHouse/ClickHouse/issues/18495). [#19371](https://github.com/ClickHouse/ClickHouse/pull/19371) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Function `formatDateTime` supports the `%Q` modifier to format the date as a quarter. [#19224](https://github.com/ClickHouse/ClickHouse/pull/19224) ([Jianmei Zhang](https://github.com/zhangjmruc)).
+* Support MetaKey+Enter hotkey binding in play UI. [#19012](https://github.com/ClickHouse/ClickHouse/pull/19012) ([sundyli](https://github.com/sundy-li)).
+* Add three functions for the Map data type (see the sketch after this list): 1. `mapContains(map, key)` to check whether the map's keys include the second parameter `key`; 2. `mapKeys(map)` returns all the keys in Array format; 3. `mapValues(map)` returns all the values in Array format. [#18788](https://github.com/ClickHouse/ClickHouse/pull/18788) ([hexiaoting](https://github.com/hexiaoting)).
+* Add `log_comment` setting related to [#18494](https://github.com/ClickHouse/ClickHouse/issues/18494). [#18549](https://github.com/ClickHouse/ClickHouse/pull/18549) ([Zijie Lu](https://github.com/TszKitLo40)).
+* Add support of tuple argument to `argMin` and `argMax` functions. [#17359](https://github.com/ClickHouse/ClickHouse/pull/17359) ([Ildus Kurbangaliev](https://github.com/ildus)).
+* Support `EXISTS VIEW` syntax. [#18552](https://github.com/ClickHouse/ClickHouse/pull/18552) ([Du Chuan](https://github.com/spongedu)).
+* Add `SELECT ALL` syntax. Closes [#18706](https://github.com/ClickHouse/ClickHouse/issues/18706). [#18723](https://github.com/ClickHouse/ClickHouse/pull/18723) ([flynn](https://github.com/ucasFL)).
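An illustrative sketch for the three Map functions above. `Map` is still experimental in this release, so this assumes `allow_experimental_map_type` is enabled and builds the map via `CAST` from a tuple of arrays:

```sql
SET allow_experimental_map_type = 1;

WITH CAST((['a', 'b'], [1, 2]), 'Map(String, UInt8)') AS m
SELECT
    mapContains(m, 'a') AS has_a,  -- 1: the key exists
    mapKeys(m)          AS keys,   -- ['a', 'b']
    mapValues(m)        AS vals;   -- [1, 2]
```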
+
+#### Performance Improvement
+
+* Faster parts removal by lowering the number of `stat` syscalls. This restores an optimization that existed a while ago. A safer interface of `IDisk`. This closes [#19065](https://github.com/ClickHouse/ClickHouse/issues/19065). [#19086](https://github.com/ClickHouse/ClickHouse/pull/19086) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Aliases declared in the `WITH` statement are properly used in index analysis. Queries like `WITH column AS alias SELECT ... WHERE alias = ...` may use an index now (see the sketch below). [#18896](https://github.com/ClickHouse/ClickHouse/pull/18896) ([Amos Bird](https://github.com/amosbird)).
+* Add `optimize_alias_column_prediction` (on by default), which will: respect aliased columns in WHERE during partition pruning and skipping data using secondary indexes; respect aliased columns in WHERE for trivial count queries for optimize_trivial_count; respect aliased columns in GROUP BY/ORDER BY for optimize_aggregation_in_order/optimize_read_in_order. [#16995](https://github.com/ClickHouse/ClickHouse/pull/16995) ([sundyli](https://github.com/sundy-li)).
+* Speed up the aggregate function `sum`. The improvement is only visible on synthetic benchmarks and is not very practical. [#19216](https://github.com/ClickHouse/ClickHouse/pull/19216) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Update libc++ and use another ABI to provide better performance. [#18914](https://github.com/ClickHouse/ClickHouse/pull/18914) ([Danila Kutenin](https://github.com/danlark1)).
+* Rewrite `sumIf()` and `sum(if())` to `countIf()` when logically equivalent. [#17041](https://github.com/ClickHouse/ClickHouse/pull/17041) ([flynn](https://github.com/ucasFL)).
+* Use a connection pool for S3 connections, controlled by the `s3_max_connections` setting. [#13405](https://github.com/ClickHouse/ClickHouse/pull/13405) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Add support for the zstd long option for better compression of string columns, to save space. [#17184](https://github.com/ClickHouse/ClickHouse/pull/17184) ([ygrek](https://github.com/ygrek)).
+* Slightly improve server latency by removing access to the configuration on every connection. [#19863](https://github.com/ClickHouse/ClickHouse/pull/19863) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Reduce lock contention for multiple layers of the `Buffer` engine. [#19379](https://github.com/ClickHouse/ClickHouse/pull/19379) ([Azat Khuzhin](https://github.com/azat)).
+* Support splitting the `Filter` step of a query plan into an `Expression + Filter` pair. Together with the `Expression + Expression` merging optimization ([#17458](https://github.com/ClickHouse/ClickHouse/issues/17458)) it may delay execution of some expressions until after the `Filter` step. [#19253](https://github.com/ClickHouse/ClickHouse/pull/19253) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+
+#### Improvement
+
+* `SELECT count() FROM table` can now be executed if the user is allowed to select at least one column of the `table`. This PR fixes [#10639](https://github.com/ClickHouse/ClickHouse/issues/10639). [#18233](https://github.com/ClickHouse/ClickHouse/pull/18233) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Set charset to `utf8mb4` when interacting with remote MySQL servers. Fixes [#19795](https://github.com/ClickHouse/ClickHouse/issues/19795). [#19800](https://github.com/ClickHouse/ClickHouse/pull/19800) ([alexey-milovidov](https://github.com/alexey-milovidov)).
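To make the index-analysis entry above concrete, a sketch of the `WITH column AS alias` pattern; the table `hits` and its primary-key column `CounterID` are hypothetical:

```sql
-- The alias in WHERE now resolves to the underlying key column,
-- so the primary key index can be used instead of a full scan.
WITH CounterID AS cid
SELECT count()
FROM hits
WHERE cid = 62;
```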
+* The `S3` table function now supports the `auto` compression mode (autodetect, see the sketch below). This closes [#18754](https://github.com/ClickHouse/ClickHouse/issues/18754). [#19793](https://github.com/ClickHouse/ClickHouse/pull/19793) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Correctly output infinite arguments for the `formatReadableTimeDelta` function. In previous versions, there was an implicit conversion to an implementation-specific integer value. [#19791](https://github.com/ClickHouse/ClickHouse/pull/19791) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* The table function `S3` will use the global region if the region can't be determined exactly. This closes [#10998](https://github.com/ClickHouse/ClickHouse/issues/10998). [#19750](https://github.com/ClickHouse/ClickHouse/pull/19750) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* In distributed queries, if the setting `async_socket_for_remote` is enabled, it was possible to get a stack overflow (at least in debug build configuration) if a very deeply nested data type is used in a table (e.g. `Array(Array(Array(...more...)))`). This fixes [#19108](https://github.com/ClickHouse/ClickHouse/issues/19108). This change introduces a minor backward incompatibility: excessive parentheses in type definitions are no longer supported, for example: `Array((UInt8))`. [#19736](https://github.com/ClickHouse/ClickHouse/pull/19736) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add a separate pool for message brokers (RabbitMQ and Kafka). [#19722](https://github.com/ClickHouse/ClickHouse/pull/19722) ([Azat Khuzhin](https://github.com/azat)).
+* Fix rare `max_number_of_merges_with_ttl_in_pool` limit overrun (more merges with TTL can be assigned) for non-replicated MergeTree. [#19708](https://github.com/ClickHouse/ClickHouse/pull/19708) ([alesapin](https://github.com/alesapin)).
+* Dictionary: better error message during attribute parsing. [#19678](https://github.com/ClickHouse/ClickHouse/pull/19678) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add an option to disable validation of checksums on reading. It should never be used in production. Please do not expect any benefits in disabling it; it may only be used for experiments and benchmarks. The setting is only applicable to tables of the MergeTree family. Checksums are always validated for other table engines and when receiving data over the network. In my observations there is no performance difference, or it is less than 0.5%. [#19588](https://github.com/ClickHouse/ClickHouse/pull/19588) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Support constant result in function `multiIf`. [#19533](https://github.com/ClickHouse/ClickHouse/pull/19533) ([Maksim Kita](https://github.com/kitaisreal)).
+* Enable the functions `length`/`empty`/`notEmpty` for the `Map` data type, based on the number of keys in the Map. [#19530](https://github.com/ClickHouse/ClickHouse/pull/19530) ([taiyang-li](https://github.com/taiyang-li)).
+* Add a `--reconnect` option to `clickhouse-benchmark`. When this option is specified, it will reconnect before every request. This is needed for testing. [#19872](https://github.com/ClickHouse/ClickHouse/pull/19872) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Support using the new location of the `.debug` file. This fixes [#19348](https://github.com/ClickHouse/ClickHouse/issues/19348). [#19520](https://github.com/ClickHouse/ClickHouse/pull/19520) ([Amos Bird](https://github.com/amosbird)).
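An illustrative call for the `auto` compression mode of the `S3` table function mentioned above; the bucket URL, format and structure are hypothetical:

```sql
SELECT *
FROM s3(
    'https://my-bucket.s3.amazonaws.com/data/events.csv.gz',  -- hypothetical path
    'CSVWithNames',
    'event_date Date, user_id UInt64',
    'auto')  -- the codec is autodetected from the .gz extension
LIMIT 10;
```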
+* The `toIPv6` function parses `IPv4` addresses. [#19518](https://github.com/ClickHouse/ClickHouse/pull/19518) ([Bharat Nallan](https://github.com/bharatnc)).
+* Add an `http_referer` field to `system.query_log`, `system.processes`, etc. This closes [#19389](https://github.com/ClickHouse/ClickHouse/issues/19389). [#19390](https://github.com/ClickHouse/ClickHouse/pull/19390) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Improve MySQL compatibility by making more functions case-insensitive and adding aliases. [#19387](https://github.com/ClickHouse/ClickHouse/pull/19387) ([Daniil Kondratyev](https://github.com/dankondr)).
+* Add metrics for MergeTree part types (Wide/Compact/InMemory). [#19381](https://github.com/ClickHouse/ClickHouse/pull/19381) ([Azat Khuzhin](https://github.com/azat)).
+* Allow Docker to be executed with an arbitrary UID. [#19374](https://github.com/ClickHouse/ClickHouse/pull/19374) ([filimonov](https://github.com/filimonov)).
+* Fix wrong alignment of values of the `IPv4` data type in Pretty formats. They were aligned to the right, not to the left. This closes [#19184](https://github.com/ClickHouse/ClickHouse/issues/19184). [#19339](https://github.com/ClickHouse/ClickHouse/pull/19339) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Allow changing `max_server_memory_usage` without a restart. This closes [#18154](https://github.com/ClickHouse/ClickHouse/issues/18154). [#19186](https://github.com/ClickHouse/ClickHouse/pull/19186) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* The exception when the function `bar` is called with certain NaN arguments could be slightly misleading in previous versions. This fixes [#19088](https://github.com/ClickHouse/ClickHouse/issues/19088). [#19107](https://github.com/ClickHouse/ClickHouse/pull/19107) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Explicitly set the uid/gid of the clickhouse user & group to the fixed value 101 in clickhouse-server images. [#19096](https://github.com/ClickHouse/ClickHouse/pull/19096) ([filimonov](https://github.com/filimonov)).
+* Fixed the `PeekableReadBuffer: Memory limit exceed` error when inserting data with huge strings. Fixes [#18690](https://github.com/ClickHouse/ClickHouse/issues/18690). [#18979](https://github.com/ClickHouse/ClickHouse/pull/18979) ([tavplubix](https://github.com/tavplubix)).
+* Docker image: several improvements for the clickhouse-server entrypoint. [#18954](https://github.com/ClickHouse/ClickHouse/pull/18954) ([filimonov](https://github.com/filimonov)).
+* Add `normalizeQueryKeepNames` and `normalizedQueryHashKeepNames` to normalize queries without masking long names with `?` (see the sketch below). This helps better analyze complex query logs. [#18910](https://github.com/ClickHouse/ClickHouse/pull/18910) ([Amos Bird](https://github.com/amosbird)).
+* Check the per-block checksum of a distributed batch on the sender before sending (without reading the file twice; the checksums are verified while reading). This avoids the INSERT getting stuck on the receiver (due to a truncated .bin file on the sender). Also avoid reading .bin files twice for batched INSERTs (previously this was required to calculate rows/bytes with squashing taken into account; now this information is included in the header, and backward compatibility is preserved). [#18853](https://github.com/ClickHouse/ClickHouse/pull/18853) ([Azat Khuzhin](https://github.com/azat)).
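A sketch of the query-log analysis that `normalizeQueryKeepNames` (above) enables: literals are masked with `?`, but long identifiers stay readable:

```sql
SELECT
    normalizeQueryKeepNames(query) AS pattern,
    count() AS times
FROM system.query_log
WHERE type = 'QueryFinish'
GROUP BY pattern
ORDER BY times DESC
LIMIT 10;
```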
+* Fix issues with RIGHT and FULL JOIN of tables with aggregate function states. In previous versions an exception about the `cloneResized` method was thrown. [#18818](https://github.com/ClickHouse/ClickHouse/pull/18818) ([templarzq](https://github.com/templarzq)).
+* Added prefix-based S3 endpoint settings. [#18812](https://github.com/ClickHouse/ClickHouse/pull/18812) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Add support for the `UInt8`, `UInt16`, `UInt32`, `UInt64` argument types in the `bitmapTransform`, `bitmapSubsetInRange`, `bitmapSubsetLimit`, `bitmapContains` functions. This closes [#18713](https://github.com/ClickHouse/ClickHouse/issues/18713). [#18791](https://github.com/ClickHouse/ClickHouse/pull/18791) ([sundyli](https://github.com/sundy-li)).
+* Allow CTEs (Common Table Expressions) to be further aliased. Propagate CSE (Common Subexpression Elimination) to subqueries on the same level when `enable_global_with_statement = 1`. This fixes [#17378](https://github.com/ClickHouse/ClickHouse/issues/17378). This fixes https://github.com/ClickHouse/ClickHouse/pull/16575#issuecomment-753416235. [#18684](https://github.com/ClickHouse/ClickHouse/pull/18684) ([Amos Bird](https://github.com/amosbird)).
+* Update librdkafka to v1.6.0-RC2. Fixes [#18668](https://github.com/ClickHouse/ClickHouse/issues/18668). [#18671](https://github.com/ClickHouse/ClickHouse/pull/18671) ([filimonov](https://github.com/filimonov)).
+* In case of unexpected exceptions, automatically restart the background thread that executes distributed DDL queries. Fixes [#17991](https://github.com/ClickHouse/ClickHouse/issues/17991). [#18285](https://github.com/ClickHouse/ClickHouse/pull/18285) ([徐炘](https://github.com/weeds085490)).
+* Updated the AWS C++ SDK in order to utilize global regions in S3. [#17870](https://github.com/ClickHouse/ClickHouse/pull/17870) ([Vladimir Chebotarev](https://github.com/excitoon)).
+* Added support for the `WITH ... [AND] [PERIODIC] REFRESH [interval_in_sec]` clause when creating `LIVE VIEW` tables (see the sketch below). [#14822](https://github.com/ClickHouse/ClickHouse/pull/14822) ([vzakaznikov](https://github.com/vzakaznikov)).
+* Restrict `MODIFY TTL` queries for `MergeTree` tables created in the old syntax. Previously the query succeeded, but actually it had no effect. [#19064](https://github.com/ClickHouse/ClickHouse/pull/19064) ([Anton Popov](https://github.com/CurtizJ)).
+
+#### Bug Fix
+
+* Fix index analysis of binary functions with a constant argument, which led to wrong query results. This fixes [#18364](https://github.com/ClickHouse/ClickHouse/issues/18364). [#18373](https://github.com/ClickHouse/ClickHouse/pull/18373) ([Amos Bird](https://github.com/amosbird)).
+* Fix starting the server with tables having default expressions containing dictGet(). Allow getting the return type of dictGet() without loading the dictionary. [#19805](https://github.com/ClickHouse/ClickHouse/pull/19805) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix a server crash after queries with the `if` function whose then/else branches result in a `Tuple` type containing an `Array` or another complex type. Fixes [#18356](https://github.com/ClickHouse/ClickHouse/issues/18356). [#20133](https://github.com/ClickHouse/ClickHouse/pull/20133) ([alesapin](https://github.com/alesapin)).
+* `MaterializeMySQL` (experimental feature): Fix replication for statements that update several tables. [#20066](https://github.com/ClickHouse/ClickHouse/pull/20066) ([Håvard Kvålen](https://github.com/havardk)).
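A minimal sketch of the periodically refreshed `LIVE VIEW` clause from the entry above; `LIVE VIEW` is experimental, so this assumes `allow_experimental_live_view` is enabled:

```sql
SET allow_experimental_live_view = 1;

-- Re-evaluates the SELECT roughly every 5 seconds.
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
```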
+* Prevent "Connection refused" in Docker during initialization script execution. [#20012](https://github.com/ClickHouse/ClickHouse/pull/20012) ([filimonov](https://github.com/filimonov)).
+* `EmbeddedRocksDB` is an experimental storage. Fix the issue with lack of proper type checking. Simplified code. This closes [#19967](https://github.com/ClickHouse/ClickHouse/issues/19967). [#19972](https://github.com/ClickHouse/ClickHouse/pull/19972) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix a segfault in the function `fromModifiedJulianDay` when the argument type is `Nullable(T)` for any integral type other than Int32 (see the sketch below). [#19959](https://github.com/ClickHouse/ClickHouse/pull/19959) ([PHO](https://github.com/depressed-pho)).
+* The function `greatCircleAngle` returned inaccurate results in previous versions. This closes [#19769](https://github.com/ClickHouse/ClickHouse/issues/19769). [#19789](https://github.com/ClickHouse/ClickHouse/pull/19789) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix a rare bug when some replicated operations (like mutations) could not process some parts after data corruption. Fixes [#19593](https://github.com/ClickHouse/ClickHouse/issues/19593). [#19702](https://github.com/ClickHouse/ClickHouse/pull/19702) ([alesapin](https://github.com/alesapin)).
+* The background thread which executes `ON CLUSTER` queries might hang waiting for a dropped replicated table to do something. It's fixed. [#19684](https://github.com/ClickHouse/ClickHouse/pull/19684) ([yiguolei](https://github.com/yiguolei)).
+* Fix wrong deserialization of columns description. The bug made INSERT into a table with a column named `\` impossible. [#19479](https://github.com/ClickHouse/ClickHouse/pull/19479) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Mark a distributed batch as broken in case of an empty data block in one of its files. [#19449](https://github.com/ClickHouse/ClickHouse/pull/19449) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed a very rare bug that might cause a mutation to hang after `DROP/DETACH/REPLACE/MOVE PARTITION`. It was partially fixed by [#15537](https://github.com/ClickHouse/ClickHouse/issues/15537) in most cases. [#19443](https://github.com/ClickHouse/ClickHouse/pull/19443) ([tavplubix](https://github.com/tavplubix)).
+* Fix possible error `Extremes transform was already added to pipeline`. Fixes [#14100](https://github.com/ClickHouse/ClickHouse/issues/14100). [#19430](https://github.com/ClickHouse/ClickHouse/pull/19430) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix default values in join types with a non-zero default (e.g. some Enums). Closes [#18197](https://github.com/ClickHouse/ClickHouse/issues/18197). [#19360](https://github.com/ClickHouse/ClickHouse/pull/19360) ([vdimir](https://github.com/vdimir)).
+* Do not mark a file for distributed send as broken on EOF. [#19290](https://github.com/ClickHouse/ClickHouse/pull/19290) ([Azat Khuzhin](https://github.com/azat)).
+* Fix leaking of a pipe fd for `async_socket_for_remote`. [#19153](https://github.com/ClickHouse/ClickHouse/pull/19153) ([Azat Khuzhin](https://github.com/azat)).
+* Fix infinite reading from a file in `ORC` format (was introduced in [#10580](https://github.com/ClickHouse/ClickHouse/issues/10580)). Fixes [#19095](https://github.com/ClickHouse/ClickHouse/issues/19095). [#19134](https://github.com/ClickHouse/ClickHouse/pull/19134) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
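For the `fromModifiedJulianDay` fix above, a sketch of the argument shape that used to crash, a `Nullable` integral type other than `Int32` (assuming, as the fix implies, that such arguments are accepted):

```sql
SELECT fromModifiedJulianDay(toNullable(toInt64(59000)));  -- a mid-2020 date
```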
+* Fix an issue in the merge tree data writer which could lead to marks bigger than the fixed granularity size. Fixes [#18913](https://github.com/ClickHouse/ClickHouse/issues/18913). [#19123](https://github.com/ClickHouse/ClickHouse/pull/19123) ([alesapin](https://github.com/alesapin)).
+* Fix a startup bug when clickhouse was not able to read the compression codec from `LowCardinality(Nullable(...))` and threw the exception `Attempt to read after EOF`. Fixes [#18340](https://github.com/ClickHouse/ClickHouse/issues/18340). [#19101](https://github.com/ClickHouse/ClickHouse/pull/19101) ([alesapin](https://github.com/alesapin)).
+* Simplify the implementation of `tupleHammingDistance`. Support tuples of any equal length (see the sketch below). Fixes [#19029](https://github.com/ClickHouse/ClickHouse/issues/19029). [#19084](https://github.com/ClickHouse/ClickHouse/pull/19084) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Make sure `groupUniqArray` returns the correct type for arguments of Enum type. This closes [#17875](https://github.com/ClickHouse/ClickHouse/issues/17875). [#19019](https://github.com/ClickHouse/ClickHouse/pull/19019) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix possible error `Expected single dictionary argument for function` when using the function `ignore` with a `LowCardinality` argument. Fixes [#14275](https://github.com/ClickHouse/ClickHouse/issues/14275). [#19016](https://github.com/ClickHouse/ClickHouse/pull/19016) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix inserting of a `LowCardinality` column to a table with the `TinyLog` engine. Fixes [#18629](https://github.com/ClickHouse/ClickHouse/issues/18629). [#19010](https://github.com/ClickHouse/ClickHouse/pull/19010) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix a minor issue in JOIN: Join tries to materialize const columns, but our code waits for them in other places. [#18982](https://github.com/ClickHouse/ClickHouse/pull/18982) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Disable `optimize_move_functions_out_of_any` because the optimization is not always correct. This closes [#18051](https://github.com/ClickHouse/ClickHouse/issues/18051). This closes [#18973](https://github.com/ClickHouse/ClickHouse/issues/18973). [#18981](https://github.com/ClickHouse/ClickHouse/pull/18981) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix possible exception `QueryPipeline stream: different number of columns` caused by merging of the query plan's `Expression` steps. Fixes [#18190](https://github.com/ClickHouse/ClickHouse/issues/18190). [#18980](https://github.com/ClickHouse/ClickHouse/pull/18980) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed a very rare deadlock at shutdown. [#18977](https://github.com/ClickHouse/ClickHouse/pull/18977) ([tavplubix](https://github.com/tavplubix)).
+* Fixed rare crashes when the server ran out of memory. [#18976](https://github.com/ClickHouse/ClickHouse/pull/18976) ([tavplubix](https://github.com/tavplubix)).
+* Fix incorrect behavior when an `ALTER TABLE ... DROP PART 'part_name'` query removes all deduplication blocks for the whole partition. Fixes [#18874](https://github.com/ClickHouse/ClickHouse/issues/18874). [#18969](https://github.com/ClickHouse/ClickHouse/pull/18969) ([alesapin](https://github.com/alesapin)).
+* Fixed issue [#18894](https://github.com/ClickHouse/ClickHouse/issues/18894): add a check to avoid an exception when a long column alias (`table.column` style, usually auto-generated by BI tools like Looker) equals a long table name. [#18968](https://github.com/ClickHouse/ClickHouse/pull/18968) ([Daniel Qin](https://github.com/mathfool)).
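A sketch for the generalized `tupleHammingDistance` above: it accepts two tuples of any equal length and counts the positions where they differ:

```sql
SELECT tupleHammingDistance((1, 2, 3, 4), (1, 0, 3, 7));  -- 2 positions differ
```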
+* Fix error `Task was not found in task queue` (possible only for remote queries, with `async_socket_for_remote = 1`). [#18964](https://github.com/ClickHouse/ClickHouse/pull/18964) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix a bug where a mutation with some escaped text (like `ALTER ... UPDATE e = CAST('foo', 'Enum8(\'foo\' = 1')`) was serialized incorrectly. Fixes [#18878](https://github.com/ClickHouse/ClickHouse/issues/18878). [#18944](https://github.com/ClickHouse/ClickHouse/pull/18944) ([alesapin](https://github.com/alesapin)).
+* ATTACH PARTITION will reset mutations. [#18804](https://github.com/ClickHouse/ClickHouse/issues/18804). [#18935](https://github.com/ClickHouse/ClickHouse/pull/18935) ([fastio](https://github.com/fastio)).
+* Fix an issue with `bitmapOrCardinality` that may lead to a nullptr dereference. This closes [#18911](https://github.com/ClickHouse/ClickHouse/issues/18911). [#18912](https://github.com/ClickHouse/ClickHouse/pull/18912) ([sundyli](https://github.com/sundy-li)).
+* Fixed the `Attempt to read after eof` error when trying to `CAST` `NULL` from `Nullable(String)` to `Nullable(Decimal(P, S))`. Now the function `CAST` returns `NULL` when it cannot parse a decimal from a nullable string. Fixes [#7690](https://github.com/ClickHouse/ClickHouse/issues/7690). [#18718](https://github.com/ClickHouse/ClickHouse/pull/18718) ([Winter Zhang](https://github.com/zhang2014)).
+* Fix a data type conversion issue for the MySQL engine. [#18124](https://github.com/ClickHouse/ClickHouse/pull/18124) ([bo zeng](https://github.com/mis98zb)).
+* Fix a clickhouse-client abort exception while executing only `select`. [#19790](https://github.com/ClickHouse/ClickHouse/pull/19790) ([taiyang-li](https://github.com/taiyang-li)).
+
+
+#### Build/Testing/Packaging Improvement
+
+* Run [SQLancer](https://twitter.com/RiggerManuel/status/1352345625480884228) (logical SQL fuzzer) in CI. [#19006](https://github.com/ClickHouse/ClickHouse/pull/19006) ([Ilya Yatsishin](https://github.com/qoega)).
+* Query Fuzzer will fuzz newly added tests more extensively. This closes [#18916](https://github.com/ClickHouse/ClickHouse/issues/18916). [#19185](https://github.com/ClickHouse/ClickHouse/pull/19185) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Integrate with [Big List of Naughty Strings](https://github.com/minimaxir/big-list-of-naughty-strings/) for better fuzzing. [#19480](https://github.com/ClickHouse/ClickHouse/pull/19480) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Add integration tests run with MSan. [#18974](https://github.com/ClickHouse/ClickHouse/pull/18974) ([alesapin](https://github.com/alesapin)).
+* Fixed MemorySanitizer errors in cyrus-sasl and musl. [#19821](https://github.com/ClickHouse/ClickHouse/pull/19821) ([Ilya Yatsishin](https://github.com/qoega)).
+* An insufficient arguments check in the `positionCaseInsensitiveUTF8` function triggered the address sanitizer. [#19720](https://github.com/ClickHouse/ClickHouse/pull/19720) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Remove --project-directory for docker-compose in integration tests. Fix logs formatting from the docker container. [#19706](https://github.com/ClickHouse/ClickHouse/pull/19706) ([Ilya Yatsishin](https://github.com/qoega)).
+* Made generation of macros.xml easier for integration tests. No more excessive logging from dicttoxml. The dicttoxml project has not been active for 5+ years. [#19697](https://github.com/ClickHouse/ClickHouse/pull/19697) ([Ilya Yatsishin](https://github.com/qoega)).
+* Allow explicitly enabling or disabling the watchdog via the environment variable `CLICKHOUSE_WATCHDOG_ENABLE`. By default it is enabled if the server is not attached to a terminal. [#19522](https://github.com/ClickHouse/ClickHouse/pull/19522) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Allow building ClickHouse with Kafka support on arm64. [#19369](https://github.com/ClickHouse/ClickHouse/pull/19369) ([filimonov](https://github.com/filimonov)).
+* Allow building librdkafka without ssl. [#19337](https://github.com/ClickHouse/ClickHouse/pull/19337) ([filimonov](https://github.com/filimonov)).
+* Restore Kafka input in FreeBSD builds. [#18924](https://github.com/ClickHouse/ClickHouse/pull/18924) ([Alexandre Snarskii](https://github.com/snar)).
+* Fix a potential nullptr dereference in the table function `VALUES`. [#19357](https://github.com/ClickHouse/ClickHouse/pull/19357) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Avoid UBSan reports in the `arrayElement`, `substring` and `arraySum` functions. Fixes [#19305](https://github.com/ClickHouse/ClickHouse/issues/19305). Fixes [#19287](https://github.com/ClickHouse/ClickHouse/issues/19287). This closes [#19336](https://github.com/ClickHouse/ClickHouse/issues/19336). [#19347](https://github.com/ClickHouse/ClickHouse/pull/19347) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+
+
 ## ClickHouse release 21.1
+### ClickHouse release v21.1.3.32-stable, 2021-02-03
+
+#### Bug Fix
+
+* BloomFilter index crash fix. Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). [#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix a crash when pushing down predicates to a union distinct subquery. This fixes [#19855](https://github.com/ClickHouse/ClickHouse/issues/19855). [#19861](https://github.com/ClickHouse/ClickHouse/pull/19861) ([Amos Bird](https://github.com/amosbird)).
+* Fix filtering by UInt8 values greater than 127. [#19799](https://github.com/ClickHouse/ClickHouse/pull/19799) ([Anton Popov](https://github.com/CurtizJ)).
+* In previous versions, unusual arguments for the function `arrayEnumerateUniq` could cause a crash or an infinite loop. This closes [#19787](https://github.com/ClickHouse/ClickHouse/issues/19787). [#19788](https://github.com/ClickHouse/ClickHouse/pull/19788) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed a stack overflow when using accurate comparison of an arithmetic type with a string type. [#19773](https://github.com/ClickHouse/ClickHouse/pull/19773) ([tavplubix](https://github.com/tavplubix)).
+* Fix a crash when a nested column name was used in `WHERE` or `PREWHERE`. Fixes [#19755](https://github.com/ClickHouse/ClickHouse/issues/19755). [#19763](https://github.com/ClickHouse/ClickHouse/pull/19763) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix a segmentation fault in the `bitmapAndnot` function. Fixes [#19668](https://github.com/ClickHouse/ClickHouse/issues/19668). [#19713](https://github.com/ClickHouse/ClickHouse/pull/19713) ([Maksim Kita](https://github.com/kitaisreal)).
+* Some functions with big integers could cause a segfault. Big integers are an experimental feature. This closes [#19667](https://github.com/ClickHouse/ClickHouse/issues/19667). [#19672](https://github.com/ClickHouse/ClickHouse/pull/19672) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix wrong result of function `neighbor` for `LowCardinality` argument. Fixes [#10333](https://github.com/ClickHouse/ClickHouse/issues/10333). [#19617](https://github.com/ClickHouse/ClickHouse/pull/19617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix use-after-free of the CompressedWriteBuffer in Connection after disconnect. [#19599](https://github.com/ClickHouse/ClickHouse/pull/19599) ([Azat Khuzhin](https://github.com/azat)). +* `DROP/DETACH TABLE table ON CLUSTER cluster SYNC` query might hang, it's fixed. Fixes [#19568](https://github.com/ClickHouse/ClickHouse/issues/19568). [#19572](https://github.com/ClickHouse/ClickHouse/pull/19572) ([tavplubix](https://github.com/tavplubix)). +* Query CREATE DICTIONARY id expression fix. [#19571](https://github.com/ClickHouse/ClickHouse/pull/19571) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix SIGSEGV with merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read=0/UINT64_MAX. [#19528](https://github.com/ClickHouse/ClickHouse/pull/19528) ([Azat Khuzhin](https://github.com/azat)). +* Buffer overflow (on memory read) was possible if `addMonth` function was called with specifically crafted arguments. This fixes [#19441](https://github.com/ClickHouse/ClickHouse/issues/19441). This fixes [#19413](https://github.com/ClickHouse/ClickHouse/issues/19413). [#19472](https://github.com/ClickHouse/ClickHouse/pull/19472) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Uninitialized memory read was possible in encrypt/decrypt functions if empty string was passed as IV. This closes [#19391](https://github.com/ClickHouse/ClickHouse/issues/19391). [#19397](https://github.com/ClickHouse/ClickHouse/pull/19397) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix possible buffer overflow in Uber H3 library. See https://github.com/uber/h3/issues/392. This closes [#19219](https://github.com/ClickHouse/ClickHouse/issues/19219). [#19383](https://github.com/ClickHouse/ClickHouse/pull/19383) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix system.parts _state column (LOGICAL_ERROR when querying this column, due to incorrect order). [#19346](https://github.com/ClickHouse/ClickHouse/pull/19346) ([Azat Khuzhin](https://github.com/azat)). +* Fixed possible wrong result or segfault on aggregation when Materialized View and its target table have different structure. Fixes [#18063](https://github.com/ClickHouse/ClickHouse/issues/18063). [#19322](https://github.com/ClickHouse/ClickHouse/pull/19322) ([tavplubix](https://github.com/tavplubix)). +* Fix error `Cannot convert column now64() because it is constant but values of constants are different in source and result`. Continuation of [#7156](https://github.com/ClickHouse/ClickHouse/issues/7156). [#19316](https://github.com/ClickHouse/ClickHouse/pull/19316) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix bug when concurrent `ALTER` and `DROP` queries may hang while processing ReplicatedMergeTree table. [#19237](https://github.com/ClickHouse/ClickHouse/pull/19237) ([alesapin](https://github.com/alesapin)). +* Fixed `There is no checkpoint` error when inserting data through http interface using `Template` or `CustomSeparated` format. Fixes [#19021](https://github.com/ClickHouse/ClickHouse/issues/19021). [#19072](https://github.com/ClickHouse/ClickHouse/pull/19072) ([tavplubix](https://github.com/tavplubix)). +* Disable constant folding for subqueries on the analysis stage, when the result cannot be calculated. 
[#18446](https://github.com/ClickHouse/ClickHouse/pull/18446) ([Azat Khuzhin](https://github.com/azat)).
+* A mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)).
+
+
+
 ### ClickHouse release v21.1.2.15-stable 2021-01-18
 
 #### Backward Incompatible Change
diff --git a/README.md b/README.md
index 8e114d5abe9..3329a98877f 100644
--- a/README.md
+++ b/README.md
@@ -8,12 +8,8 @@ ClickHouse® is an open-source column-oriented database management system that a
 * [Tutorial](https://clickhouse.tech/docs/en/getting_started/tutorial/) shows how to set up and query small ClickHouse cluster.
 * [Documentation](https://clickhouse.tech/docs/en/) provides more in-depth information.
 * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
-* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-d2zxkf9e-XyxDa_ucfPxzuH4SJIm~Ng) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time.
+* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-ly9m4w1x-6j7x5Ts_pQZqrctAbRZ3cg) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time.
 * [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events.
 * [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation.
-* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian.
 * [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
 * You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
-
-## Upcoming Events
-* [Chinese ClickHouse Meetup (online)](http://hdxu.cn/8KxZE) on 6 February 2021.
diff --git a/base/common/DateLUTImpl.h b/base/common/DateLUTImpl.h
index b5d4be950b5..0538d3a9ab8 100644
--- a/base/common/DateLUTImpl.h
+++ b/base/common/DateLUTImpl.h
@@ -278,6 +278,31 @@ public:
         return res / 3600;
     }
 
+    /** Calculate the offset from UTC in seconds.
+      * That is: take the same literal time as "t", interpret it as if it were UTC to get the corresponding timestamp,
+      * then subtract the former from the latter to get the offset.
+      * The boundaries at DST (daylight saving time) changes should be handled very carefully.
+      */
+    inline time_t timezoneOffset(time_t t) const
+    {
+        DayNum index = findIndex(t);
+
+        /// Calculate the daylight saving offset first.
+        /// Because "amount_of_offset_change" only exists in the LUT entry for the day when the offset changes, it is costly to scan from the very beginning.
+        /// But we can figure out the accumulated offset from 1970-01-01 to that day just by taking the difference between lut[].date values,
+        /// and then directly subtract a multiple of 86400 to get the real DST offset (leap seconds are not considered for now).
+        time_t res = (lut[index].date - lut[0].date) % 86400;
+        /// As far as we know, the maximal DST offset is no more than 2 hours, so after the modulo operation the remainder
+        /// sits between [-offset --> 0 --> offset], which respectively corresponds to moving the clock forward or backward.
+        res = res > 43200 ? (86400 - res) : (0 - res);
+
+        /// Check if there is an offset change during this day and add the change when crossing that point.
+        if (lut[index].amount_of_offset_change != 0 && t >= lut[index].date + lut[index].time_at_offset_change)
+            res += lut[index].amount_of_offset_change;
+
+        return res + offset_at_start_of_epoch;
+    }
+
     /** Only for time zones with/when offset from UTC is multiple of five minutes.
       * This is true for all time zones: right now, all time zones have an offset that is multiple of 15 minutes.
       *
diff --git a/base/common/LocalDate.h b/base/common/LocalDate.h
index a063d6e98a3..e5ebe877bc5 100644
--- a/base/common/LocalDate.h
+++ b/base/common/LocalDate.h
@@ -168,14 +168,6 @@ public:
 
 static_assert(sizeof(LocalDate) == 4);
 
-inline std::ostream & operator<< (std::ostream & ostr, const LocalDate & date)
-{
-    return ostr << date.year()
-        << '-' << (date.month() / 10) << (date.month() % 10)
-        << '-' << (date.day() / 10) << (date.day() % 10);
-}
-
-
 namespace std
 {
 inline string to_string(const LocalDate & date)
diff --git a/base/common/LocalDateTime.h b/base/common/LocalDateTime.h
index d19d862f2ca..0e237789bd1 100644
--- a/base/common/LocalDateTime.h
+++ b/base/common/LocalDateTime.h
@@ -169,20 +169,6 @@ public:
 
 static_assert(sizeof(LocalDateTime) == 8);
 
-inline std::ostream & operator<< (std::ostream & ostr, const LocalDateTime & datetime)
-{
-    ostr << std::setfill('0') << std::setw(4) << datetime.year();
-
-    ostr << '-' << (datetime.month() / 10) << (datetime.month() % 10)
-        << '-' << (datetime.day() / 10) << (datetime.day() % 10)
-        << ' ' << (datetime.hour() / 10) << (datetime.hour() % 10)
-        << ':' << (datetime.minute() / 10) << (datetime.minute() % 10)
-        << ':' << (datetime.second() / 10) << (datetime.second() % 10);
-
-    return ostr;
-}
-
-
 namespace std
 {
 inline string to_string(const LocalDateTime & datetime)
diff --git a/base/common/ReplxxLineReader.cpp b/base/common/ReplxxLineReader.cpp
index 28c7990c353..fcd1610e589 100644
--- a/base/common/ReplxxLineReader.cpp
+++ b/base/common/ReplxxLineReader.cpp
@@ -12,6 +12,8 @@
 #include
 #include
 #include
+#include <fmt/format.h>
+
 namespace
 {
@@ -189,8 +191,8 @@ void ReplxxLineReader::openEditor()
         return;
     }
 
-    String editor = std::getenv("EDITOR");
-    if (editor.empty())
+    const char * editor = std::getenv("EDITOR");
+    if (!editor || !*editor)
         editor = "vim";
 
     replxx::Replxx::State state(rx.get_state());
@@ -204,7 +206,7 @@ void ReplxxLineReader::openEditor()
             if ((-1 == res || 0 == res) && errno != EINTR)
             {
                 rx.print("Cannot write to temporary query file %s: %s\n", filename, errnoToString(errno).c_str());
-                return;
+                break;
             }
             bytes_written += res;
         }
@@ -215,7 +217,7 @@ void ReplxxLineReader::openEditor()
         return;
     }
 
-    if (0 == execute(editor + " " + filename))
+    if (0 == execute(fmt::format("{} {}", editor, filename)))
     {
         try
         {
diff --git a/base/common/arithmeticOverflow.h b/base/common/arithmeticOverflow.h
index 8df037a14af..fd557fd5b2d 100644
--- a/base/common/arithmeticOverflow.h
+++ b/base/common/arithmeticOverflow.h
@@ -1,6 +1,8 @@
 #pragma once
 
 #include
+#include <common/defines.h>
+
 
 namespace common
 {
@@ -156,4 +158,11 @@ namespace common
             return false;
         return (x * y) / y != x;
     }
+
+    /// Multiply and ignore overflow.
+    template <typename T1, typename T2>
+    inline auto NO_SANITIZE_UNDEFINED mulIgnoreOverflow(T1 x, T2 y)
+    {
+        return x * y;
+    }
 }
diff --git a/base/common/defines.h b/base/common/defines.h
index 39df4698b88..845a53179ef 100644
--- a/base/common/defines.h
+++ b/base/common/defines.h
@@ -84,10 +84,12 @@
 #    define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined")))
 #    define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address")))
 #    define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
+#    define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined")))
 #else  /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it.
 #    define NO_SANITIZE_UNDEFINED
 #    define NO_SANITIZE_ADDRESS
 #    define NO_SANITIZE_THREAD
+#    define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED ALWAYS_INLINE
 #endif
 
 /// A template function for suppressing warnings about unused variables or function results.
diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp
index 4cf8a8d7ce9..db7019d3572 100644
--- a/base/daemon/BaseDaemon.cpp
+++ b/base/daemon/BaseDaemon.cpp
@@ -152,7 +152,7 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
     if (sig != SIGTSTP) /// This signal is used for debugging.
     {
         /// The time that is usually enough for separate thread to print info into log.
-        sleepForSeconds(10);
+        sleepForSeconds(20);  /// FIXME: use some feedback from threads that process stacktrace
         call_default_signal_handler(sig);
     }
@@ -230,10 +230,10 @@ public:
         }
         else
         {
-            siginfo_t info;
-            ucontext_t context;
+            siginfo_t info{};
+            ucontext_t context{};
             StackTrace stack_trace(NoCapture{});
-            UInt32 thread_num;
+            UInt32 thread_num{};
             std::string query_id;
             DB::ThreadStatus * thread_ptr{};
@@ -311,7 +311,8 @@ private:
         if (stack_trace.getSize())
         {
             /// Write bare stack trace (addresses) just in case if we will fail to print symbolized stack trace.
-            /// NOTE This still require memory allocations and mutex lock inside logger. BTW we can also print it to stderr using write syscalls.
+            /// NOTE: This still requires memory allocations and a mutex lock inside the logger.
+            /// BTW we can also print it to stderr using write syscalls.
             std::stringstream bare_stacktrace;
             bare_stacktrace << "Stack trace:";
@@ -324,7 +325,7 @@ private:
         /// Write symbolized stack trace line by line for better grep-ability.
         stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); });
 
-#if defined(__linux__)
+#if defined(OS_LINUX)
         /// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace.
         String calculated_binary_hash = getHashOfLoadedBinaryHex();
         if (daemon.stored_binary_hash.empty())
@@ -561,6 +562,7 @@ void debugIncreaseOOMScore()
     {
         DB::WriteBufferFromFile buf("/proc/self/oom_score_adj");
         buf.write(new_score.c_str(), new_score.size());
+        buf.close();
     }
     catch (const Poco::Exception & e)
     {
@@ -783,7 +785,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
 
     /// Setup signal handlers.
     /// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime.
- addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP}, signalHandler, &handled_signals); + addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP, SIGTRAP}, signalHandler, &handled_signals); addSignalHandler({SIGHUP, SIGUSR1}, closeLogsSignalHandler, &handled_signals); addSignalHandler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler, &handled_signals); @@ -986,7 +988,7 @@ void BaseDaemon::setupWatchdog() if (errno == ECHILD) { logger().information("Child process no longer exists."); - _exit(status); + _exit(WEXITSTATUS(status)); } if (WIFEXITED(status)) @@ -1020,7 +1022,7 @@ void BaseDaemon::setupWatchdog() /// Automatic restart is not enabled but you can play with it. #if 1 - _exit(status); + _exit(WEXITSTATUS(status)); #else logger().information("Will restart."); if (argv0) diff --git a/base/glibc-compatibility/musl/sched_getcpu.c b/base/glibc-compatibility/musl/sched_getcpu.c index 57b8b416043..f290f01d153 100644 --- a/base/glibc-compatibility/musl/sched_getcpu.c +++ b/base/glibc-compatibility/musl/sched_getcpu.c @@ -31,7 +31,7 @@ static void *volatile vdso_func = (void *)getcpu_init; int sched_getcpu(void) { int r; - unsigned cpu; + unsigned cpu = 0; #ifdef VDSO_GETCPU_SYM getcpu_f f = (getcpu_f)vdso_func; diff --git a/base/mysqlxx/CMakeLists.txt b/base/mysqlxx/CMakeLists.txt index b410c38cfad..849c58a8527 100644 --- a/base/mysqlxx/CMakeLists.txt +++ b/base/mysqlxx/CMakeLists.txt @@ -3,7 +3,6 @@ add_library (mysqlxx Exception.cpp Query.cpp ResultBase.cpp - StoreQueryResult.cpp UseQueryResult.cpp Row.cpp Value.cpp diff --git a/base/mysqlxx/Connection.cpp b/base/mysqlxx/Connection.cpp index 55757008562..8a15115cb06 100644 --- a/base/mysqlxx/Connection.cpp +++ b/base/mysqlxx/Connection.cpp @@ -116,8 +116,8 @@ void Connection::connect(const char* db, if (!mysql_real_connect(driver.get(), server, user, password, db, port, ifNotEmpty(socket), driver->client_flag)) throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get())); - /// Sets UTF-8 as default encoding. - if (mysql_set_character_set(driver.get(), "UTF8")) + /// Sets UTF-8 as default encoding. See https://mariadb.com/kb/en/mysql_set_character_set/ + if (mysql_set_character_set(driver.get(), "utf8mb4")) throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get())); is_connected = true; diff --git a/base/mysqlxx/Connection.h b/base/mysqlxx/Connection.h index 0e5a608108c..ca67db0e0c6 100644 --- a/base/mysqlxx/Connection.h +++ b/base/mysqlxx/Connection.h @@ -39,7 +39,6 @@ private: /** MySQL connection. 
* Usage: * mysqlxx::Connection connection("Test", "127.0.0.1", "root", "qwerty", 3306); - * std::cout << connection.query("SELECT 'Hello, World!'").store().at(0).at(0).getString() << std::endl; * * Or with Poco library configuration: * mysqlxx::Connection connection("mysql_params"); diff --git a/base/mysqlxx/Query.cpp b/base/mysqlxx/Query.cpp index ab9bb174d4a..f3485c54edc 100644 --- a/base/mysqlxx/Query.cpp +++ b/base/mysqlxx/Query.cpp @@ -71,16 +71,6 @@ UseQueryResult Query::use() return UseQueryResult(res, conn, this); } -StoreQueryResult Query::store() -{ - executeImpl(); - MYSQL_RES * res = mysql_store_result(conn->getDriver()); - if (!res) - checkError(conn->getDriver()); - - return StoreQueryResult(res, conn, this); -} - void Query::execute() { executeImpl(); diff --git a/base/mysqlxx/Query.h b/base/mysqlxx/Query.h index 1d3ab9678d5..036e8952bc3 100644 --- a/base/mysqlxx/Query.h +++ b/base/mysqlxx/Query.h @@ -3,7 +3,6 @@ #include #include -#include namespace mysqlxx @@ -46,11 +45,6 @@ public: */ UseQueryResult use(); - /** Выполнить запрос с загрузкой на клиента всех строк. - * Требуется оперативка, чтобы вместить весь результат, зато к строкам можно обращаться в произвольном порядке. - */ - StoreQueryResult store(); - /// Значение auto increment после последнего INSERT-а. UInt64 insertID(); diff --git a/base/mysqlxx/ResultBase.h b/base/mysqlxx/ResultBase.h index 4f2ab2eb0a2..d08922a269c 100644 --- a/base/mysqlxx/ResultBase.h +++ b/base/mysqlxx/ResultBase.h @@ -9,7 +9,7 @@ class Connection; class Query; -/** Базовый класс для UseQueryResult и StoreQueryResult. +/** Базовый класс для UseQueryResult. * Содержит общую часть реализации, * Ссылается на Connection. Если уничтожить Connection, то пользоваться ResultBase и любым результатом нельзя. * Использовать объект можно только для результата одного запроса! diff --git a/base/mysqlxx/Row.h b/base/mysqlxx/Row.h index a0b88638546..d668fdbd29a 100644 --- a/base/mysqlxx/Row.h +++ b/base/mysqlxx/Row.h @@ -35,7 +35,7 @@ public: { } - /** Для того, чтобы создать Row, используйте соответствующие методы UseQueryResult или StoreQueryResult. */ + /** Для того, чтобы создать Row, используйте соответствующие методы UseQueryResult. */ Row(MYSQL_ROW row_, ResultBase * res_, MYSQL_LENGTHS lengths_) : row(row_), res(res_), lengths(lengths_) { diff --git a/base/mysqlxx/StoreQueryResult.cpp b/base/mysqlxx/StoreQueryResult.cpp deleted file mode 100644 index 620ed8def56..00000000000 --- a/base/mysqlxx/StoreQueryResult.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#if __has_include() -#include -#else -#include -#endif - -#include -#include - - -namespace mysqlxx -{ - -StoreQueryResult::StoreQueryResult(MYSQL_RES * res_, Connection * conn_, const Query * query_) : ResultBase(res_, conn_, query_) -{ - UInt64 rows = mysql_num_rows(res); - reserve(rows); - lengths.resize(rows * num_fields); - - for (UInt64 i = 0; MYSQL_ROW row = mysql_fetch_row(res); ++i) - { - MYSQL_LENGTHS lengths_for_row = mysql_fetch_lengths(res); - memcpy(&lengths[i * num_fields], lengths_for_row, sizeof(lengths[0]) * num_fields); - - push_back(Row(row, this, &lengths[i * num_fields])); - } - checkError(conn->getDriver()); -} - -} diff --git a/base/mysqlxx/StoreQueryResult.h b/base/mysqlxx/StoreQueryResult.h deleted file mode 100644 index 9c242d2782f..00000000000 --- a/base/mysqlxx/StoreQueryResult.h +++ /dev/null @@ -1,45 +0,0 @@ -#pragma once - -#include - -#include -#include - - -namespace mysqlxx -{ - -class Connection; - - -/** Результат выполнения запроса, загруженный полностью на клиента. 
- * Это требует оперативку, чтобы вместить весь результат, - * но зато реализует произвольный доступ к строкам по индексу. - * Если размер результата большой - используйте лучше UseQueryResult. - * Объект содержит ссылку на Connection. - * Если уничтожить Connection, то объект становится некорректным и все строки результата - тоже. - * Если задать следующий запрос в соединении, то объект и все строки тоже становятся некорректными. - * Использовать объект можно только для результата одного запроса! - * (При попытке присвоить объекту результат следующего запроса - UB.) - */ -class StoreQueryResult : public std::vector, public ResultBase -{ -public: - StoreQueryResult(MYSQL_RES * res_, Connection * conn_, const Query * query_); - - size_t num_rows() const { return size(); } - -private: - - /** Не смотря на то, что весь результат выполнения запроса загружается на клиента, - * и все указатели MYSQL_ROW на отдельные строки различные, - * при этом функция mysql_fetch_lengths() возвращает длины - * для текущей строки по одному и тому же адресу. - * То есть, чтобы можно было пользоваться несколькими Row одновременно, - * необходимо заранее куда-то сложить все длины. - */ - using Lengths = std::vector; - Lengths lengths; -}; - -} diff --git a/base/mysqlxx/UseQueryResult.h b/base/mysqlxx/UseQueryResult.h index 3a641020dcf..37cbbd19669 100644 --- a/base/mysqlxx/UseQueryResult.h +++ b/base/mysqlxx/UseQueryResult.h @@ -12,8 +12,7 @@ class Connection; /** Результат выполнения запроса, предназначенный для чтения строк, одна за другой. * В памяти при этом хранится только одна, текущая строка. - * В отличие от StoreQueryResult, произвольный доступ к строкам невозможен, - * а также, при чтении следующей строки, предыдущая становится некорректной. + * При чтении следующей строки, предыдущая становится некорректной. * Вы обязаны прочитать все строки из результата * (вызывать функцию fetch(), пока она не вернёт значение, преобразующееся к false), * иначе при следующем запросе будет выкинуто исключение с текстом "Commands out of sync". diff --git a/base/mysqlxx/Value.h b/base/mysqlxx/Value.h index dfa86e8aa7d..57cfd452045 100644 --- a/base/mysqlxx/Value.h +++ b/base/mysqlxx/Value.h @@ -25,7 +25,7 @@ class ResultBase; /** Represents a single value read from MySQL. * It doesn't owns the value. It's just a wrapper of a pair (const char *, size_t). - * If the UseQueryResult/StoreQueryResult or Connection is destroyed, + * If the UseQueryResult or Connection is destroyed, * or you have read the next Row while using UseQueryResult, then the object is invalidated. * Allows to transform (parse) the value to various data types: * - with getUInt(), getString(), ... 
(recommended); diff --git a/base/mysqlxx/tests/mysqlxx_test.cpp b/base/mysqlxx/tests/mysqlxx_test.cpp index cf304a5cb5f..c505d34a58d 100644 --- a/base/mysqlxx/tests/mysqlxx_test.cpp +++ b/base/mysqlxx/tests/mysqlxx_test.cpp @@ -38,15 +38,6 @@ int main(int, char **) } } - { - mysqlxx::Query query = connection.query(); - query << "SELECT 1234567890 abc, 12345.67890 def UNION ALL SELECT 9876543210, 98765.43210"; - mysqlxx::StoreQueryResult result = query.store(); - - std::cerr << result.at(0)["abc"].getUInt() << ", " << result.at(0)["def"].getDouble() << std::endl - << result.at(1)["abc"].getUInt() << ", " << result.at(1)["def"].getDouble() << std::endl; - } - { mysqlxx::UseQueryResult result = connection.query("SELECT 'abc\\\\def' x").use(); mysqlxx::Row row = result.fetch(); @@ -54,27 +45,6 @@ int main(int, char **) std::cerr << row << std::endl; } - { - mysqlxx::Query query = connection.query("SEL"); - query << "ECT 1"; - - std::cerr << query.store().at(0).at(0) << std::endl; - } - - { - /// Copying a Query - mysqlxx::Query query = connection.query("SELECT 'Ok' x"); - using Queries = std::vector; - Queries queries; - queries.push_back(query); - - for (auto & q : queries) - { - std::cerr << q.str() << std::endl; - std::cerr << q.store().at(0) << std::endl; - } - } - { /// Copying a Query mysqlxx::Query query1 = connection.query("SELECT"); @@ -84,62 +54,6 @@ int main(int, char **) std::cerr << query1.str() << ", " << query2.str() << std::endl; } - { - /// Copying a Query - using Queries = std::list; - Queries queries; - queries.push_back(connection.query("SELECT")); - mysqlxx::Query & qref = queries.back(); - qref << " 1"; - - for (auto & query : queries) - { - std::cerr << query.str() << std::endl; - std::cerr << query.store().at(0) << std::endl; - } - } - - { - /// Transactions - connection.query("DROP TABLE IF EXISTS tmp").execute(); - connection.query("CREATE TABLE tmp (x INT, PRIMARY KEY (x)) ENGINE = InnoDB").execute(); - - mysqlxx::Transaction trans(connection); - connection.query("INSERT INTO tmp VALUES (1)").execute(); - - std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl; - - trans.rollback(); - - std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl; - } - - { - /// Transactions - connection.query("DROP TABLE IF EXISTS tmp").execute(); - connection.query("CREATE TABLE tmp (x INT, PRIMARY KEY (x)) ENGINE = InnoDB").execute(); - - { - mysqlxx::Transaction trans(connection); - connection.query("INSERT INTO tmp VALUES (1)").execute(); - std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl; - } - - std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl; - } - - { - /// Transactions - mysqlxx::Connection connection2("test", "127.0.0.1", "root", "qwerty", 3306); - connection2.query("DROP TABLE IF EXISTS tmp").execute(); - connection2.query("CREATE TABLE tmp (x INT, PRIMARY KEY (x)) ENGINE = InnoDB").execute(); - - mysqlxx::Transaction trans(connection2); - connection2.query("INSERT INTO tmp VALUES (1)").execute(); - std::cerr << connection2.query("SELECT * FROM tmp").store().size() << std::endl; - } - std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl; - { /// NULL mysqlxx::Null x = mysqlxx::null; @@ -152,59 +66,6 @@ int main(int, char **) std::cerr << (x == 1 ? "Ok" : "Fail") << std::endl; std::cerr << (x.isNull() ? "Fail" : "Ok") << std::endl; } - - { - /// Exceptions when trying to extract a value of the wrong type - try - { - connection.query("SELECT -1").store().at(0).at(0).getUInt(); - std::cerr << "Fail" << std::endl; - } - catch (const mysqlxx::Exception & e) - { - std::cerr << "Ok, " << e.message() << std::endl; - } - - try - { - connection.query("SELECT 'xxx'").store().at(0).at(0).getInt(); - std::cerr << "Fail" << std::endl; - } - catch (const mysqlxx::Exception & e) - { - std::cerr << "Ok, " << e.message() << std::endl; - } - - try - { - connection.query("SELECT NULL").store().at(0).at(0).getString(); - std::cerr << "Fail" << std::endl; - } - catch (const mysqlxx::Exception & e) - { - std::cerr << "Ok, " << e.message() << std::endl; - } - - try - { - connection.query("SELECT 123").store().at(0).at(0).getDate(); - std::cerr << "Fail" << std::endl; - } - catch (const mysqlxx::Exception & e) - { - std::cerr << "Ok, " << e.message() << std::endl; - } - - try - { - connection.query("SELECT '2011-01-01'").store().at(0).at(0).getDateTime(); - std::cerr << "Fail" << std::endl; - } - catch (const mysqlxx::Exception & e) - { - std::cerr << "Ok, " << e.message() << std::endl; - } - } } catch (const mysqlxx::Exception & e) { diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 5d643cc4bee..ce92ae203ea 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -1,9 +1,9 @@ # These strings are autochanged from release_lib.sh: -SET(VERSION_REVISION 54447) +SET(VERSION_REVISION 54448) SET(VERSION_MAJOR 21) -SET(VERSION_MINOR 2) +SET(VERSION_MINOR 3) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 53d0c9fa7255aa1dc48991d19f4246ff71cc2fd7) -SET(VERSION_DESCRIBE v21.2.1.1-prestable) -SET(VERSION_STRING 21.2.1.1) +SET(VERSION_GITHASH ef72ba7349f230321750c13ee63b49a11a7c0adc) +SET(VERSION_DESCRIBE v21.3.1.1-prestable) +SET(VERSION_STRING 21.3.1.1) # end of autochange diff --git a/cmake/find/nuraft.cmake b/cmake/find/nuraft.cmake index d31fe9c1de8..7fa5251946e 100644 --- a/cmake/find/nuraft.cmake +++ b/cmake/find/nuraft.cmake @@ -11,7 +11,7 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/NuRaft/CMakeLists.txt") return() endif () -if (NOT OS_FREEBSD) +if (NOT OS_FREEBSD AND NOT OS_DARWIN) set (USE_NURAFT 1) set (NURAFT_LIBRARY nuraft) @@ -20,5 +20,5 @@ if (NOT OS_FREEBSD) message (STATUS "Using NuRaft=${USE_NURAFT}: ${NURAFT_INCLUDE_DIR} : ${NURAFT_LIBRARY}") else() set (USE_NURAFT 0) - message (STATUS "Using internal NuRaft library on FreeBSD is not supported") + message (STATUS "Using internal NuRaft library on FreeBSD and Darwin is not supported") endif() diff --git a/contrib/NuRaft b/contrib/NuRaft index 410bd149da8..7adf7ae33e7 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 410bd149da84cdde60b4436b02b738749f4e87e1 +Subproject commit 7adf7ae33e7d5c307342431b577c8ab1025ee793 diff --git a/contrib/base64-cmake/CMakeLists.txt b/contrib/base64-cmake/CMakeLists.txt index 63b4e324d29..a295ee45b84 100644 --- a/contrib/base64-cmake/CMakeLists.txt +++ b/contrib/base64-cmake/CMakeLists.txt @@ -11,7 +11,7 @@ endif () target_compile_options(base64_scalar PRIVATE -falign-loops) if (ARCH_AMD64) - target_compile_options(base64_ssse3 PRIVATE -mssse3 -falign-loops) + target_compile_options(base64_ssse3 PRIVATE -mno-avx -mno-avx2 -mssse3 -falign-loops) target_compile_options(base64_avx PRIVATE -falign-loops -mavx) target_compile_options(base64_avx2 PRIVATE -falign-loops -mavx2) else () diff --git a/contrib/boost b/contrib/boost index
8e259cd2a6b..48f40ebb539 160000 --- a/contrib/boost +++ b/contrib/boost @@ -1 +1 @@ -Subproject commit 8e259cd2a6b60d75dd17e73432f11bb7b9351bb1 +Subproject commit 48f40ebb539220d328958f8823b094c0b07a4e79 diff --git a/contrib/hyperscan b/contrib/hyperscan index 3907fd00ee8..e9f08df0213 160000 --- a/contrib/hyperscan +++ b/contrib/hyperscan @@ -1 +1 @@ -Subproject commit 3907fd00ee8b2538739768fa9533f8635a276531 +Subproject commit e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa diff --git a/contrib/hyperscan-cmake/CMakeLists.txt b/contrib/hyperscan-cmake/CMakeLists.txt index c44214cded8..75c45ff7bf5 100644 --- a/contrib/hyperscan-cmake/CMakeLists.txt +++ b/contrib/hyperscan-cmake/CMakeLists.txt @@ -252,6 +252,7 @@ if (NOT EXTERNAL_HYPERSCAN_LIBRARY_FOUND) target_compile_definitions (hyperscan PUBLIC USE_HYPERSCAN=1) target_compile_options (hyperscan PRIVATE -g0 # Library has too much debug information + -mno-avx -mno-avx2 # The library is using dynamic dispatch and is confused if AVX is enabled globally -march=corei7 -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # The options from original build system -fno-sanitize=undefined # Assume the library takes care of itself ) diff --git a/contrib/nuraft-cmake/CMakeLists.txt b/contrib/nuraft-cmake/CMakeLists.txt index e5bb7f7d11b..83137fe73bf 100644 --- a/contrib/nuraft-cmake/CMakeLists.txt +++ b/contrib/nuraft-cmake/CMakeLists.txt @@ -30,7 +30,12 @@ set(SRCS add_library(nuraft ${SRCS}) -target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1) + +if (NOT OPENSSL_SSL_LIBRARY OR NOT OPENSSL_CRYPTO_LIBRARY) + target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1 SSL_LIBRARY_NOT_FOUND=1) +else() + target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1) +endif() target_include_directories (nuraft SYSTEM PRIVATE ${LIBRARY_DIR}/include/libnuraft) # for some reason include "asio.h" directly without "boost/" prefix. diff --git a/contrib/poco b/contrib/poco index 2c32e17c7df..fbaaba4a02e 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 2c32e17c7dfee1f8bf24227b697cdef5fddf0823 +Subproject commit fbaaba4a02e29987b8c584747a496c79528f125f diff --git a/debian/changelog b/debian/changelog index 1cec020f026..53b36cae114 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (21.2.1.1) unstable; urgency=low +clickhouse (21.3.1.1) unstable; urgency=low * Modified source code - -- clickhouse-release Mon, 11 Jan 2021 11:12:08 +0300 + -- clickhouse-release Mon, 01 Feb 2021 12:50:53 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 5022687c47b..43921a4d3c4 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.2.1.* +ARG version=21.3.1.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 3528ae68ef6..8e39af5646c 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.2.1.* +ARG version=21.3.1.* ARG gosu_ver=1.10 # user/group precreated explicitly with fixed uid/gid on purpose. 
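For context, the `version` build argument bumped above is how these images select the package to install; a minimal sketch of building the server image with that argument overridden at build time (the output tag name is illustrative):

```bash
# Build the server image from the repository root, pinning the package
# version that the Dockerfile installs via its `version` ARG.
docker build docker/server \
    --build-arg version="21.3.1.*" \
    -t clickhouse-server:21.3-local
```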
diff --git a/docker/server/README.md b/docker/server/README.md index d8e9204dffa..6f799d68185 100644 --- a/docker/server/README.md +++ b/docker/server/README.md @@ -56,7 +56,7 @@ $ echo 'SELECT version()' | curl 'http://localhost:8123/' --data-binary @- 20.12.3.3 ``` -### Volumes +### Volumes Typically you may want to mount the following folders inside your container to achieve persistence: @@ -76,7 +76,7 @@ You may also want to mount: * `/etc/clickhouse-server/users.d/*.xml` - files with user settings adjustments * `/docker-entrypoint-initdb.d/` - folder with database initialization scripts (see below). -### Linux capabilities +### Linux capabilities ClickHouse has some advanced functionality which requires enabling several [Linux capabilities](https://man7.org/linux/man-pages/man7/capabilities.7.html). @@ -113,10 +113,10 @@ $ docker run --rm -e CLICKHOUSE_UID=0 -e CLICKHOUSE_GID=0 --name clickhouse-serv ### How to create default database and user on starting -Sometimes you may want to create a user (the user named `default` is used by default) and a database when the image starts. You can do it using the environment variables `CLICKHOUSE_DB`, `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD`: +Sometimes you may want to create a user (the user named `default` is used by default) and a database when the image starts. You can do it using the environment variables `CLICKHOUSE_DB`, `CLICKHOUSE_USER`, `CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT` and `CLICKHOUSE_PASSWORD`: ``` -$ docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLICKHOUSE_PASSWORD=password -p 9000:9000/tcp yandex/clickhouse-server +$ docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 -e CLICKHOUSE_PASSWORD=password -p 9000:9000/tcp yandex/clickhouse-server ``` ## How to extend this image diff --git a/docker/server/alpine-build.sh b/docker/server/alpine-build.sh index 0142149b5bd..329888f2fcb 100755 --- a/docker/server/alpine-build.sh +++ b/docker/server/alpine-build.sh @@ -54,8 +54,10 @@ docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libm.so.6 "${CONTAIN docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libpthread.so.0 "${CONTAINER_ROOT_FOLDER}/lib" docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/librt.so.1 "${CONTAINER_ROOT_FOLDER}/lib" docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAINER_ROOT_FOLDER}/lib" +docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_files.so.2 "${CONTAINER_ROOT_FOLDER}/lib" docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib" docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64" +docker cp -L "${ubuntu20image}":/etc/nsswitch.conf "${CONTAINER_ROOT_FOLDER}/etc" docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "${DOCKER_IMAGE}:${VERSION}-alpine" --pull rm -rf "$CONTAINER_ROOT_FOLDER" diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 8a4d02a6014..0138a165505 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -54,6 +54,7 @@ FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_ CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}" CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}" CLICKHOUSE_DB="${CLICKHOUSE_DB:-}" +CLICKHOUSE_ACCESS_MANAGEMENT="${CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT:-0}" for dir in "$DATA_DIR" \ "$ERROR_LOG_DIR" \ @@ -97,6 +98,7 @@ if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CL
${CLICKHOUSE_PASSWORD} default + ${CLICKHOUSE_ACCESS_MANAGEMENT} @@ -120,7 +122,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then sleep 1 done - clickhouseclient=( clickhouse-client --multiquery -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" ) + clickhouseclient=( clickhouse-client --multiquery --host "127.0.0.1" -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" ) echo diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index df918928f99..f151ae8fddf 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.2.1.* +ARG version=21.3.1.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 7555b5591d0..e6294b5d74d 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -120,7 +120,7 @@ function clone_root git checkout FETCH_HEAD echo 'Clonned merge head' else - git fetch + git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/head" git checkout "$COMMIT_SHA" echo 'Checked out to commit' fi @@ -163,6 +163,7 @@ function clone_submodules contrib/xz contrib/dragonbox contrib/fast_float + contrib/NuRaft ) git submodule sync @@ -182,6 +183,7 @@ function run_cmake "-DENABLE_EMBEDDED_COMPILER=0" "-DENABLE_THINLTO=0" "-DUSE_UNWIND=1" + "-DENABLE_NURAFT=1" ) # TODO remove this? we don't use ccache anyway. An option would be to download it @@ -251,8 +253,12 @@ function run_tests 00701_rollup 00834_cancel_http_readonly_queries_on_client_close 00911_tautological_compare + + # Hyperscan 00926_multimatch 00929_multi_match_edit_distance + 01681_hyperscan_debug_assertion + 01031_mutations_interpreter_and_context 01053_ssd_dictionary # this test mistakenly requires acces to /var/lib/clickhouse -- can't run this locally, disabled 01083_expressions_in_engine_arguments @@ -315,6 +321,7 @@ function run_tests # In fasttest, ENABLE_LIBRARIES=0, so rocksdb engine is not enabled by default 01504_rocksdb + 01686_rocksdb # Look at DistributedFilesToInsert, so cannot run in parallel. 01460_DistributedFilesToInsert diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 9af401238a3..766fec76179 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -190,7 +190,7 @@ case "$stage" in # Lost connection to the server. This probably means that the server died # with abort. echo "failure" > status.txt - if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*" server.log > description.txt + if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt then echo "Lost connection to server. See the logs." 
> description.txt fi diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 9b51891ccf5..502dc3736b2 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -61,7 +61,8 @@ RUN python3 -m pip install \ aerospike \ avro \ cassandra-driver \ - confluent-kafka \ + confluent-kafka==1.5.0 \ + dict2xml \ dicttoxml \ docker \ docker-compose==1.22.0 \ diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml index a74476613f3..f2a659bce58 100644 --- a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml +++ b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml @@ -14,7 +14,7 @@ services: ports: - 1006:1006 - 50070:50070 - - 9000:9000 + - 9010:9010 depends_on: - hdfskerberos entrypoint: /etc/bootstrap.sh -d diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 48479161ef9..f1c5df146aa 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -44,6 +44,7 @@ parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated l parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.') parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.') parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.') +parser.add_argument('--max-query-seconds', type=int, default=10, help='For how many seconds at most a query is allowed to run. The script finishes with error if this time is exceeded.') parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.') parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.') parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.') @@ -323,7 +324,7 @@ for query_index in queries_to_run: server_seconds += elapsed print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}') - if elapsed > 10: + if elapsed > args.max_query_seconds: # Stop processing pathologically slow queries, to avoid timing out # the entire test task. This shouldn't really happen, so we don't # need much handling for this case and can just exit. 
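The new `--max-query-seconds` option above replaces the hard-coded 10-second limit; a hedged sketch of passing it on a run (only `--runs` and `--max-query-seconds` are taken from the argparse definitions above; the positional test-file argument and its path are assumed):

```bash
# Allow pathologically slow queries up to 15 seconds instead of the default 10;
# the test description file path is hypothetical.
./perf.py --runs 3 --max-query-seconds 15 tests/performance/some_test.xml
```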
diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index fb510a87fcd..575be721a54 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -53,10 +53,12 @@ function run_tests() if [ "$NUM_TRIES" -gt "1" ]; then ADDITIONAL_OPTIONS+=('--skip') ADDITIONAL_OPTIONS+=('00000_no_tests_to_skip') + ADDITIONAL_OPTIONS+=('--jobs') + ADDITIONAL_OPTIONS+=('4') fi clickhouse-test --testname --shard --zookeeper --hung-check --print-time \ - --test-runs "$NUM_TRIES" --jobs 4 \ + --test-runs "$NUM_TRIES" \ "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt diff --git a/docker/test/stateless_pytest/Dockerfile b/docker/test/stateless_pytest/Dockerfile index 4d0274143d6..58846f90fa7 100644 --- a/docker/test/stateless_pytest/Dockerfile +++ b/docker/test/stateless_pytest/Dockerfile @@ -5,7 +5,10 @@ RUN apt-get update -y && \ apt-get install -y --no-install-recommends \ python3-pip \ python3-setuptools \ - python3-wheel + python3-wheel \ + brotli \ + netcat-openbsd \ + zstd RUN python3 -m pip install \ wheel \ @@ -15,7 +18,10 @@ RUN python3 -m pip install \ pytest-randomly \ pytest-rerunfailures \ pytest-timeout \ - pytest-xdist + pytest-xdist \ + pandas \ + numpy \ + scipy CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ dpkg -i package_folder/clickhouse-common-static-dbg_*.deb; \ diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 9da2f3d3ada..88a633ac488 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -64,7 +64,7 @@ clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" clickhouse-client --query "SHOW TABLES FROM test" -./stress --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" +./stress --hung-check --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" && echo "OK" > /test_output/script_exit_code.txt || echo "FAIL" > /test_output/script_exit_code.txt stop start diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 458f78fcdb4..d2ec86b4421 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -1,8 +1,9 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- from multiprocessing import cpu_count -from subprocess import Popen, check_call +from subprocess import Popen, call, STDOUT import os +import sys import shutil import argparse import logging @@ -64,7 +65,8 @@ if __name__ == "__main__": parser.add_argument("--server-log-folder", default='/var/log/clickhouse-server') parser.add_argument("--output-folder") parser.add_argument("--global-time-limit", type=int, default=3600) - parser.add_argument("--num-parallel", default=cpu_count()); + parser.add_argument("--num-parallel", default=cpu_count()) + parser.add_argument('--hung-check', action='store_true', default=False) args = parser.parse_args() func_pipes = [] @@ -81,4 +83,13 @@ if __name__ == "__main__": logging.info("Finished %s from %s processes", len(retcodes), len(func_pipes)) time.sleep(5) + logging.info("All processes finished") + if args.hung_check: + logging.info("Checking if some queries hung") + cmd = "{} {} {}".format(args.test_cmd, "--hung-check", "00001_select_1") + res = call(cmd, shell=True, stderr=STDOUT) + if res != 0: + logging.info("Hung check failed with exit code {}".format(res)) + sys.exit(1) + logging.info("Stress test finished") diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 
7047007d2fc..74af8eafc17 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -1,12 +1,16 @@ # docker build -t yandex/clickhouse-style-test . FROM ubuntu:20.04 -RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip python3-pytest && pip3 install codespell +RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip pylint && pip3 install codespell +# For |& syntax +SHELL ["bash", "-c"] + CMD cd /ClickHouse/utils/check-style && \ - ./check-style -n | tee /test_output/style_output.txt && \ - ./check-typos | tee /test_output/typos_output.txt && \ - ./check-whitespaces -n | tee /test_output/whitespaces_output.txt && \ - ./check-duplicate-includes.sh | tee /test_output/duplicate_output.txt && \ - ./shellcheck-run.sh | tee /test_output/shellcheck_output.txt + ./check-style -n |& tee /test_output/style_output.txt && \ + ./check-typos |& tee /test_output/typos_output.txt && \ + ./check-whitespaces -n |& tee /test_output/whitespaces_output.txt && \ + ./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt && \ + ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt && \ + true diff --git a/docs/_description_templates/template-data-type.md b/docs/_description_templates/template-data-type.md new file mode 100644 index 00000000000..edb6586ee7d --- /dev/null +++ b/docs/_description_templates/template-data-type.md @@ -0,0 +1,29 @@ +--- +toc_priority: +toc_title: +--- + +# data_type_name {#data_type-name} + +Description. + +**Parameters** (Optional) + +- `x` — Description. [Type name](relative/path/to/type/dscr.md#type). +- `y` — Description. [Type name](relative/path/to/type/dscr.md#type). + +**Examples** + +```sql + +``` + +## Additional Info {#additional-info} (Optional) + +The name of an additional section can be any, for example, **Usage**. + +**See Also** (Optional) + +- [link](#) + +[Original article](https://clickhouse.tech/docs/en/data_types//) diff --git a/docs/_description_templates/template-function.md b/docs/_description_templates/template-function.md index b69d7ed5309..a0074a76ef6 100644 --- a/docs/_description_templates/template-function.md +++ b/docs/_description_templates/template-function.md @@ -12,16 +12,20 @@ Alias: ``. (Optional) More text (Optional). -**Parameters** (Optional) +**Arguments** (Optional) - `x` — Description. [Type name](relative/path/to/type/dscr.md#type). - `y` — Description. [Type name](relative/path/to/type/dscr.md#type). +**Parameters** (Optional, only for parametric aggregate functions) + +- `z` — Description. [Type name](relative/path/to/type/dscr.md#type). + **Returned value(s)** -- Returned values list. +- Returned values list. -Type: [Type](relative/path/to/type/dscr.md#type). +Type: [Type name](relative/path/to/type/dscr.md#type). **Example** diff --git a/docs/_description_templates/template-system-table.md b/docs/_description_templates/template-system-table.md index 3fdf9788d79..f2decc4bb6d 100644 --- a/docs/_description_templates/template-system-table.md +++ b/docs/_description_templates/template-system-table.md @@ -8,10 +8,14 @@ Columns: **Example** +Query: + ``` sql SELECT * FROM system.table_name ``` +Result: + ``` text Some output. It shouldn't be too long. 
``` diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md index 60365ad744a..e0b1be710f1 100644 --- a/docs/en/development/build-osx.md +++ b/docs/en/development/build-osx.md @@ -40,7 +40,7 @@ $ cd ClickHouse ``` bash $ mkdir build $ cd build -$ cmake ..-DCMAKE_C_COMPILER=`brew --prefix llvm`/bin/clang -DCMAKE_CXX_COMPILER=`brew --prefix llvm`/bin/clang++ -DCMAKE_PREFIX_PATH=`brew --prefix llvm` +$ cmake .. -DCMAKE_C_COMPILER=`brew --prefix llvm`/bin/clang -DCMAKE_CXX_COMPILER=`brew --prefix llvm`/bin/clang++ -DCMAKE_PREFIX_PATH=`brew --prefix llvm` $ ninja $ cd .. ``` diff --git a/docs/en/engines/database-engines/materialize-mysql.md b/docs/en/engines/database-engines/materialize-mysql.md index 89fe9304c4c..2e361cc82f0 100644 --- a/docs/en/engines/database-engines/materialize-mysql.md +++ b/docs/en/engines/database-engines/materialize-mysql.md @@ -93,6 +93,7 @@ ClickHouse has only one physical order, which is determined by `ORDER BY` clause - Cascade `UPDATE/DELETE` queries are not supported by the `MaterializeMySQL` engine. - Replication can be easily broken. - Manual operations on database and tables are forbidden. +- `MaterializeMySQL` is influenced by the [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged in the corresponding table in the `MaterializeMySQL` database when a table in the MySQL server changes. ## Examples of Use {#examples-of-use} @@ -156,4 +157,4 @@ SELECT * FROM mysql.test; └───┴─────┴──────┘ ``` -[Original article](https://clickhouse.tech/docs/en/database_engines/materialize-mysql/) +[Original article](https://clickhouse.tech/docs/en/engines/database-engines/materialize-mysql/) diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md index 857e148277c..6e864751cc3 100644 --- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md +++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md @@ -7,8 +7,6 @@ toc_title: EmbeddedRocksDB This engine allows integrating ClickHouse with [rocksdb](http://rocksdb.org/). -`EmbeddedRocksDB` lets you: - ## Creating a Table {#table_engine-EmbeddedRocksDB-creating-a-table} ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Required parameters: - `primary_key_name` – any column name in the column list. +- `primary key` must be specified; it supports only one column in the primary key. The primary key will be serialized in binary as a `rocksdb` key. +- columns other than the primary key will be serialized in binary as the `rocksdb` value in the corresponding order. +- queries with `equals` or `in` filtering on the key will be optimized to a multi-key lookup in `rocksdb`. Example: @@ -38,8 +39,4 @@ ENGINE = EmbeddedRocksDB PRIMARY KEY key ``` -## Description {#description} - -- `primary key` must be specified; it supports only one column in the primary key. The primary key will be serialized in binary as a `rocksdb` key. -- columns other than the primary key will be serialized in binary as the `rocksdb` value in the corresponding order. -- queries with `equals` or `in` filtering on the key will be optimized to a multi-key lookup in `rocksdb`.
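To make the lookup optimization above concrete, a small sketch (the `test` table and `key` column come from the example table; the literal key values are made up) — a filter like this is answered by direct multi-key gets in rocksdb rather than a full scan:

```bash
# Point lookups on the primary key; with EmbeddedRocksDB these IN/equals
# filters become direct key fetches instead of a table scan.
clickhouse-client --query \
    "SELECT * FROM test WHERE key IN ('some-key-1', 'some-key-2')"
```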
+[Original article](https://clickhouse.tech/docs/en/operations/table_engines/embedded-rocksdb/) diff --git a/docs/en/engines/table-engines/integrations/index.md b/docs/en/engines/table-engines/integrations/index.md index cf3e36c2f48..288c9c3cd56 100644 --- a/docs/en/engines/table-engines/integrations/index.md +++ b/docs/en/engines/table-engines/integrations/index.md @@ -12,6 +12,9 @@ List of supported integrations: - [ODBC](../../../engines/table-engines/integrations/odbc.md) - [JDBC](../../../engines/table-engines/integrations/jdbc.md) - [MySQL](../../../engines/table-engines/integrations/mysql.md) +- [MongoDB](../../../engines/table-engines/integrations/mongodb.md) - [HDFS](../../../engines/table-engines/integrations/hdfs.md) - [S3](../../../engines/table-engines/integrations/s3.md) - [Kafka](../../../engines/table-engines/integrations/kafka.md) +- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md) +- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md new file mode 100644 index 00000000000..e648a13b5e0 --- /dev/null +++ b/docs/en/engines/table-engines/integrations/mongodb.md @@ -0,0 +1,57 @@ +--- +toc_priority: 7 +toc_title: MongoDB +--- + +# MongoDB {#mongodb} + +`MongoDB` engine is a read-only table engine which allows reading data (`SELECT` queries) from a remote MongoDB collection. The engine supports only non-nested data types. `INSERT` queries are not supported. + +## Creating a Table {#creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name +( + name1 [type1], + name2 [type2], + ... +) ENGINE = MongoDB(host:port, database, collection, user, password); + +**Engine Parameters** + +- `host:port` — MongoDB server address. + +- `database` — Remote database name. + +- `collection` — Remote collection name. + +- `user` — MongoDB user. + +- `password` — User password. + +## Usage Example {#usage-example} + +A table in ClickHouse which reads data from the MongoDB collection: + +``` sql +CREATE TABLE mongo_table +( + key UInt64, + data String +) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse'); +``` + +Query: + +``` sql +SELECT COUNT() FROM mongo_table; +``` + +``` text +┌─count()─┐ +│ 4 │ +└─────────┘ +``` + +[Original article](https://clickhouse.tech/docs/en/operations/table_engines/integrations/mongodb/) diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index b0901ee6f6e..c73876fdebe 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -59,10 +59,11 @@ Optional parameters: - `rabbitmq_max_block_size` - `rabbitmq_flush_interval_ms` -Required configuration: The RabbitMQ server configuration should be added using the ClickHouse config file.
+Required configuration: + ``` xml root @@ -70,6 +71,14 @@ The RabbitMQ server configuration should be added using the ClickHouse config fi ``` +Additional configuration: + +``` xml + + clickhouse + +``` + Example: ``` sql diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index d8cceb4d511..5858a0803e6 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -136,8 +136,7 @@ The following settings can be specified in configuration file for given endpoint - `access_key_id` and `secret_access_key` — Optional. Specifies credentials to use with given endpoint. - `use_environment_credentials` — Optional, default value is `false`. If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint. - `header` — Optional, can be specified multiple times. Adds specified HTTP header to a request to given endpoint. - -This configuration also applies to S3 disks in `MergeTree` table engine family. +- `server_side_encryption_customer_key_base64` — Optional. If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Example: @@ -149,6 +148,7 @@ Example: + ``` diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 084d05ec0a0..753859b46d2 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -45,7 +45,10 @@ ORDER BY expr [PARTITION BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] -[TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...] +[TTL expr + [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx' [, ...] ] + [WHERE conditions] + [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ] [SETTINGS name=value, ...] ``` @@ -80,7 +83,7 @@ For a description of parameters, see the [CREATE query description](../../../sql Expression must have one `Date` or `DateTime` column as a result. Example: `TTL date + INTERVAL 1 DAY` - Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`). Default type of the rule is removal (`DELETE`). A list of multiple rules can be specified, but there should be no more than one `DELETE` rule. + Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`), or aggregating values in expired rows. Default type of the rule is removal (`DELETE`). A list of multiple rules can be specified, but there should be no more than one `DELETE` rule. For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl) @@ -101,7 +104,8 @@ For a description of parameters, see the [CREATE query description](../../../sql - `max_parts_in_total` — Maximum number of parts in all partitions. - `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table.
You can also specify this setting in the global settings (see [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size) setting). The value specified when the table is created overrides the global value for this setting. - `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. You can also specify this setting in the global settings (see [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size) setting). The value specified when the table is created overrides the global value for this setting. - + - `max_partitions_to_read` — Limits the maximum number of partitions that can be accessed in one query. You can also specify the [max_partitions_to_read](../../../operations/settings/merge-tree-settings.md#max-partitions-to-read) setting globally. + **Example of Sections Setting** ``` sql @@ -455,18 +459,28 @@ ALTER TABLE example_table Table can have an expression for removal of expired rows, and multiple expressions for automatic move of parts between [disks or volumes](#table_engine-mergetree-multiple-volumes). When rows in the table expire, ClickHouse deletes all corresponding rows. For parts moving feature, all rows of a part must satisfy the movement expression criteria. ``` sql -TTL expr [DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'], ... +TTL expr + [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ... + [WHERE conditions] + [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ``` Type of TTL rule may follow each TTL expression. It affects an action which is to be done once the expression is satisfied (reaches current time): - `DELETE` - delete expired rows (default action); - `TO DISK 'aaa'` - move part to the disk `aaa`; -- `TO VOLUME 'bbb'` - move part to the volume `bbb`. +- `TO VOLUME 'bbb'` - move part to the volume `bbb`; +- `GROUP BY` - aggregate expired rows. -Examples: +With the `WHERE` clause you may specify which of the expired rows to delete or aggregate (it cannot be applied to moves). -Creating a table with TTL +The `GROUP BY` expression must be a prefix of the table primary key. + +If a column is not part of the `GROUP BY` expression and is not set explicitly in the `SET` clause, the result row contains an arbitrary value from the grouped rows (as if the aggregate function `any` were applied to it). + +**Examples** + +Creating a table with TTL: ``` sql CREATE TABLE example_table ( d DateTime, a Int ) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY d TTL d + INTERVAL 1 MONTH [DELETE], d + INTERVAL 2 WEEK TO DISK 'bbb'; ``` -Altering TTL of the table +Altering TTL of the table: ``` sql ALTER TABLE example_table MODIFY TTL d + INTERVAL 1 DAY; ``` +Creating a table where the rows expire after one month. The expired rows whose dates fall on a Monday are deleted: + +``` sql +CREATE TABLE table_with_where +( + d DateTime, + a Int +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(d) +ORDER BY d +TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1; +``` + +Creating a table where expired rows are aggregated. In the result rows, `x` contains the maximum value across the grouped rows, `y` — the minimum value, and `d` — an arbitrary value from the grouped rows: + +``` sql +CREATE TABLE table_for_aggregation +( + d DateTime, + k1 Int, + k2 Int, + x Int, + y Int +) +ENGINE = MergeTree +ORDER BY k1, k2 +TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); +``` + **Removing Data** Data with an expired TTL is removed when ClickHouse merges data parts.
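Since TTL is only applied when parts are merged, a quick way to observe the `GROUP BY` rule on a small test table is to force a merge; a sketch using `table_for_aggregation` from the example above (the inserted values are made up):

```bash
# Insert two already-expired rows with the same k1, k2, force a merge,
# and check that they collapsed into one row with x = max, y = min.
clickhouse-client --multiquery --query "
INSERT INTO table_for_aggregation VALUES (now() - INTERVAL 2 MONTH, 1, 1, 3, 5);
INSERT INTO table_for_aggregation VALUES (now() - INTERVAL 2 MONTH, 1, 1, 7, 2);
OPTIMIZE TABLE table_for_aggregation FINAL;
SELECT x, y FROM table_for_aggregation;
"
```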
@@ -671,6 +715,7 @@ Configuration markup: https://storage.yandexcloud.net/my-bucket/root-path/ your_access_key_id your_secret_access_key + your_base64_encoded_customer_key http://proxy1 http://proxy2 @@ -706,7 +751,8 @@ Optional parameters: - `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks//`. - `cache_enabled` — Allows to cache mark and index files on local FS. Default value is `true`. - `cache_path` — Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks//cache/`. -- `skip_access_check` — If true disk access checks will not be performed on disk start-up. Default value is `false`. +- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`. +- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. S3 disk can be configured as `main` or `cold` storage: diff --git a/docs/en/faq/operations/delete-old-data.md b/docs/en/faq/operations/delete-old-data.md index 5addc455602..fdf1f1f290e 100644 --- a/docs/en/faq/operations/delete-old-data.md +++ b/docs/en/faq/operations/delete-old-data.md @@ -39,4 +39,4 @@ More details on [manipulating partitions](../../sql-reference/statements/alter/p It’s rather radical to drop all data from a table, but in some cases it might be exactly what you need. -More details on [table truncation](../../sql-reference/statements/alter/partition.md#alter_drop-partition). +More details on [table truncation](../../sql-reference/statements/truncate.md). diff --git a/docs/en/getting-started/tutorial.md b/docs/en/getting-started/tutorial.md index 64363c963c5..fe697972dff 100644 --- a/docs/en/getting-started/tutorial.md +++ b/docs/en/getting-started/tutorial.md @@ -644,7 +644,7 @@ If there are no replicas at the moment on replicated table creation, a new first ``` sql CREATE TABLE tutorial.hits_replica (...) -ENGINE = ReplcatedMergeTree( +ENGINE = ReplicatedMergeTree( '/clickhouse_perftest/tables/{shard}/hits', '{replica}' ) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 11291d61300..33bf90a8b52 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -31,8 +31,8 @@ The supported formats are: | [JSONCompactString](#jsoncompactstring) | ✗ | ✔ | | [JSONEachRow](#jsoneachrow) | ✔ | ✔ | | [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | -| [JSONStringEachRow](#jsonstringeachrow) | ✔ | ✔ | -| [JSONStringEachRowWithProgress](#jsonstringeachrowwithprogress) | ✗ | ✔ | +| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | +| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ | | [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ | | [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ | | [JSONCompactStringEachRow](#jsoncompactstringeachrow) | ✔ | ✔ | @@ -612,7 +612,7 @@ Example: ``` ## JSONEachRow {#jsoneachrow} -## JSONStringEachRow {#jsonstringeachrow} +## JSONStringsEachRow {#jsonstringseachrow} ## JSONCompactEachRow {#jsoncompacteachrow} ## JSONCompactStringEachRow {#jsoncompactstringeachrow} @@ -627,9 +627,9 @@ When using these formats, ClickHouse outputs rows as separated, newline-delimite When inserting the data, you should provide a separate JSON value for each row. 
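As a concrete illustration of the one-JSON-object-per-row contract described above, a minimal round trip (the table name and schema are assumed):

```bash
# Each input line is a separate JSON object = one row.
echo '{"x":1,"s":"hello"}
{"x":2,"s":"world"}' | clickhouse-client --query "INSERT INTO t FORMAT JSONEachRow"

# Read the rows back in the same format, one JSON object per line.
clickhouse-client --query "SELECT * FROM t FORMAT JSONEachRow"
```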
## JSONEachRowWithProgress {#jsoneachrowwithprogress} -## JSONStringEachRowWithProgress {#jsonstringeachrowwithprogress} +## JSONStringsEachRowWithProgress {#jsonstringseachrowwithprogress} -Differs from `JSONEachRow`/`JSONStringEachRow` in that ClickHouse will also yield progress information as JSON values. +Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yield progress information as JSON values. ```json {"row":{"'hello'":"hello","multiply(42, number)":"0","range(5)":[0,1,2,3,4]}} diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 2684e6fdd3a..454d856f779 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -8,118 +8,120 @@ toc_title: Adopters !!! warning "Disclaimer" The following list of companies using ClickHouse and their success stories is assembled from public sources, thus might differ from current reality. We’d appreciate it if you share the story of adopting ClickHouse in your company and [add it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you won’t have any NDA issues by doing so. Providing updates with publications from other companies is also useful. -| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size\* | Reference | -|------------------------------------------------------------------------------------------------|---------------------------------|-----------------------|------------------------------------------------------------|------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 2gis | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) | -| Admiral | Martech | Engagement Management | — | — | [Webinar Slides, June 2020](https://altinity.com/presentations/2020/06/16/big-data-in-real-time-how-clickhouse-powers-admirals-visitor-relationships-for-publishers) | -| Alibaba Cloud | Cloud | Managed Service | — | — | [Official Website](https://help.aliyun.com/product/144466.html) | -| Aloha Browser | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf) | -| Amadeus | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) | -| Appsflyer | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) | -| ArenaData | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) | -| Avito | Classifieds | Monitoring | — | — | [Meetup, April 2020](https://www.youtube.com/watch?v=n1tm4j4W8ZQ) | -| Badoo | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) | -| Benocs | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) | +| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size\* | Reference | 
+|---------|----------|---------|--------------|------------------------------------------------------------------------------|-----------| +| 2gis | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) | +| Admiral | Martech | Engagement Management | — | — | [Webinar Slides, June 2020](https://altinity.com/presentations/2020/06/16/big-data-in-real-time-how-clickhouse-powers-admirals-visitor-relationships-for-publishers) | +| Alibaba Cloud | Cloud | Managed Service | — | — | [Official Website](https://help.aliyun.com/product/144466.html) | +| Aloha Browser | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf) | +| Amadeus | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) | +| Appsflyer | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) | +| ArenaData | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) | +| Avito | Classifieds | Monitoring | — | — | [Meetup, April 2020](https://www.youtube.com/watch?v=n1tm4j4W8ZQ) | +| Badoo | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) | +| Benocs | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) | | BIGO | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) | -| Bloomberg | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | -| Bloxy | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) | -| Bytedance | Social platforms | — | — | — | [The ClickHouse Meetup East, October 2020](https://www.youtube.com/watch?v=ckChUkC3Pns) | +| Bloomberg | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | +| Bloxy | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) | +| Bytedance | Social platforms | — | — | — | [The ClickHouse Meetup East, October 2020](https://www.youtube.com/watch?v=ckChUkC3Pns) | | CardsMobile | Finance | Analytics | — | — | [VC.ru](https://vc.ru/s/cardsmobile/143449-rukovoditel-gruppy-analiza-dannyh) | -| CARTO | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) | -| CERN | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) | -| Cisco | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) | -| Citadel Securities | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) | -| Citymobil | 
Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) | -| Cloudflare | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) | -| Comcast | Media | CDN Traffic Analysis | — | — | [ApacheCon 2019 Talk](https://www.youtube.com/watch?v=e9TZ6gFDjNg) | -| ContentSquare | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | -| Corunet | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) | -| CraiditX 氪信 | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) | -| Crazypanda | Games | | — | — | Live session on ClickHouse meetup | -| Criteo | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) | -| Dataliance for China Telecom | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) | -| Deutsche Bank | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | -| Deeplay | Gaming Analytics | — | — | — | [Job advertisement, 2020](https://career.habr.com/vacancies/1000062568) | -| Diva-e | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) | -| Ecwid | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) | -| eBay | E-commerce | Logs, Metrics and Events | — | — | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/) | -| Exness | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | -| FastNetMon | DDoS Protection | Main Product | | — | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) | -| Flipkart | e-Commerce | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239) | -| FunCorp | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) | -| Geniee | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | -| Genotek | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) | -| HUYA | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| ICA | FinTech | Risk Management | — | — | [Blog Post in English, Sep 
2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) | -| Idealista | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | -| Infovista | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | -| InnoGames | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | -| Instana | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) | -| Integros | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | -| Ippon Technologies | Technology Consulting | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=205) | -| Ivi | Online Cinema | Analytics, Monitoring | — | — | [Article in Russian, Jan 2018](https://habr.com/en/company/ivi/blog/347408/) | -| Jinshuju 金数据 | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) | -| Kodiak Data | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) | -| Kontur | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | -| Kuaishou | Video | — | — | — | [ClickHouse Meetup, October 2018](https://clickhouse.tech/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/) | -| Lawrence Berkeley National Laboratory | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) | -| LifeStreet | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | -| Mail.ru Cloud Solutions | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | -| Marilyn | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) | -| Mello | Marketing | Analytics | 1 server | — | [Article, Oct 2020](https://vc.ru/marketing/166180-razrabotka-tipovogo-otcheta-skvoznoy-analitiki) | -| MessageBird | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) | -| MindsDB | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) |x -| MUX | Online Video | Video Analytics | — | — | [Talk in English, August 2019](https://altinity.com/presentations/2019/8/13/how-clickhouse-became-the-default-analytics-database-for-mux/) | -| MGID | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) | -| NOC Project | Network Monitoring | Analytics | Main Product | — | [Official 
Website](https://getnoc.com/features/big-data/) | -| Nuna Inc. | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) | -| OneAPM | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) | -| Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | -| Percona | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) | -| Plausible | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) | -| PostHog | Product Analytics | Main Product | — | — | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0) | -| Postmates | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) | -| Pragma Innovation | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) | -| QINGCLOUD | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) | -| Qrator | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | -| Raiffeisenbank | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) | -| Rambler | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | -| Retell | Speech synthesis | Analytics | — | — | [Blog Article, August 2020](https://vc.ru/services/153732-kak-sozdat-audiostati-na-vashem-sayte-i-zachem-eto-nuzhno) | -| Rspamd | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html) | -| RuSIEM | SIEM | Main Product | — | — | [Official Website](https://rusiem.com/en/products/architecture) | -| S7 Airlines | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) | -| scireum GmbH | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) | -| Segment | Data processing | Main product | 9 * i3en.3xlarge nodes 7.5TB NVME SSDs, 96GB Memory, 12 vCPUs | — | [Slides, 2019](https://slides.com/abraithwaite/segment-clickhouse) | -| SEMrush | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) | -| Sentry | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) | -| seo.do | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) | -| SGK | Goverment Social Security | Analytics | — 
| — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) | -| Sina | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) | -| SMI2 | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) | -| Splunk | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) | -| Spotify | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) | -| Staffcop | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) | -| Suning | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) | -| Teralytics | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) | -| Tencent | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | -| Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | -| Tencent Music Entertainment (TME) | BigData | Data processing | — | — | [Blog in Chinese, June 2020](https://cloud.tencent.com/developer/article/1637840) | -| Traffic Stars | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | -| Uber | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) | -| VKontakte | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | -| Walmart Labs | Internet, Retail | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144) | -| Wargaming | Games | | — | — | [Interview](https://habr.com/en/post/496954/) | -| Wisebits | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | -| Workato | Automation Software | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=334) | -| Xiaoxin Tech | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) | -| Ximalaya | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) | -| Yandex Cloud | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) | -| Yandex DataLens | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) | -| Yandex Market | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) | -| Yandex Metrica | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 
servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) | -| ЦВТ | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) | -| МКБ | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) | -| ЦФТ | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) | -| kakaocorp | Internet company | — | — | — | [if(kakao)2020 conference](https://if.kakao.com/session/117) | +| CARTO | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) | +| CERN | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) | +| Cisco | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) | +| Citadel Securities | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) | +| Citymobil | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) | +| Cloudflare | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) | +| Comcast | Media | CDN Traffic Analysis | — | — | [ApacheCon 2019 Talk](https://www.youtube.com/watch?v=e9TZ6gFDjNg) | +| ContentSquare | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | +| Corunet | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) | +| CraiditX 氪信 | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) | +| Crazypanda | Games | | — | — | Live session on ClickHouse meetup | +| Criteo | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) | +| Dataliance for China Telecom | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) | +| Deutsche Bank | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | +| Deeplay | Gaming Analytics | — | — | — | [Job advertisement, 2020](https://career.habr.com/vacancies/1000062568) | +| Diva-e | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) | +| Ecwid | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) | +| eBay 
| E-commerce | Logs, Metrics and Events | — | — | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/) | +| Exness | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | +| FastNetMon | DDoS Protection | Main Product | | — | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) | +| Flipkart | e-Commerce | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239) | +| FunCorp | Games | | — | 14 bn records/day as of Jan 2021 | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) | +| Geniee | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | +| Genotek | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) | +| HUYA | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | +| ICA | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) | +| Idealista | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| Infovista | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | +| InnoGames | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | +| Instana | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) | +| Integros | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | +| Ippon Technologies | Technology Consulting | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=205) | +| Ivi | Online Cinema | Analytics, Monitoring | — | — | [Article in Russian, Jan 2018](https://habr.com/en/company/ivi/blog/347408/) | +| Jinshuju 金数据 | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) | +| Kodiak Data | Clouds | Main product | — | — | [Slides in English, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) | +| Kontur | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | +| Kuaishou | Video | — | — | — | [ClickHouse Meetup, October 2018](https://clickhouse.tech/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/) | +| Lawrence Berkeley National Laboratory | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) | +| LifeStreet | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | +| 
Mail.ru Cloud Solutions | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | +| Marilyn | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) | +| Mello | Marketing | Analytics | 1 server | — | [Article, Oct 2020](https://vc.ru/marketing/166180-razrabotka-tipovogo-otcheta-skvoznoy-analitiki) | +| MessageBird | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) | +| MindsDB | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) | +| MUX | Online Video | Video Analytics | — | — | [Talk in English, August 2019](https://altinity.com/presentations/2019/8/13/how-clickhouse-became-the-default-analytics-database-for-mux/) | +| MGID | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) | +| NOC Project | Network Monitoring | Analytics, Main Product | — | — | [Official Website](https://getnoc.com/features/big-data/) | +| Nuna Inc. | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) | +| OneAPM | Monitoring and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) | +| Panelbear | Analytics | Monitoring and Analytics | — | — | [Tech Stack, November 2020](https://panelbear.com/blog/tech-stack/) | +| Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | +| Percona | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) | +| Plausible | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) | +| PostHog | Product Analytics | Main Product | — | — | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0) | +| Postmates | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) | +| Pragma Innovation | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) | +| PRANA | Industrial predictive analytics | Main product | — | — | [News in Russian, Feb 2021](https://habr.com/en/news/t/541392/) | +| QINGCLOUD | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) | +| Qrator | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | +| Raiffeisenbank | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) | +| Rambler | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | +| Retell | Speech synthesis
| Analytics | — | — | [Blog Article, August 2020](https://vc.ru/services/153732-kak-sozdat-audiostati-na-vashem-sayte-i-zachem-eto-nuzhno) | +| Rspamd | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html) | +| RuSIEM | SIEM | Main Product | — | — | [Official Website](https://rusiem.com/en/products/architecture) | +| S7 Airlines | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) | +| scireum GmbH | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) | +| Segment | Data processing | Main product | 9 * i3en.3xlarge nodes 7.5TB NVMe SSDs, 96GB Memory, 12 vCPUs | — | [Slides, 2019](https://slides.com/abraithwaite/segment-clickhouse) | +| SEMrush | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) | +| Sentry | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) | +| seo.do | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) | +| SGK | Government Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) | +| Sina | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) | +| SMI2 | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) | +| Splunk | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) | +| Spotify | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) | +| Staffcop | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) | +| Suning | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) | +| Teralytics | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) | +| Tencent | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | +| Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | +| Tencent Music Entertainment (TME) | Big Data | Data processing | — | — | [Blog in Chinese, June 2020](https://cloud.tencent.com/developer/article/1637840) | +| Traffic Stars | Ad network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | +| Uber | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) | +| VKontakte | Social Network | Statistics, Logging | — | — | [Slides in Russian, August
2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | +| Walmart Labs | Internet, Retail | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144) | +| Wargaming | Games | | — | — | [Interview](https://habr.com/en/post/496954/) | +| Wisebits | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | +| Workato | Automation Software | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=334) | +| Xiaoxin Tech | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) | +| Ximalaya | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) | +| Yandex Cloud | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) | +| Yandex DataLens | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) | +| Yandex Market | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) | +| Yandex Metrica | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) | +| ЦВТ | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) | +| МКБ | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) | +| ЦФТ | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) | +| kakaocorp | Internet company | — | — | — | [if(kakao)2020 conference](https://if.kakao.com/session/117) | [Original article](https://clickhouse.tech/docs/en/introduction/adopters/) diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index ea37a22c165..f4206f5d70c 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -5,7 +5,7 @@ toc_title: Data Backup # Data Backup {#data-backup} -While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you can’t just drop tables with a MergeTree-like engine containing more than 50 Gb of data](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). However, these safeguards don’t cover all possible cases and can be circumvented. 
+While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you can’t just drop tables with a MergeTree-like engine containing more than 50 Gb of data](server-configuration-parameters/settings.md#max-table-size-to-drop). However, these safeguards don’t cover all possible cases and can be circumvented. In order to effectively mitigate possible human errors, you should carefully prepare a strategy for backing up and restoring your data **in advance**. diff --git a/docs/en/operations/quotas.md b/docs/en/operations/quotas.md index c637ef03f71..56c3eaf6455 100644 --- a/docs/en/operations/quotas.md +++ b/docs/en/operations/quotas.md @@ -29,6 +29,8 @@ Let’s look at the section of the ‘users.xml’ file that defines quotas. <queries>0</queries> + <query_selects>0</query_selects> + <query_inserts>0</query_inserts> <errors>0</errors> <result_rows>0</result_rows> <read_rows>0</read_rows> @@ -48,6 +50,8 @@ The resource consumption calculated for each interval is output to the server lo <duration>3600</duration> <queries>1000</queries> + <query_selects>100</query_selects> + <query_inserts>100</query_inserts> <errors>100</errors> <result_rows>1000000000</result_rows> <read_rows>100000000000</read_rows> @@ -58,6 +62,8 @@ The resource consumption calculated for each interval is output to the server lo <duration>86400</duration> <queries>10000</queries> + <query_selects>10000</query_selects> + <query_inserts>10000</query_inserts> <errors>1000</errors> <result_rows>5000000000</result_rows> <read_rows>500000000000</read_rows> @@ -74,6 +80,10 @@ Here are the amounts that can be restricted: `queries` – The total number of requests. +`query_selects` – The total number of select requests. + +`query_inserts` – The total number of insert requests. + `errors` – The number of queries that threw an exception. `result_rows` – The total number of rows given as a result. diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index a1ed34f10bb..89fcbafe663 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -296,11 +296,33 @@ Useful for breaking away from a specific network interface. <interserver_http_host>example.yandex.ru</interserver_http_host> ``` +## interserver_https_port {#interserver-https-port} + +Port for exchanging data between ClickHouse servers over `HTTPS`. + +**Example** + +``` xml +<interserver_https_port>9010</interserver_https_port> +``` + +## interserver_https_host {#interserver-https-host} + +Similar to `interserver_http_host`, except that this hostname can be used by other servers to access this server over `HTTPS`. + +**Example** + +``` xml +<interserver_https_host>example.yandex.ru</interserver_https_host> +``` + ## interserver_http_credentials {#server-settings-interserver-http-credentials} The username and password used to authenticate during [replication](../../engines/table-engines/mergetree-family/replication.md) with the Replicated\* engines. These credentials are used only for communication between replicas and are unrelated to credentials for ClickHouse clients. The server is checking these credentials for connecting replicas and use the same credentials when connecting to other replicas. So, these credentials should be set the same for all replicas in a cluster. By default, the authentication is not used. +**Note:** These credentials are common for replication through `HTTP` and `HTTPS`. + This section contains the following parameters: - `user` — username. 
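As a sketch of how these parameters fit together, a minimal credentials block might look like this (the `admin`/`222` values are placeholders; the same block must appear in the configuration of every replica):

``` xml
<interserver_http_credentials>
    <user>admin</user>
    <password>222</password>
</interserver_http_credentials>
```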
diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index e0f7c79dcab..77b68715ba9 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -186,5 +186,16 @@ Possible values: Default value: auto (number of CPU cores). During startup ClickHouse reads all parts of all tables (reads files with metadata of parts) to build a list of all parts in memory. In some systems with a large number of parts this process can take a long time, and this time might be shortened by increasing `max_part_loading_threads` (if this process is not CPU and disk I/O bound). +## max_partitions_to_read {#max-partitions-to-read} + +Limits the maximum number of partitions that can be accessed in one query. + +The value specified when the table is created can be overridden at the query level. + +Possible values: + +- Any positive integer. + +Default value: -1 (unlimited). [Original article](https://clickhouse.tech/docs/en/operations/settings/merge_tree_settings/) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 85a3b8bd941..43519bfc8dc 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -312,7 +312,7 @@ Enables or disables parsing enum values as enum ids for TSV input format. Possible values: - 0 — Enum values are parsed as values. -- 1 — Enum values are parsed as enum IDs +- 1 — Enum values are parsed as enum IDs. Default value: 0. @@ -428,7 +428,7 @@ Possible values: - `'basic'` — Use basic parser. - ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `'2019-08-20 10:18:56'` or `2019-08-20`. + ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `2019-08-20 10:18:56` or `2019-08-20`. Default value: `'basic'`. @@ -443,19 +443,19 @@ Allows choosing different output formats of the text representation of date and Possible values: -- `'simple'` - Simple output format. +- `simple` - Simple output format. - Clickhouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `'2019-08-20 10:18:56'`. Calculation is performed according to the data type's time zone (if present) or server time zone. + ClickHouse outputs date and time in `YYYY-MM-DD hh:mm:ss` format. For example, `2019-08-20 10:18:56`. The calculation is performed according to the data type's time zone (if present) or server time zone. -- `'iso'` - ISO output format. +- `iso` - ISO output format. - Clickhouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `'2019-08-20T10:18:56Z'`. Note that output is in UTC (`Z` means UTC). + ClickHouse outputs date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `2019-08-20T10:18:56Z`. Note that output is in UTC (`Z` means UTC). -- `'unix_timestamp'` - Unix timestamp output format. +- `unix_timestamp` - Unix timestamp output format. - Clickhouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `'1566285536'`. + ClickHouse outputs date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example, `1566285536`. -Default value: `'simple'`. +Default value: `simple`. See also: @@ -1944,6 +1944,21 @@ Possible values: Default value: 16. 
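The effective sizes of these background pools can be read back at runtime; a quick sketch, assuming the `background*pool_size` settings are exposed in `system.settings` on this server version:

``` sql
-- list the configured background pool sizes
SELECT name, value
FROM system.settings
WHERE name LIKE 'background%pool_size';
```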
+## background_message_broker_schedule_pool_size {#background_message_broker_schedule_pool_size} + +Sets the number of threads performing background tasks for message streaming. This setting is applied at ClickHouse server start and can’t be changed in a user session. + +Possible values: + +- Any positive integer. + +Default value: 16. + +**See Also** + +- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) engine +- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine) engine + ## validate_polygons {#validate_polygons} Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo/index.md#pointinpolygon) function, if the polygon is self-intersecting or self-tangent. @@ -2577,4 +2592,70 @@ Possible values: Default value: `16`. +## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability} + +Sets the probability that ClickHouse can start a trace for executed queries (if no parent [trace context](https://www.w3.org/TR/trace-context/) is supplied). + +Possible values: + +- 0 — The trace for all executed queries is disabled (if no parent trace context is supplied). +- Positive floating-point number in the range [0..1]. For example, if the setting value is `0.5`, ClickHouse can start a trace on average for half of the queries. +- 1 — The trace for all executed queries is enabled. + +Default value: `0`. + +## optimize_on_insert {#optimize-on-insert} + +Enables or disables data transformation before the insertion, as if a merge was done on this block (according to the table engine). + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + +**Example** + +The difference between enabled and disabled: + +Query: + +```sql +SET optimize_on_insert = 1; + +CREATE TABLE test1 (`FirstTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY FirstTable; + +INSERT INTO test1 SELECT number % 2 FROM numbers(5); + +SELECT * FROM test1; + +SET optimize_on_insert = 0; + +CREATE TABLE test2 (`SecondTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY SecondTable; + +INSERT INTO test2 SELECT number % 2 FROM numbers(5); + +SELECT * FROM test2; +``` + +Result: + +``` text +┌─FirstTable─┐ +│ 0 │ +│ 1 │ +└────────────┘ + +┌─SecondTable─┐ +│ 0 │ +│ 0 │ +│ 0 │ +│ 1 │ +│ 1 │ +└─────────────┘ +``` + +Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour. + [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) diff --git a/docs/en/operations/system-tables/distributed_ddl_queue.md b/docs/en/operations/system-tables/distributed_ddl_queue.md index 643bdee6def..c252458af8a 100644 --- a/docs/en/operations/system-tables/distributed_ddl_queue.md +++ b/docs/en/operations/system-tables/distributed_ddl_queue.md @@ -1,22 +1,21 @@ # system.distributed_ddl_queue {#system_tables-distributed_ddl_queue} -Contains information about distributed ddl queries (ON CLUSTER queries) that were executed on a cluster. +Contains information about [distributed ddl queries (ON CLUSTER clause)](../../sql-reference/distributed-ddl.md) that were executed on a cluster. Columns: -- `entry` ([String](../../sql-reference/data-types/string.md)) - Query id. -- `host_name` ([String](../../sql-reference/data-types/string.md)) - Hostname. -- `host_address` ([String](../../sql-reference/data-types/string.md)) - IP address that the Hostname resolves to. 
-- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) - Host Port. -- `status` ([Enum](../../sql-reference/data-types/enum.md)) - Stats of the query. -- `cluster` ([String](../../sql-reference/data-types/string.md)) - Cluster name. -- `query` ([String](../../sql-reference/data-types/string.md)) - Query executed. -- `initiator` ([String](../../sql-reference/data-types/string.md)) - Nod that executed the query. -- `query_start_time` ([Date](../../sql-reference/data-types/date.md)) — Query start time. -- `query_finish_time` ([Date](../../sql-reference/data-types/date.md)) — Query finish time. -- `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — Duration of query execution in milliseconds. -- `exception_code` ([Enum](../../sql-reference/data-types/enum.md)) - Exception code from ZooKeeper. - +- `entry` ([String](../../sql-reference/data-types/string.md)) — Query id. +- `host_name` ([String](../../sql-reference/data-types/string.md)) — Hostname. +- `host_address` ([String](../../sql-reference/data-types/string.md)) — IP address that the Hostname resolves to. +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Host Port. +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Status of the query. +- `cluster` ([String](../../sql-reference/data-types/string.md)) — Cluster name. +- `query` ([String](../../sql-reference/data-types/string.md)) — Query executed. +- `initiator` ([String](../../sql-reference/data-types/string.md)) — Node that executed the query. +- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time. +- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time. +- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of query execution (in milliseconds). +- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code from [ZooKeeper](../../operations/tips.md#zookeeper). **Example** @@ -62,6 +61,5 @@ exception_code: ZOK 2 rows in set. Elapsed: 0.025 sec. ``` - [Original article](https://clickhouse.tech/docs/en/operations/system_tables/distributed_ddl_queuedistributed_ddl_queue.md) \ No newline at end of file diff --git a/docs/en/operations/system-tables/opentelemetry_span_log.md b/docs/en/operations/system-tables/opentelemetry_span_log.md new file mode 100644 index 00000000000..e45a989742c --- /dev/null +++ b/docs/en/operations/system-tables/opentelemetry_span_log.md @@ -0,0 +1,53 @@ +# system.opentelemetry_span_log {#system_tables-opentelemetry_span_log} + +Contains information about [trace spans](https://opentracing.io/docs/overview/spans/) for executed queries. + +Columns: + +- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md)) — ID of the trace for the executed query. + +- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the `trace span`. + +- `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the parent `trace span`. + +- `operation_name` ([String](../../sql-reference/data-types/string.md)) — The name of the operation. + +- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The start time of the `trace span` (in microseconds). + +- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The finish time of the `trace span` (in microseconds). + +- `finish_date` ([Date](../../sql-reference/data-types/date.md)) — The finish date of the `trace span`. 
+ +- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — [Attribute](https://opentelemetry.io/docs/go/instrumentation/#attributes) names depending on the `trace span`. They are filled in according to the recommendations in the [OpenTelemetry](https://opentelemetry.io/) standard. + +- `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Attribute values depending on the `trace span`. They are filled in according to the recommendations in the `OpenTelemetry` standard. + +**Example** + +Query: + +``` sql +SELECT * FROM system.opentelemetry_span_log LIMIT 1 FORMAT Vertical; +``` + +Result: + +``` text +Row 1: +────── +trace_id: cdab0847-0d62-61d5-4d38-dd65b19a1914 +span_id: 701487461015578150 +parent_span_id: 2991972114672045096 +operation_name: DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl() +start_time_us: 1612374594529090 +finish_time_us: 1612374594529108 +finish_date: 2021-02-03 +attribute.names: [] +attribute.values: [] +``` + +**See Also** + +- [OpenTelemetry](../../operations/opentelemetry.md) + +[Original article](https://clickhouse.tech/docs/en/operations/system_tables/opentelemetry_span_log) diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md index 9aa95b1a493..579fdaefb0a 100644 --- a/docs/en/operations/system-tables/part_log.md +++ b/docs/en/operations/system-tables/part_log.md @@ -6,29 +6,65 @@ This table contains information about events that occurred with [data parts](../ The `system.part_log` table contains the following columns: -- `event_type` (Enum) — Type of the event that occurred with the data part. Can have one of the following values: +- `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the `INSERT` query that created this data part. +- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event that occurred with the data part. Can have one of the following values: - `NEW_PART` — Inserting of a new data part. - `MERGE_PARTS` — Merging of data parts. - `DOWNLOAD_PART` — Downloading a data part. - `REMOVE_PART` — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition). - `MUTATE_PART` — Mutating of a data part. - `MOVE_PART` — Moving the data part from the one disk to another one. -- `event_date` (Date) — Event date. -- `event_time` (DateTime) — Event time. -- `duration_ms` (UInt64) — Duration. -- `database` (String) — Name of the database the data part is in. -- `table` (String) — Name of the table the data part is in. -- `part_name` (String) — Name of the data part. -- `partition_id` (String) — ID of the partition that the data part was inserted to. The column takes the ‘all’ value if the partitioning is by `tuple()`. -- `rows` (UInt64) — The number of rows in the data part. -- `size_in_bytes` (UInt64) — Size of the data part in bytes. -- `merged_from` (Array(String)) — An array of names of the parts which the current part was made up from (after the merge). -- `bytes_uncompressed` (UInt64) — Size of uncompressed bytes. -- `read_rows` (UInt64) — The number of rows was read during the merge. -- `read_bytes` (UInt64) — The number of bytes was read during the merge. -- `error` (UInt16) — The code number of the occurred error. -- `exception` (String) — Text message of the occurred error. 
+- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds precision. + +- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration. +- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database the data part is in. +- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table the data part is in. +- `part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part. +- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`. +- `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files. +- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows in the data part. +- `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of the data part in bytes. +- `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — An array of names of the parts which the current part was made up from (after the merge). +- `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of uncompressed bytes. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows read during the merge. +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes read during the merge. +- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in the context of this thread. +- `error` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The code number of the error that occurred. +- `exception` ([String](../../sql-reference/data-types/string.md)) — Text message of the error that occurred. The `system.part_log` table is created after the first inserting data to the `MergeTree` table. +**Example** + +``` sql +SELECT * FROM system.part_log LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +query_id: 983ad9c7-28d5-4ae1-844e-603116b7de31 +event_type: NewPart +event_date: 2021-02-02 +event_time: 2021-02-02 11:14:28 +event_time_microseconds: 2021-02-02 11:14:28.861919 +duration_ms: 35 +database: default +table: log_mt_2 +part_name: all_1_1_0 +partition_id: all +path_on_disk: db/data/default/log_mt_2/all_1_1_0/ +rows: 115418 +size_in_bytes: 1074311 +merged_from: [] +bytes_uncompressed: 0 +read_rows: 0 +read_bytes: 0 +peak_memory_usage: 0 +error: 0 +exception: +``` + [Original article](https://clickhouse.tech/docs/en/operations/system_tables/part_log) diff --git a/docs/en/operations/system-tables/quota_limits.md b/docs/en/operations/system-tables/quota_limits.md index 065296f5df3..c2dcb4db34d 100644 --- a/docs/en/operations/system-tables/quota_limits.md +++ b/docs/en/operations/system-tables/quota_limits.md @@ -9,6 +9,8 @@ Columns: - `0` — Interval is not randomized. - `1` — Interval is randomized. - `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of queries. 
+- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of select queries. +- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of insert queries. - `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors. - `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of result rows. - `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of RAM volume in bytes used to store a queries result. diff --git a/docs/en/operations/system-tables/quota_usage.md b/docs/en/operations/system-tables/quota_usage.md index 0eb59fd6453..17af9ad9a30 100644 --- a/docs/en/operations/system-tables/quota_usage.md +++ b/docs/en/operations/system-tables/quota_usage.md @@ -9,6 +9,8 @@ Columns: - `end_time`([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — End time for calculating resource consumption. - `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Length of the time interval for calculating resource consumption, in seconds. - `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of requests on this interval. +- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of select requests on this interval. +- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of insert requests on this interval. - `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of requests. - `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The number of queries that threw an exception. - `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors. diff --git a/docs/en/operations/system-tables/quotas_usage.md b/docs/en/operations/system-tables/quotas_usage.md index ed6be820b26..31aafd3e697 100644 --- a/docs/en/operations/system-tables/quotas_usage.md +++ b/docs/en/operations/system-tables/quotas_usage.md @@ -11,6 +11,10 @@ Columns: - `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — Length of the time interval for calculating resource consumption, in seconds. - `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of requests in this interval. - `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of requests. 
+- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of select requests in this interval. +- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of select requests. +- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of insert requests in this interval. +- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of insert requests. - `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The number of queries that threw an exception. - `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors. - `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of rows given as a result. diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md index 8107f60b808..2903e0d3bd7 100644 --- a/docs/en/operations/system-tables/trace_log.md +++ b/docs/en/operations/system-tables/trace_log.md @@ -12,7 +12,7 @@ Columns: - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment. -- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment with microseconds precision. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Timestamp of the sampling moment with microseconds precision. - `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds. diff --git a/docs/en/operations/system-tables/zookeeper.md b/docs/en/operations/system-tables/zookeeper.md index ddb4d305964..82ace5e81dc 100644 --- a/docs/en/operations/system-tables/zookeeper.md +++ b/docs/en/operations/system-tables/zookeeper.md @@ -1,12 +1,16 @@ # system.zookeeper {#system-zookeeper} The table does not exist if ZooKeeper is not configured. Allows reading data from the ZooKeeper cluster defined in the config. -The query must have a ‘path’ equality condition in the WHERE clause. This is the path in ZooKeeper for the children that you want to get data for. +The query must have either a `path =` condition or a `path IN` condition in the `WHERE` clause, as shown below. This corresponds to the path of the children in ZooKeeper that you want to get data for. The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children on the `/clickhouse` node. To output data for all root nodes, write path = ‘/’. If the path specified in ‘path’ doesn’t exist, an exception will be thrown. +The query `SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse')` outputs data for all children on the `/` and `/clickhouse` nodes. +If any path in the specified ‘path’ collection does not exist, an exception will be thrown. +This makes it possible to run a batch of ZooKeeper path queries. + Columns: - `name` (String) — The name of the node. 
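For example, the batched form described above could be used like this (a sketch; the `/clickhouse` node exists on typical setups that use ZooKeeper):

``` sql
-- read children of several ZooKeeper paths in one query
SELECT name, path
FROM system.zookeeper
WHERE path IN ('/', '/clickhouse');
```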
diff --git a/docs/en/operations/update.md b/docs/en/operations/update.md index edacf1ff973..9fa9c44e130 100644 --- a/docs/en/operations/update.md +++ b/docs/en/operations/update.md @@ -1,9 +1,9 @@ --- toc_priority: 47 -toc_title: ClickHouse Update +toc_title: ClickHouse Upgrade --- -# ClickHouse Update {#clickhouse-update} +# ClickHouse Upgrade {#clickhouse-upgrade} If ClickHouse was installed from `deb` packages, execute the following commands on the server: @@ -16,3 +16,19 @@ $ sudo service clickhouse-server restart If you installed ClickHouse using something other than the recommended `deb` packages, use the appropriate update method. ClickHouse does not support a distributed update. The operation should be performed consecutively on each separate server. Do not update all the servers on a cluster simultaneously, or the cluster will be unavailable for some time. + +To upgrade ClickHouse from an older version to a specific version: + +As an example: + +`xx.yy.a.b` is the current stable version. The latest stable version can be found [here](https://github.com/ClickHouse/ClickHouse/releases). + +```bash +$ sudo apt-get update +$ sudo apt-get install clickhouse-server=xx.yy.a.b clickhouse-client=xx.yy.a.b clickhouse-common-static=xx.yy.a.b +$ sudo service clickhouse-server restart +``` + + + + diff --git a/docs/en/sql-reference/aggregate-functions/combinators.md b/docs/en/sql-reference/aggregate-functions/combinators.md index 431968bc629..015c90e90c7 100644 --- a/docs/en/sql-reference/aggregate-functions/combinators.md +++ b/docs/en/sql-reference/aggregate-functions/combinators.md @@ -72,7 +72,7 @@ If an aggregate function doesn’t have input values, with this combinator it re OrDefault(x) ``` -**Parameters** +**Arguments** - `x` — Aggregate function parameters. @@ -132,7 +132,7 @@ This combinator converts a result of an aggregate function to the [Nullable](../ OrNull(x) ``` -**Parameters** +**Arguments** - `x` — Aggregate function parameters. @@ -189,7 +189,7 @@ Lets you divide data into groups, and then separately aggregates the data in tho Resample(start, end, step)(, resampling_key) ``` -**Parameters** +**Arguments** - `start` — Starting value of the whole required interval for `resampling_key` values. - `stop` — Ending value of the whole required interval for `resampling_key` values. The whole interval doesn’t include the `stop` value `[start, stop)`. diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 3b02e145ff4..035bc91b9ed 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -17,10 +17,13 @@ histogram(number_of_bins)(values) The functions uses [A Streaming Parallel Decision Tree Algorithm](http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf). The borders of histogram bins are adjusted as new data enters a function. In common case, the widths of bins are not equal. +**Arguments** + +`values` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in input values. + **Parameters** `number_of_bins` — Upper limit for the number of bins in the histogram. The function automatically calculates the number of bins. It tries to reach the specified number of bins, but if it fails, it uses fewer bins. -`values` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in input values. 
**Returned values** @@ -89,14 +92,16 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...) !!! warning "Warning" Events that occur at the same second may lay in the sequence in an undefined order affecting the result. -**Parameters** - -- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). +**Arguments** - `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types. - `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them. +**Parameters** + +- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). + **Returned values** - 1, if the pattern is matched. @@ -176,14 +181,16 @@ Counts the number of event chains that matched the pattern. The function searche sequenceCount(pattern)(timestamp, cond1, cond2, ...) ``` -**Parameters** - -- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). +**Arguments** - `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types. - `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them. +**Parameters** + +- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). + **Returned values** - Number of non-overlapping event chains that are matched. @@ -239,14 +246,17 @@ The function works according to the algorithm: windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN) ``` -**Parameters** +**Arguments** -- `window` — Length of the sliding window in seconds. -- `mode` - It is an optional argument. - - `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values. - `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, it’s value can’t exceed the Int64 maximum, which is 2^63 - 1). - `cond` — Conditions or data describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md). +**Parameters** + +- `window` — Length of the sliding window. The unit of `window` depends on the `timestamp` itself and varies. It is determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`. +- `mode` — An optional parameter. + - `'strict'` — When `'strict'` is set, the `windowFunnel()` applies conditions only to unique values. + **Returned value** The maximum number of consecutive triggered conditions from the chain within the sliding time window. @@ -324,7 +334,7 @@ The conditions, except the first, apply in pairs: the result of the second will retention(cond1, cond2, ..., cond32); ``` -**Parameters** +**Arguments** - `cond` — an expression that returns a `UInt8` result (1 or 0). 
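As a minimal sketch of how such condition chains are written in practice (the `retention_test` table and its `date` and `uid` columns are hypothetical):

``` sql
-- for each user: [seen on the base date, returned the next day]
SELECT
    uid,
    retention(date = '2020-01-01', date = '2020-01-02') AS r
FROM retention_test
GROUP BY uid;
```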
diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md index 35e87d49e60..7639117042f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md @@ -4,13 +4,42 @@ toc_priority: 106 # argMax {#agg-function-argmax} -Syntax: `argMax(arg, val)` or `argMax(tuple(arg, val))` +Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, returns the first of these values encountered. -Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, the first of these values encountered is output. +Tuple version of this function will return the tuple with the maximum `val` value. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md). -Tuple version of this function will return the tuple with the maximum `val` value. It is convinient for use with `SimpleAggregateFunction`. +**Syntax** -**Example:** +``` sql +argMax(arg, val) +``` + +or + +``` sql +argMax(tuple(arg, val)) +``` + +**Arguments** + +- `arg` — Argument. +- `val` — Value. + +**Returned value** + +- `arg` value that corresponds to maximum `val` value. + +Type: matches `arg` type. + +For tuple in the input: + +- Tuple `(arg, val)`, where `val` is the maximum value and `arg` is a corresponding value. + +Type: [Tuple](../../../sql-reference/data-types/tuple.md). + +**Example** + +Input table: ``` text ┌─user─────┬─salary─┐ @@ -20,12 +49,18 @@ Tuple version of this function will return the tuple with the maximum `val` valu └──────────┴────────┘ ``` +Query: + ``` sql -SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary +SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary; ``` +Result: + ``` text ┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐ │ director │ ('director',5000) │ └──────────────────────┴─────────────────────────────┘ ``` + +[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmax/) diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md index 72c9bce6817..7ddc38cd28a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md @@ -4,13 +4,42 @@ toc_priority: 105 # argMin {#agg-function-argmin} -Syntax: `argMin(arg, val)` or `argMin(tuple(arg, val))` +Calculates the `arg` value for a minimum `val` value. If there are several different values of `arg` for minimum values of `val`, returns the first of these values encountered. -Calculates the `arg` value for a minimal `val` value. If there are several different values of `arg` for minimal values of `val`, the first of these values encountered is output. +Tuple version of this function will return the tuple with the minimum `val` value. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md). -Tuple version of this function will return the tuple with the minimal `val` value. It is convinient for use with `SimpleAggregateFunction`. +**Syntax** -**Example:** +``` sql +argMin(arg, val) +``` + +or + +``` sql +argMin(tuple(arg, val)) +``` + +**Arguments** + +- `arg` — Argument. +- `val` — Value. 
+ +**Returned value** + +- `arg` value that corresponds to minimum `val` value. + +Type: matches `arg` type. + +For tuple in the input: + +- Tuple `(arg, val)`, where `val` is the minimum value and `arg` is a corresponding value. + +Type: [Tuple](../../../sql-reference/data-types/tuple.md). + +**Example** + +Input table: ``` text ┌─user─────┬─salary─┐ @@ -20,12 +49,18 @@ Tuple version of this function will return the tuple with the minimal `val` valu └──────────┴────────┘ ``` +Query: + ``` sql -SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary +SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary; ``` +Result: + ``` text ┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐ │ worker │ ('worker',1000) │ └──────────────────────┴─────────────────────────────┘ ``` + +[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmin/) diff --git a/docs/en/sql-reference/aggregate-functions/reference/avg.md b/docs/en/sql-reference/aggregate-functions/reference/avg.md index e2e6aace734..12dc4ac1e9d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avg.md @@ -12,7 +12,7 @@ Calculates the arithmetic mean. avg(x) ``` -**Parameter** +**Arguments** - `x` — Values. diff --git a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md index 7b9c0de2755..2df09e560b4 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md @@ -12,7 +12,7 @@ Calculates the [weighted arithmetic mean](https://en.wikipedia.org/wiki/Weighted avgWeighted(x, weight) ``` -**Parameters** +**Arguments** - `x` — Values. - `weight` — Weights of the values. diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md index e5d31429e12..0a5aef2fe97 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/count.md +++ b/docs/en/sql-reference/aggregate-functions/reference/count.md @@ -10,7 +10,7 @@ ClickHouse supports the following syntaxes for `count`: - `count(expr)` or `COUNT(DISTINCT expr)`. - `count()` or `COUNT(*)`. The `count()` syntax is ClickHouse-specific. -**Parameters** +**Arguments** The function can take: diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md new file mode 100644 index 00000000000..bb6f802ccaf --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md @@ -0,0 +1,19 @@ +--- +toc_priority: 141 +--- + +# deltaSum {#agg_functions-deltasum} + +Syntax: `deltaSum(value)` + +Adds the differences between consecutive rows. If the difference is negative, it is ignored. +`value` must be of an integer or floating-point type.
+ +Example: + +```sql +select deltaSum(arrayJoin([1, 2, 3])); -- => 2 +select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3])); -- => 7 +select deltaSum(arrayJoin([2.25, 3, 4.5])); -- => 2.25 +``` + diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md index f4b8665a0a4..68456bf7844 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md @@ -17,7 +17,7 @@ If in one query several values are inserted into the same position, the function - If a query is executed in a single thread, the first one of the inserted values is used. - If a query is executed in multiple threads, the resulting value is an undetermined one of the inserted values. -**Parameters** +**Arguments** - `x` — Value to be inserted. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../../sql-reference/data-types/index.md). - `pos` — Position at which the specified element `x` is to be inserted. Index numbering in the array starts from zero. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges). diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md index 1cd40c2002f..c732efecf58 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md @@ -13,7 +13,7 @@ groupArrayMovingAvg(window_size)(numbers_for_summing) The function can take the window size as a parameter. If left unspecified, the function takes the window size equal to the number of rows in the column. -**Parameters** +**Arguments** - `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value. - `window_size` — Size of the calculation window. diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md index ef979cd5f6a..c3dfeda850e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md @@ -13,7 +13,7 @@ groupArrayMovingSum(window_size)(numbers_for_summing) The function can take the window size as a parameter. If left unspecified, the function takes the window size equal to the number of rows in the column. -**Parameters** +**Arguments** - `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value. - `window_size` — Size of the calculation window. diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md index 36fa6a9d661..df0b8120eef 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md @@ -12,7 +12,7 @@ Creates an array of sample argument values. The size of the resulting array is l groupArraySample(max_size[, seed])(x) ``` -**Parameters** +**Arguments** - `max_size` — Maximum size of the resulting array. [UInt64](../../data-types/int-uint.md). - `seed` — Seed for the random number generator. Optional. 
[UInt64](../../data-types/int-uint.md). Default value: `123456`. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md index 9be73fd54ec..1275ad7536c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md @@ -10,7 +10,7 @@ Applies bitwise `AND` for series of numbers. groupBitAnd(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `UInt*` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md index 9367652db38..9317ef98783 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md @@ -10,7 +10,7 @@ Bitmap or Aggregate calculations from a unsigned integer column, return cardinal groupBitmap(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `UInt*` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md index 7c0c89040bb..f59bb541a42 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md @@ -10,7 +10,7 @@ Calculations the AND of a bitmap column, return cardinality of type UInt64, if a groupBitmapAnd(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md index 894c6c90aab..a4d99fd29e3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md @@ -10,7 +10,7 @@ Calculations the OR of a bitmap column, return cardinality of type UInt64, if ad groupBitmapOr(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md index 5d0ec0fb097..834f088d02f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md @@ -10,7 +10,7 @@ Calculations the XOR of a bitmap column, return cardinality of type UInt64, if a groupBitmapOr(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md index 7383e620060..e427a9ad970 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md @@ -10,7 +10,7 @@ Applies bitwise `OR` for series of numbers. groupBitOr(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `UInt*` type. 
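+
+For a quick illustration (the values are inlined with `arrayJoin`, so no table is assumed):
+
+``` sql
+SELECT groupBitOr(num) FROM (SELECT arrayJoin([3, 5, 6]) AS num); -- 0b011 OR 0b101 OR 0b110 = 0b111 = 7
+```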
diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md index 01026012b91..4b8323f92db 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md @@ -10,7 +10,7 @@ Applies bitwise `XOR` for series of numbers. groupBitXor(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `UInt*` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md b/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md index ea44d5f1ddd..313d6bf81f5 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md +++ b/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md @@ -13,7 +13,7 @@ Use it for tests or to process columns of types `AggregateFunction` and `Aggrega initializeAggregation (aggregate_function, column_1, column_2); ``` -**Parameters** +**Arguments** - `aggregate_function` — Name of the aggregation function whose state is created. [String](../../../sql-reference/data-types/string.md#string). - `column_n` — The column to pass to the function as its argument. [String](../../../sql-reference/data-types/string.md#string). diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md index 65e7e31b9b4..db402c99663 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md @@ -10,7 +10,7 @@ Computes the [kurtosis](https://en.wikipedia.org/wiki/Kurtosis) of a sequence. kurtPop(expr) ``` -**Parameters** +**Arguments** `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number. diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md index 224bbbdb9e7..4bb9f76763b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md @@ -12,7 +12,7 @@ It represents an unbiased estimate of the kurtosis of a random variable if passe kurtSamp(expr) ``` -**Parameters** +**Arguments** `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number. diff --git a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md new file mode 100644 index 00000000000..12982849513 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md @@ -0,0 +1,73 @@ +--- +toc_priority: 310 +toc_title: mannWhitneyUTest +--- + +# mannWhitneyUTest {#mannwhitneyutest} + +Applies the Mann-Whitney rank test to samples from two populations. + +**Syntax** + +``` sql +mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_index) ``` + +Values of both samples are in the `sample_data` column. If `sample_index` equals 0, the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population. +The null hypothesis is that the two populations are stochastically equal. One-sided hypotheses can also be tested. This test does not assume that the data have a normal distribution.
+ +**Arguments** + +- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Parameters** + +- `alternative` — alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md). + - `'two-sided'`; + - `'greater'`; + - `'less'`. +- `continuity_correction` - if not 0 then continuity correction in the normal approximation for the p-value is applied. (Optional, default: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned values** + +[Tuple](../../../sql-reference/data-types/tuple.md) with two elements: +- calculated U-statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). + + +**Example** + +Input table: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 10 │ 0 │ +│ 11 │ 0 │ +│ 12 │ 0 │ +│ 1 │ 1 │ +│ 2 │ 1 │ +│ 3 │ 1 │ +└─────────────┴──────────────┘ +``` + +Query: + +``` sql +SELECT mannWhitneyUTest('greater')(sample_data, sample_index) FROM mww_ttest; +``` + +Result: + +``` text +┌─mannWhitneyUTest('greater')(sample_data, sample_index)─┐ +│ (9,0.04042779918503192) │ +└────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [Mann–Whitney U test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test) +- [Stochastic ordering](https://en.wikipedia.org/wiki/Stochastic_ordering) + +[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest/) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantile.md b/docs/en/sql-reference/aggregate-functions/reference/quantile.md index 77f858a1735..d625ef4cfd9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantile.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantile.md @@ -18,7 +18,7 @@ quantile(level)(expr) Alias: `median`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md index 6046447dd10..a20ac26f599 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md @@ -18,7 +18,7 @@ quantileDeterministic(level)(expr, determinator) Alias: `medianDeterministic`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). 
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md index a39f724f368..06ef7ccfbd3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md @@ -18,7 +18,7 @@ quantileExact(level)(expr) Alias: `medianExact`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). @@ -77,7 +77,7 @@ quantileExact(level)(expr) Alias: `medianExactLow`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). @@ -128,7 +128,7 @@ quantileExactHigh(level)(expr) Alias: `medianExactHigh`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md index 3251f8298a6..210f44e7587 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md @@ -18,7 +18,7 @@ quantileExactWeighted(level)(expr, weight) Alias: `medianExactWeighted`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). 
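+
+For a quick illustration of the weighted median (the values and weights are inlined, so no table is assumed):
+
+``` sql
+SELECT quantileExactWeighted(0.5)(t.1, t.2)
+FROM (SELECT arrayJoin([(1, 1), (2, 1), (3, 4)]) AS t); -- with these weights, half of the total weight is reached at the value 3
+```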
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md index bda98ea338d..dcc665a68af 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md @@ -20,7 +20,7 @@ quantileTDigest(level)(expr) Alias: `medianTDigest`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md index 309cbe95e95..56ef598f7e7 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md @@ -20,7 +20,7 @@ quantileTDigest(level)(expr) Alias: `medianTDigest`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md index 867e8b87e74..58ce6495a96 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md @@ -18,7 +18,7 @@ quantileTiming(level)(expr) Alias: `medianTiming`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md index 0f8606986c8..fb3b9dbf4d2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md @@ -18,7 +18,7 @@ quantileTimingWeighted(level)(expr, weight) Alias: `medianTimingWeighted`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). 
@@ -79,6 +79,40 @@ Result: └───────────────────────────────────────────────┘ ``` +# quantilesTimingWeighted {#quantilestimingweighted} + +Same as `quantileTimingWeighted`, but accepts several quantile levels as parameters and returns an Array filled with the values of those quantiles. + + +**Example** + +Input table: + +``` text +┌─response_time─┬─weight─┐ +│ 68 │ 1 │ +│ 104 │ 2 │ +│ 112 │ 3 │ +│ 126 │ 2 │ +│ 138 │ 1 │ +│ 162 │ 1 │ +└───────────────┴────────┘ +``` + +Query: + +``` sql +SELECT quantilesTimingWeighted(0.5, 0.99)(response_time, weight) FROM t; +``` + +Result: + +``` text +┌─quantilesTimingWeighted(0.5, 0.99)(response_time, weight)─┐ +│ [112,162] │ +└───────────────────────────────────────────────────────────┘ +``` + **See Also** - [median](../../../sql-reference/aggregate-functions/reference/median.md#median) diff --git a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md index dc23029f239..55ee1b8289b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md +++ b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md @@ -8,7 +8,7 @@ Computes a rank correlation coefficient. rankCorr(x, y) ``` -**Parameters** +**Arguments** - `x` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64). - `y` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64). diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md index d15a5ffdd47..b9dfc390f9d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md @@ -10,7 +10,7 @@ Computes the [skewness](https://en.wikipedia.org/wiki/Skewness) of a sequence. skewPop(expr) ``` -**Parameters** +**Arguments** `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number. diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md index cb323f4b142..f7a6df8f507 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md @@ -12,7 +12,7 @@ It represents an unbiased estimate of the skewness of a random variable if passe skewSamp(expr) ``` -**Parameters** +**Arguments** `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number. diff --git a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md new file mode 100644 index 00000000000..ba10c1d62d9 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md @@ -0,0 +1,65 @@ +--- +toc_priority: 300 +toc_title: studentTTest +--- + +# studentTTest {#studentttest} + +Applies Student's t-test to samples from two populations. + +**Syntax** + +``` sql +studentTTest(sample_data, sample_index) +``` + +Values of both samples are in the `sample_data` column. If `sample_index` equals 0, the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population. +The null hypothesis is that the means of the populations are equal.
Normal distribution with equal variances is assumed. + +**Arguments** + +- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Returned values** + +[Tuple](../../../sql-reference/data-types/tuple.md) with two elements: +- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). + + +**Example** + +Input table: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 20.3 │ 0 │ +│ 21.1 │ 0 │ +│ 21.9 │ 1 │ +│ 21.7 │ 0 │ +│ 19.9 │ 1 │ +│ 21.8 │ 1 │ +└─────────────┴──────────────┘ +``` + +Query: + +``` sql +SELECT studentTTest(sample_data, sample_index) FROM student_ttest; +``` + +Result: + +``` text +┌─studentTTest(sample_data, sample_index)───┐ +│ (-0.21739130434783777,0.8385421208415731) │ +└───────────────────────────────────────────┘ +``` + +**See Also** + +- [Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test) +- [welchTTest function](welchttest.md#welchttest) + +[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/studentttest/) diff --git a/docs/en/sql-reference/aggregate-functions/reference/topk.md b/docs/en/sql-reference/aggregate-functions/reference/topk.md index 004a67d33af..b3e79803ba1 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topk.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topk.md @@ -16,7 +16,7 @@ This function doesn’t provide a guaranteed result. In certain situations, erro We recommend using the `N < 10` value; performance is reduced with large `N` values. Maximum value of `N = 65536`. -**Parameters** +**Arguments** - ‘N’ is the number of elements to return. diff --git a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md index b597317f44e..02b9f77ea6f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md @@ -12,7 +12,7 @@ Similar to `topK` but takes one additional argument of integer type - `weight`. topKWeighted(N)(x, weight) ``` -**Parameters** +**Arguments** - `N` — The number of elements to return. diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniq.md b/docs/en/sql-reference/aggregate-functions/reference/uniq.md index 81d1ec6761e..7ba2cdc6cb8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniq.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniq.md @@ -10,7 +10,7 @@ Calculates the approximate number of different values of the argument. uniq(x[, ...]) ``` -**Parameters** +**Arguments** The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md index c52486bc38f..4434686ae61 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md @@ -12,7 +12,7 @@ uniqCombined(HLL_precision)(x[, ...]) The `uniqCombined` function is a good choice for calculating the number of different values. 
-**Parameters** +**Arguments** The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md index 9a6224533c8..eee675016ee 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md @@ -14,7 +14,7 @@ Use the `uniqExact` function if you absolutely need an exact result. Otherwise u The `uniqExact` function uses more memory than `uniq`, because the size of the state has unbounded growth as the number of different values increases. -**Parameters** +**Arguments** The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md index fcddc22cc46..5b23ea81eae 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md @@ -10,7 +10,7 @@ Calculates the approximate number of different argument values, using the [Hyper uniqHLL12(x[, ...]) ``` -**Parameters** +**Arguments** The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. diff --git a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md new file mode 100644 index 00000000000..18cff885867 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md @@ -0,0 +1,65 @@ +--- +toc_priority: 301 +toc_title: welchTTest +--- + +# welchTTest {#welchttest} + +Applies Welch's t-test to samples from two populations. + +**Syntax** + +``` sql +welchTTest(sample_data, sample_index) +``` + +Values of both samples are in the `sample_data` column. If `sample_index` equals 0, the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population. +The null hypothesis is that the means of the populations are equal. A normal distribution is assumed. The populations may have unequal variances. + +**Arguments** + +- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Returned values** + +[Tuple](../../../sql-reference/data-types/tuple.md) with two elements: +- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
+ + +**Example** + +Input table: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 20.3 │ 0 │ +│ 22.1 │ 0 │ +│ 21.9 │ 0 │ +│ 18.9 │ 1 │ +│ 20.3 │ 1 │ +│ 19 │ 1 │ +└─────────────┴──────────────┘ +``` + +Query: + +``` sql +SELECT welchTTest(sample_data, sample_index) FROM welch_ttest; +``` + +Result: + +``` text +┌─welchTTest(sample_data, sample_index)─────┐ +│ (2.7988719532211235,0.051807360348581945) │ +└───────────────────────────────────────────┘ +``` + +**See Also** + +- [Welch's t-test](https://en.wikipedia.org/wiki/Welch%27s_t-test) +- [studentTTest function](studentttest.md#studentttest) + +[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/welchTTest/) diff --git a/docs/en/sql-reference/data-types/array.md b/docs/en/sql-reference/data-types/array.md index 48957498d63..41e35aaa96f 100644 --- a/docs/en/sql-reference/data-types/array.md +++ b/docs/en/sql-reference/data-types/array.md @@ -45,6 +45,8 @@ SELECT [1, 2] AS x, toTypeName(x) ## Working with Data Types {#working-with-data-types} +The maximum size of an array is limited to one million elements. + When creating an array on the fly, ClickHouse automatically defines the argument type as the narrowest data type that can store all the listed arguments. If there are any [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) or literal [NULL](../../sql-reference/syntax.md#null-literal) values, the type of an array element also becomes [Nullable](../../sql-reference/data-types/nullable.md). If ClickHouse couldn’t determine the data type, it generates an exception. For instance, this happens when trying to create an array with strings and numbers simultaneously (`SELECT array(1, 'a')`). diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md new file mode 100644 index 00000000000..58634e5b669 --- /dev/null +++ b/docs/en/sql-reference/data-types/map.md @@ -0,0 +1,83 @@ +--- +toc_priority: 65 +toc_title: Map(key, value) +--- + +# Map(key, value) {#data_type-map} + +`Map(key, value)` data type stores `key:value` pairs. + +**Parameters** +- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md). +- `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md). + +!!! warning "Warning" + Currently the `Map` data type is an experimental feature. To work with it, you must set `allow_experimental_map_type = 1`. + +To get the value from an `a Map('key', 'value')` column, use the `a['key']` syntax. This lookup currently works with linear complexity. + +**Examples** + +Consider the table: + +``` sql +CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory; +INSERT INTO table_map VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30}); +``` + +Select all `key2` values: + +```sql +SELECT a['key2'] FROM table_map; +``` +Result: + +```text +┌─arrayElement(a, 'key2')─┐ +│ 10 │ +│ 20 │ +│ 30 │ +└─────────────────────────┘ +``` + +If there's no such `key` in the `Map()` column, the query returns zeros for numerical values, empty strings, or empty arrays.
+ +```sql +INSERT INTO table_map VALUES ({'key3':100}), ({}); +SELECT a['key3'] FROM table_map; +``` + +Result: + +```text +┌─arrayElement(a, 'key3')─┐ +│ 100 │ +│ 0 │ +└─────────────────────────┘ +┌─arrayElement(a, 'key3')─┐ +│ 0 │ +│ 0 │ +│ 0 │ +└─────────────────────────┘ +``` + +## Convert Tuple to Map Type {#map-and-tuple} + +You can cast `Tuple()` as `Map()` using [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function: + +``` sql +SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map; +``` + +``` text +┌─map───────────────────────────┐ +│ {1:'Ready',2:'Steady',3:'Go'} │ +└───────────────────────────────┘ +``` + +**See Also** + +- [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function +- [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function + +[Original article](https://clickhouse.tech/docs/en/data-types/map/) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 05c418b1f15..efef91b4b09 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -208,8 +208,8 @@ This function returns the value for the specified `id`s and the date range that Details of the algorithm: - If the `id` is not found or a range is not found for the `id`, it returns the default value for the dictionary. -- If there are overlapping ranges, you can use any. -- If the range delimiter is `NULL` or an invalid date (such as 1900-01-01 or 2039-01-01), the range is left open. The range can be open on both sides. +- If there are overlapping ranges, it returns value for any (random) range. +- If the range delimiter is `NULL` or an invalid date (such as 1900-01-01), the range is open. The range can be open on both sides. Configuration example: diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index dc7727bdfd8..c9c418d57a4 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -61,7 +61,7 @@ Combines arrays passed as arguments. arrayConcat(arrays) ``` -**Parameters** +**Arguments** - `arrays` – Arbitrary number of arguments of [Array](../../sql-reference/data-types/array.md) type. **Example** @@ -111,7 +111,7 @@ Checks whether one array is a subset of another. hasAll(set, subset) ``` -**Parameters** +**Arguments** - `set` – Array of any type with a set of elements. - `subset` – Array of any type with elements that should be tested to be a subset of `set`. @@ -149,7 +149,7 @@ Checks whether two arrays have intersection by some elements. hasAny(array1, array2) ``` -**Parameters** +**Arguments** - `array1` – Array of any type with a set of elements. - `array2` – Array of any type with a set of elements. @@ -191,7 +191,7 @@ For Example: - `hasSubstr([1,2,3,4], [2,3])` returns 1. However, `hasSubstr([1,2,3,4], [3,2])` will return `0`. - `hasSubstr([1,2,3,4], [1,2,3])` returns 1. However, `hasSubstr([1,2,3,4], [1,2,4])` will return `0`. -**Parameters** +**Arguments** - `array1` – Array of any type with a set of elements. - `array2` – Array of any type with a set of elements. @@ -369,7 +369,7 @@ Removes the last item from the array. 
arrayPopBack(array) ``` -**Parameters** +**Arguments** - `array` – Array. @@ -393,7 +393,7 @@ Removes the first item from the array. arrayPopFront(array) ``` -**Parameters** +**Arguments** - `array` – Array. @@ -417,7 +417,7 @@ Adds one item to the end of the array. arrayPushBack(array, single_value) ``` -**Parameters** +**Arguments** - `array` – Array. - `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. @@ -442,7 +442,7 @@ Adds one element to the beginning of the array. arrayPushFront(array, single_value) ``` -**Parameters** +**Arguments** - `array` – Array. - `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. @@ -467,7 +467,7 @@ Changes the length of the array. arrayResize(array, size[, extender]) ``` -**Parameters:** +**Arguments:** - `array` — Array. - `size` — Required length of the array. @@ -509,7 +509,7 @@ Returns a slice of the array. arraySlice(array, offset[, length]) ``` -**Parameters** +**Arguments** - `array` – Array of data. - `offset` – Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the array items begins with 1. @@ -751,7 +751,7 @@ Calculates the difference between adjacent array elements. Returns an array wher arrayDifference(array) ``` -**Parameters** +**Arguments** - `array` – [Array](https://clickhouse.tech/docs/en/data_types/array/). @@ -803,7 +803,7 @@ Takes an array, returns an array containing the distinct elements only. arrayDistinct(array) ``` -**Parameters** +**Arguments** - `array` – [Array](https://clickhouse.tech/docs/en/data_types/array/). @@ -871,7 +871,7 @@ Applies an aggregate function to array elements and returns its result. The name arrayReduce(agg_func, arr1, arr2, ..., arrN) ``` -**Parameters** +**Arguments** - `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). - `arr` — Any number of [array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. @@ -936,7 +936,7 @@ Applies an aggregate function to array elements in given ranges and returns an a arrayReduceInRanges(agg_func, ranges, arr1, arr2, ..., arrN) ``` -**Parameters** +**Arguments** - `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). - `ranges` — The ranges to aggretate which should be an [array](../../sql-reference/data-types/array.md) of [tuples](../../sql-reference/data-types/tuple.md) which containing the index and the length of each range. @@ -1007,7 +1007,7 @@ flatten(array_of_arrays) Alias: `flatten`. 
-**Parameters** +**Arguments** - `array_of_arrays` — [Array](../../sql-reference/data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. @@ -1033,7 +1033,7 @@ Removes consecutive duplicate elements from an array. The order of result values arrayCompact(arr) ``` -**Parameters** +**Arguments** `arr` — The [array](../../sql-reference/data-types/array.md) to inspect. @@ -1069,7 +1069,7 @@ Combines multiple arrays into a single array. The resulting array contains the c arrayZip(arr1, arr2, ..., arrN) ``` -**Parameters** +**Arguments** - `arrN` — [Array](../../sql-reference/data-types/array.md). @@ -1107,7 +1107,7 @@ Calculate AUC (Area Under the Curve, which is a concept in machine learning, see arrayAUC(arr_scores, arr_labels) ``` -**Parameters** +**Arguments** - `arr_scores` — scores prediction model gives. - `arr_labels` — labels of samples, usually 1 for positive sample and 0 for negtive sample. @@ -1288,73 +1288,226 @@ Returns the index of the first element in the `arr1` array for which `func` retu Note that the `arrayFirstIndex` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayMin(\[func,\] arr1, …) {#array-min} +## arrayMin {#array-min} -Returns the min of the `func` values. If the function is omitted, it just returns the min of the array elements. +Returns the minimum of elements in the source array. + +If the `func` function is specified, returns the mininum of elements converted by this function. Note that the `arrayMin` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -Examples: +**Syntax** + ```sql -SELECT arrayMin([1, 2, 4]) AS res +arrayMin([func,] arr) +``` + +**Arguments** + +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- The minimum of function values (or the array minimum). + +Type: if `func` is specified, matches `func` return value type, else matches the array elements type. + +**Examples** + +Query: + +```sql +SELECT arrayMin([1, 2, 4]) AS res; +``` + +Result: + +```text ┌─res─┐ │ 1 │ └─────┘ +``` +Query: -SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res +```sql +SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res; +``` + +Result: + +```text ┌─res─┐ │ -4 │ └─────┘ ``` -## arrayMax(\[func,\] arr1, …) {#array-max} +## arrayMax {#array-max} -Returns the max of the `func` values. If the function is omitted, it just returns the max of the array elements. +Returns the maximum of elements in the source array. + +If the `func` function is specified, returns the maximum of elements converted by this function. Note that the `arrayMax` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -Examples: +**Syntax** + ```sql -SELECT arrayMax([1, 2, 4]) AS res +arrayMax([func,] arr) +``` + +**Arguments** + +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- The maximum of function values (or the array maximum). + +Type: if `func` is specified, matches `func` return value type, else matches the array elements type. 
+ +**Examples** + +Query: + +```sql +SELECT arrayMax([1, 2, 4]) AS res; +``` + +Result: + +```text ┌─res─┐ │ 4 │ └─────┘ +``` +Query: -SELECT arrayMax(x -> (-x), [1, 2, 4]) AS res +```sql +SELECT arrayMax(x -> (-x), [1, 2, 4]) AS res; +``` + +Result: + +```text ┌─res─┐ │ -1 │ └─────┘ ``` -## arraySum(\[func,\] arr1, …) {#array-sum} +## arraySum {#array-sum} -Returns the sum of the `func` values. If the function is omitted, it just returns the sum of the array elements. +Returns the sum of elements in the source array. + +If the `func` function is specified, returns the sum of elements converted by this function. Note that the `arraySum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -Examples: +**Syntax** + ```sql -SELECT arraySum([2,3]) AS res +arraySum([func,] arr) +``` + +**Arguments** + +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- The sum of the function values (or the array sum). + +Type: for decimal numbers in source array (or for converted values, if `func` is specified) — [Decimal128](../../sql-reference/data-types/decimal.md), for floating point numbers — [Float64](../../sql-reference/data-types/float.md), for numeric unsigned — [UInt64](../../sql-reference/data-types/int-uint.md), and for numeric signed — [Int64](../../sql-reference/data-types/int-uint.md). + +**Examples** + +Query: + +```sql +SELECT arraySum([2, 3]) AS res; +``` + +Result: + +```text ┌─res─┐ │ 5 │ └─────┘ +``` +Query: -SELECT arraySum(x -> x*x, [2, 3]) AS res +```sql +SELECT arraySum(x -> x*x, [2, 3]) AS res; +``` + +Result: + +```text ┌─res─┐ │ 13 │ └─────┘ ``` +## arrayAvg {#array-avg} -## arrayAvg(\[func,\] arr1, …) {#array-avg} +Returns the average of elements in the source array. -Returns the average of the `func` values. If the function is omitted, it just returns the average of the array elements. +If the `func` function is specified, returns the average of elements converted by this function. Note that the `arrayAvg` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. +**Syntax** + +```sql +arrayAvg([func,] arr) +``` + +**Arguments** + +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- The average of function values (or the array average). + +Type: [Float64](../../sql-reference/data-types/float.md). + +**Examples** + +Query: + +```sql +SELECT arrayAvg([1, 2, 4]) AS res; +``` + +Result: + +```text +┌────────────────res─┐ +│ 2.3333333333333335 │ +└────────────────────┘ +``` + +Query: + +```sql +SELECT arrayAvg(x -> (x * x), [2, 4]) AS res; +``` + +Result: + +```text +┌─res─┐ +│ 10 │ +└─────┘ +``` + ## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} Returns an array of partial sums of elements in the source array (a running sum). If the `func` function is specified, then the values of the array elements are converted by this function before summing. 
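+
+For a quick illustration:
+
+``` sql
+SELECT arrayCumSum([1, 1, 1, 1]) AS res; -- res = [1, 2, 3, 4]
+```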
diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index 57c2ae42ada..a3d0c82d8ab 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -35,7 +35,7 @@ Takes any integer and converts it into [binary form](https://en.wikipedia.org/wi SELECT bitTest(number, index) ``` -**Parameters** +**Arguments** - `number` – integer number. - `index` – position of bit. @@ -100,7 +100,7 @@ The conjuction for bitwise operations: SELECT bitTestAll(number, index1, index2, index3, index4, ...) ``` -**Parameters** +**Arguments** - `number` – integer number. - `index1`, `index2`, `index3`, `index4` – positions of bit. For example, for set of positions (`index1`, `index2`, `index3`, `index4`) is true if and only if all of its positions are true (`index1` ⋀ `index2`, ⋀ `index3` ⋀ `index4`). @@ -165,7 +165,7 @@ The disjunction for bitwise operations: SELECT bitTestAny(number, index1, index2, index3, index4, ...) ``` -**Parameters** +**Arguments** - `number` – integer number. - `index1`, `index2`, `index3`, `index4` – positions of bit. @@ -220,7 +220,7 @@ Calculates the number of bits set to one in the binary representation of a numbe bitCount(x) ``` -**Parameters** +**Arguments** - `x` — [Integer](../../sql-reference/data-types/int-uint.md) or [floating-point](../../sql-reference/data-types/float.md) number. The function uses the value representation in memory. It allows supporting floating-point numbers. diff --git a/docs/en/sql-reference/functions/bitmap-functions.md b/docs/en/sql-reference/functions/bitmap-functions.md index a66098beffb..bfff70576f2 100644 --- a/docs/en/sql-reference/functions/bitmap-functions.md +++ b/docs/en/sql-reference/functions/bitmap-functions.md @@ -21,7 +21,7 @@ Build a bitmap from unsigned integer array. bitmapBuild(array) ``` -**Parameters** +**Arguments** - `array` – unsigned integer array. @@ -45,7 +45,7 @@ Convert bitmap to integer array. bitmapToArray(bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -69,7 +69,7 @@ Return subset in specified range (not include the range_end). bitmapSubsetInRange(bitmap, range_start, range_end) ``` -**Parameters** +**Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). - `range_start` – range start point. Type: [UInt32](../../sql-reference/data-types/int-uint.md). @@ -97,7 +97,7 @@ Creates a subset of bitmap with n elements taken between `range_start` and `card bitmapSubsetLimit(bitmap, range_start, cardinality_limit) ``` -**Parameters** +**Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). - `range_start` – The subset starting point. Type: [UInt32](../../sql-reference/data-types/int-uint.md). @@ -133,7 +133,7 @@ Checks whether the bitmap contains an element. bitmapContains(haystack, needle) ``` -**Parameters** +**Arguments** - `haystack` – [Bitmap object](#bitmap_functions-bitmapbuild), where the function searches. - `needle` – Value that the function searches. Type: [UInt32](../../sql-reference/data-types/int-uint.md). @@ -167,7 +167,7 @@ bitmapHasAny(bitmap1, bitmap2) If you are sure that `bitmap2` contains strictly one element, consider using the [bitmapContains](#bitmap_functions-bitmapcontains) function. It works more efficiently. -**Parameters** +**Arguments** - `bitmap*` – bitmap object. @@ -197,7 +197,7 @@ If the second argument is an empty bitmap then returns 1. 
bitmapHasAll(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -221,7 +221,7 @@ Retrun bitmap cardinality of type UInt64. bitmapCardinality(bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -243,7 +243,7 @@ Retrun the smallest value of type UInt64 in the set, UINT32_MAX if the set is em bitmapMin(bitmap) -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -263,7 +263,7 @@ Retrun the greatest value of type UInt64 in the set, 0 if the set is empty. bitmapMax(bitmap) -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -283,7 +283,7 @@ Transform an array of values in a bitmap to another array of values, the result bitmapTransform(bitmap, from_array, to_array) -**Parameters** +**Arguments** - `bitmap` – bitmap object. - `from_array` – UInt32 array. For idx in range \[0, from_array.size()), if bitmap contains from_array\[idx\], then replace it with to_array\[idx\]. Note that the result depends on array ordering if there are common elements between from_array and to_array. @@ -307,7 +307,7 @@ Two bitmap and calculation, the result is a new bitmap. bitmapAnd(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -331,7 +331,7 @@ Two bitmap or calculation, the result is a new bitmap. bitmapOr(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -355,7 +355,7 @@ Two bitmap xor calculation, the result is a new bitmap. bitmapXor(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -379,7 +379,7 @@ Two bitmap andnot calculation, the result is a new bitmap. bitmapAndnot(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -403,7 +403,7 @@ Two bitmap and calculation, return cardinality of type UInt64. bitmapAndCardinality(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -427,7 +427,7 @@ Two bitmap or calculation, return cardinality of type UInt64. bitmapOrCardinality(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -451,7 +451,7 @@ Two bitmap xor calculation, return cardinality of type UInt64. bitmapXorCardinality(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -475,7 +475,7 @@ Two bitmap andnot calculation, return cardinality of type UInt64. bitmapAndnotCardinality(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. diff --git a/docs/en/sql-reference/functions/conditional-functions.md b/docs/en/sql-reference/functions/conditional-functions.md index 446a4729ff2..2d57cbb3bd5 100644 --- a/docs/en/sql-reference/functions/conditional-functions.md +++ b/docs/en/sql-reference/functions/conditional-functions.md @@ -17,7 +17,7 @@ SELECT if(cond, then, else) If the condition `cond` evaluates to a non-zero value, returns the result of the expression `then`, and the result of the expression `else`, if present, is skipped. If the `cond` is zero or `NULL`, then the result of the `then` expression is skipped and the result of the `else` expression, if present, is returned. -**Parameters** +**Arguments** - `cond` – The condition for evaluation that can be zero or not. The type is UInt8, Nullable(UInt8) or NULL. - `then` - The expression to return if condition is met. 
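As a quick illustrative sketch of the `if` behavior described above (an editor's example, not part of the patch): a non-zero condition selects the `then` branch and the `else` expression is skipped.

```sql
-- cond = 1 is non-zero, so plus(2, 2) is evaluated and returned; plus(2, 6) is skipped.
SELECT if(1, plus(2, 2), plus(2, 6)) AS res;
```

Expected result: `4`.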
@@ -117,7 +117,7 @@ Allows you to write the [CASE](../../sql-reference/operators/index.md#operator_c Syntax: `multiIf(cond_1, then_1, cond_2, then_2, ..., else)` -**Parameters:** +**Arguments:** - `cond_N` — The condition for the function to return `then_N`. - `then_N` — The result of the function when executed. diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 9de780fb596..f26e1bee6c9 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -186,7 +186,7 @@ Truncates sub-seconds. toStartOfSecond(value[, timezone]) ``` -**Parameters** +**Arguments** - `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). - `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). @@ -328,7 +328,7 @@ For mode values with a meaning of “contains January 1”, the week contains Ja toWeek(date, [, mode][, Timezone]) ``` -**Parameters** +**Arguments** - `date` – Date or DateTime. - `mode` – Optional parameter, Range of values is \[0,9\], default is 0. @@ -378,9 +378,9 @@ date_trunc(unit, value[, timezone]) Alias: `dateTrunc`. -**Parameters** +**Arguments** -- `unit` — Part of date. [String](../syntax.md#syntax-string-literal). +- `unit` — The type of interval to truncate the result. [String Literal](../syntax.md#syntax-string-literal). Possible values: - `second` @@ -435,6 +435,201 @@ Result: - [toStartOfInterval](#tostartofintervaltime-or-data-interval-x-unit-time-zone) +## date\_add {#date_add} + +Adds the specified date/time interval to the provided date. + +**Syntax** + +``` sql +date_add(unit, value, date) +``` + +Aliases: `dateAdd`, `DATE_ADD`. + +**Arguments** + +- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). + + Supported values: second, minute, hour, day, week, month, quarter, year. +- `value` — Value in the specified unit. [Int](../../sql-reference/data-types/int-uint.md). +- `date` — [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). + + +**Returned value** + +Returns Date or DateTime with `value` expressed in `unit` added to `date`. + +**Example** + +```sql +SELECT date_add(YEAR, 3, toDate('2018-01-01')); +``` + +```text +┌─plus(toDate('2018-01-01'), toIntervalYear(3))─┐ +│ 2021-01-01 │ +└───────────────────────────────────────────────┘ +``` + +## date\_diff {#date_diff} + +Returns the difference between two Date or DateTime values. + +**Syntax** + +``` sql +date_diff('unit', startdate, enddate, [timezone]) +``` + +Aliases: `dateDiff`, `DATE_DIFF`. + +**Arguments** + +- `unit` — The type of interval for the result. [String](../../sql-reference/data-types/string.md). + + Supported values: second, minute, hour, day, week, month, quarter, year. + +- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). + +- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). + +- `timezone` — Optional parameter. If specified, it is applied to both `startdate` and `enddate`. 
If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. + +**Returned value** + +Difference between `enddate` and `startdate` expressed in `unit`. + +Type: `int`. + +**Example** + +Query: + +``` sql +SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); +``` + +Result: + +``` text +┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ +│ 25 │ +└────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +## date\_sub {#date_sub} + +Subtracts a time/date interval from the provided date. + +**Syntax** + +``` sql +date_sub(unit, value, date) +``` + +Aliases: `dateSub`, `DATE_SUB`. + +**Arguments** + +- `unit` — The type of interval to subtract. [String](../../sql-reference/data-types/string.md). + + Supported values: second, minute, hour, day, week, month, quarter, year. +- `value` — Value in the specified unit. [Int](../../sql-reference/data-types/int-uint.md). +- `date` — [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md) to subtract value from. + +**Returned value** + +Returns Date or DateTime with `value` expressed in `unit` subtracted from `date`. + +**Example** + +Query: + +``` sql +SELECT date_sub(YEAR, 3, toDate('2018-01-01')); +``` + +Result: + +``` text +┌─minus(toDate('2018-01-01'), toIntervalYear(3))─┐ +│ 2015-01-01 │ +└────────────────────────────────────────────────┘ +``` + +## timestamp\_add {#timestamp_add} + +Adds the specified time value to the provided date or date-time value. + +**Syntax** + +``` sql +timestamp_add(date, INTERVAL value unit) +``` + +Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. + +**Arguments** + +- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `value` — Value in the specified unit. [Int](../../sql-reference/data-types/int-uint.md). +- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). + + Supported values: second, minute, hour, day, week, month, quarter, year. + +**Returned value** + +Returns Date or DateTime with the specified `value` expressed in `unit` added to `date`. + +**Example** + +```sql +SELECT timestamp_add(toDate('2018-01-01'), INTERVAL 3 MONTH); +``` + +```text +┌─plus(toDate('2018-01-01'), toIntervalMonth(3))─┐ +│ 2018-04-01 │ +└────────────────────────────────────────────────┘ +``` + +## timestamp\_sub {#timestamp_sub} + +Subtracts the specified time/date interval from the provided date or date-time value. + +**Syntax** + +``` sql +timestamp_sub(unit, value, date) +``` + +Aliases: `timeStampSub`, `TIMESTAMP_SUB`. + +**Arguments** + +- `unit` — The type of interval to subtract. [String](../../sql-reference/data-types/string.md). + + Supported values: second, minute, hour, day, week, month, quarter, year. +- `value` — Value in the specified unit. [Int](../../sql-reference/data-types/int-uint.md). +- `date` — [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). + +**Returned value** + +Returns Date or DateTime with the specified `value` expressed in `unit` subtracted from `date`. 
+ +**Example** + +```sql +select timestamp_sub(MONTH, 5, toDateTime('2018-12-18 01:02:03')); +``` + +```text +┌─minus(toDateTime('2018-12-18 01:02:03'), toIntervalMonth(5))─┐ +│ 2018-07-18 01:02:03 │ +└──────────────────────────────────────────────────────────────┘ +``` + ## now {#now} Returns the current date and time. @@ -445,7 +640,7 @@ Returns the current date and time. now([timezone]) ``` -**Parameters** +**Arguments** - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). @@ -550,50 +745,6 @@ SELECT └──────────────────────────┴───────────────────────────────┘ ``` -## dateDiff {#datediff} - -Returns the difference between two Date or DateTime values. - -**Syntax** - -``` sql -dateDiff('unit', startdate, enddate, [timezone]) -``` - -**Parameters** - -- `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal). - - Supported values: second, minute, hour, day, week, month, quarter, year. - -- `startdate` — The first time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). - -- `enddate` — The second time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). - -- `timezone` — Optional parameter. If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. - -**Returned value** - -Difference between `startdate` and `enddate` expressed in `unit`. - -Type: `int`. - -**Example** - -Query: - -``` sql -SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); -``` - -Result: - -``` text -┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ -│ 25 │ -└────────────────────────────────────────────────────────────────────────────────────────┘ -``` - ## timeSlots(StartTime, Duration,\[, Size\]) {#timeslotsstarttime-duration-size} For a time interval starting at ‘StartTime’ and continuing for ‘Duration’ seconds, it returns an array of moments in time, consisting of points from this interval rounded down to the ‘Size’ in seconds. ‘Size’ is an optional parameter: a constant UInt32, set to 1800 by default. @@ -704,7 +855,7 @@ Converts a [Proleptic Gregorian calendar](https://en.wikipedia.org/wiki/Prolepti toModifiedJulianDay(date) ``` -**Parameters** +**Arguments** - `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). @@ -740,7 +891,7 @@ Similar to [toModifiedJulianDay()](#tomodifiedjulianday), but instead of raising toModifiedJulianDayOrNull(date) ``` -**Parameters** +**Arguments** - `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). @@ -776,7 +927,7 @@ Converts a [Modified Julian Day](https://en.wikipedia.org/wiki/Julian_day#Varian fromModifiedJulianDay(day) ``` -**Parameters** +**Arguments** - `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). 
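An illustrative conversion for the Modified Julian Day functions above (an editor's sketch, not part of the patch; day 58849 after the MJD epoch of 1858-11-17 falls on 2020-01-01):

```sql
-- Convert an MJD number back to a calendar date in text form.
SELECT fromModifiedJulianDay(58849) AS date;
```

Expected result: `2020-01-01`.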
@@ -812,7 +963,7 @@ Similar to [fromModifiedJulianDayOrNull()](#frommodifiedjuliandayornull), but in fromModifiedJulianDayOrNull(day) ``` -**Parameters** +**Arguments** - `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index bc3f5ca4345..31e84c08b39 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -15,7 +15,7 @@ Returns the string with the length as the number of passed arguments and each by char(number_1, [number_2, ..., number_n]); ``` -**Parameters** +**Arguments** - `number_1, number_2, ..., number_n` — Numerical arguments interpreted as integers. Types: [Int](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md). @@ -107,7 +107,7 @@ For `String` and `FixedString`, all bytes are simply encoded as two hexadecimal Values of floating point and Decimal types are encoded as their representation in memory. As we support little endian architecture, they are encoded in little endian. Zero leading/trailing bytes are not omitted. -**Parameters** +**Arguments** - `arg` — A value to convert to hexadecimal. Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md index bef2f8137d0..0dd7469b25e 100644 --- a/docs/en/sql-reference/functions/encryption-functions.md +++ b/docs/en/sql-reference/functions/encryption-functions.md @@ -11,7 +11,7 @@ Key length depends on encryption mode. It is 16, 24, and 32 bytes long for `-128 Initialization vector length is always 16 bytes (bytes in excess of 16 are ignored). -Note that these functions work slowly. +Note that these functions work slowly in versions prior to ClickHouse 21.1. ## encrypt {#encrypt} @@ -31,7 +31,7 @@ This function encrypts data using these modes: encrypt('mode', 'plaintext', 'key' [, iv, aad]) ``` -**Parameters** +**Arguments** - `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). - `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string). - `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string). @@ -41,7 +41,7 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad]) **Returned value** -- Ciphered String. [String](../../sql-reference/data-types/string.md#string). +- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string). 
**Examples** @@ -52,57 +52,38 @@ Query: ``` sql CREATE TABLE encryption_test ( - input String, - key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), - iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), - key32 String DEFAULT substring(key, 1, 32), - key24 String DEFAULT substring(key, 1, 24), - key16 String DEFAULT substring(key, 1, 16) -) Engine = Memory; + `comment` String, + `secret` String +) +ENGINE = Memory ``` -Insert this data: +Insert some data (note that storing keys or IVs in the database is unsafe, as it undermines the whole concept of encryption; storing 'hints' is also unsafe and is done here only for illustrative purposes): Query: ``` sql -INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +INSERT INTO encryption_test VALUES('aes-256-cfb128 no IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212')),\ +('aes-256-cfb128 no IV, different key', encrypt('aes-256-cfb128', 'Secret', 'keykeykeykeykeykeykeykeykeykeyke')),\ +('aes-256-cfb128 with IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')),\ +('aes-256-cbc no IV', encrypt('aes-256-cbc', 'Secret', '12345678910121314151617181920212')); ``` -Example without `iv`: - Query: ``` sql -SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test; +SELECT comment, hex(secret) FROM encryption_test; ``` Result: ``` text -┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐ -│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │ -│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │ -│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │ -└─────────────┴──────────────────────────────────────────────────────────────────┘ -``` - -Example with `iv`: - -Query: - -``` sql -SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test; -``` - -Result: - -``` text -┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐ -│ aes-256-ctr │ │ -│ aes-256-ctr │ 7FB039F7 │ -│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │ -└─────────────┴───────────────────────────────────────────────┘ +┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐ +│ aes-256-cfb128 no IV │ B4972BDC4459 │ +│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │ +│ aes-256-cfb128 with IV │ 5E6CB398F653 │ +│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │ +└─────────────────────────────────────┴──────────────────────────────────┘ ``` Example with `-gcm`: @@ -110,40 +91,26 @@ Example with `-gcm`: Query: ``` sql -SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test; +INSERT INTO encryption_test VALUES('aes-256-gcm', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')), \ +('aes-256-gcm with AAD', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv', 'aad')); + +SELECT comment, hex(secret) FROM encryption_test WHERE comment LIKE '%gcm%'; ``` Result: ``` text -┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐ -│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │ -│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │ -│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │ -└─────────────┴────────────────────────────────────────────────────────────────────────┘ 
-``` - -Example with `-gcm` mode and with `aad`: - -Query: - -``` sql -SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test; -``` - -Result: - -``` text -┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐ -│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │ -│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │ -│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │ -└─────────────┴────────────────────────────────────────────────────────────────────────┘ +┌─comment──────────────┬─hex(secret)──────────────────────────────────┐ +│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │ +│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │ +└──────────────────────┴──────────────────────────────────────────────┘ ``` ## aes_encrypt_mysql {#aes_encrypt_mysql} -Compatible with mysql encryption and can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function. +Compatible with MySQL encryption; the resulting ciphertext can be decrypted with the [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function. + +On equal inputs it produces the same ciphertext as `encrypt`. However, when `key` or `iv` is longer than it should normally be, `aes_encrypt_mysql` sticks to what MySQL's `aes_encrypt` does: it 'folds' the `key` and ignores the excess bits of the `IV`. Supported encryption modes: @@ -156,86 +123,106 @@ Supported encryption modes: **Syntax** -```sql +``` sql aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv]) ``` -**Parameters** +**Arguments** - `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). - `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string). -- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Optinal. [String](../../sql-reference/data-types/string.md#string). +- `key` — Encryption key. If the key is longer than required by the mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string). +- `iv` — Initialization vector. Optional; only the first 16 bytes are taken into account. [String](../../sql-reference/data-types/string.md#string). **Returned value** -- Ciphered String. [String](../../sql-reference/data-types/string.md#string). +- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string). 
+ +**Examples** -Create this table: +Given equal input, `encrypt` and `aes_encrypt_mysql` produce the same ciphertext: Query: ``` sql -CREATE TABLE encryption_test -( - input String, - key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), - iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), - key32 String DEFAULT substring(key, 1, 32), - key24 String DEFAULT substring(key, 1, 24), - key16 String DEFAULT substring(key, 1, 16) -) Engine = Memory; +SELECT encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') = aes_encrypt_mysql('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') AS ciphertexts_equal; ``` -Insert this data: +Result: -Query: - -``` sql -INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +``` text +┌─ciphertexts_equal─┐ +│ 1 │ +└───────────────────┘ ``` -Example without `iv`: + +But `encrypt` fails when `key` or `iv` is longer than expected: Query: ``` sql -SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test; +SELECT encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123'); ``` Result: ``` text -┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐ -│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │ -│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │ -│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │ -└─────────────┴──────────────────────────────────────────────────────────────────┘ +Received exception from server (version 21.1.2): +Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123'). 
``` -Example with `iv`: +While `aes_encrypt_mysql` produces MySQL-compatible output: Query: ``` sql -SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test; +SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123')) AS ciphertext; +``` + +Result: + +```text +┌─ciphertext───┐ +│ 24E9E4966469 │ +└──────────────┘ +``` + +Notice how supplying an even longer `IV` produces the same result: + +Query: + +``` sql +SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456')) AS ciphertext; ``` Result: ``` text -┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐ -│ aes-256-cfb128 │ │ -│ aes-256-cfb128 │ 7FB039F7 │ -│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │ -└────────────────┴────────────────────────────────────────────────────────────┘ +┌─ciphertext───┐ +│ 24E9E4966469 │ +└──────────────┘ +``` + +This is binary-equal to what MySQL produces on the same inputs: + +``` sql +mysql> SET block_encryption_mode='aes-256-cfb128'; +Query OK, 0 rows affected (0.00 sec) + +mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext; ++------------------------+ +| ciphertext | ++------------------------+ +| 0x24E9E4966469 | ++------------------------+ +1 row in set (0.00 sec) ``` ## decrypt {#decrypt} -This function decrypts data using these modes: +This function decrypts ciphertext into a plaintext using these modes: - aes-128-ecb, aes-192-ecb, aes-256-ecb - aes-128-cbc, aes-192-cbc, aes-256-cbc @@ -247,11 +234,11 @@ This function decrypts data using these modes: **Syntax** -```sql +``` sql decrypt('mode', 'ciphertext', 'key' [, iv, aad]) ``` -**Parameters** +**Arguments** - `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string). - `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string). @@ -265,51 +252,56 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad]) **Examples** -Create this table: +Re-using the table from [encrypt](./encryption-functions.md#encrypt). Query: ``` sql -CREATE TABLE encryption_test -( - input String, - key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), - iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), - key32 String DEFAULT substring(key, 1, 32), - key24 String DEFAULT substring(key, 1, 24), - key16 String DEFAULT substring(key, 1, 16) -) Engine = Memory; -``` - -Insert this data: - -Query: - -``` sql -INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); -``` - -Query: - -``` sql - -SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test; +SELECT comment, hex(secret) FROM encryption_test; ``` Result: ```text ┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐ -│ aes-128-ecb │ │ -│ aes-128-ecb │ text │ -│ aes-128-ecb │ What Is ClickHouse? 
│ -└─────────────┴─────────────────────────────────────────────────────────────────────┘ +``` text +┌─comment──────────────┬─hex(secret)──────────────────────────────────┐ +│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │ +│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │ +└──────────────────────┴──────────────────────────────────────────────┘ +┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐ +│ aes-256-cfb128 no IV │ B4972BDC4459 │ +│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │ +│ aes-256-cfb128 with IV │ 5E6CB398F653 │ +│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │ +└─────────────────────────────────────┴──────────────────────────────────┘ ``` +Now let's try to decrypt all that data. + +Query: + +``` sql +SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920212') as plaintext FROM encryption_test +``` + +Result: +``` text +┌─comment─────────────────────────────┬─plaintext─┐ +│ aes-256-cfb128 no IV │ Secret │ +│ aes-256-cfb128 no IV, different key │ �4� + � │ +│ aes-256-cfb128 with IV │ ���6�~ │ + │aes-256-cbc no IV │ �2*4�h3c�4w��@ +└─────────────────────────────────────┴───────────┘ +``` + +Notice how only a portion of the data was properly decrypted; the rest is gibberish, since `mode`, `key`, or `iv` differed at encryption time. + ## aes_decrypt_mysql {#aes_decrypt_mysql} Compatible with mysql encryption and decrypts data encrypted with [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function. +On equal inputs it produces the same plaintext as `decrypt`. However, when `key` or `iv` is longer than it should normally be, `aes_decrypt_mysql` sticks to what MySQL's `aes_decrypt` does: it 'folds' the `key` and ignores the excess bits of the `IV`. + Supported decryption modes: - aes-128-ecb, aes-192-ecb, aes-256-ecb @@ -321,11 +313,11 @@ Supported decryption modes: **Syntax** -```sql +``` sql aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) ``` -**Parameters** +**Arguments** - `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string). - `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string). 
@@ -338,44 +330,30 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) **Examples** -Create this table: - -Query: - +Let's decrypt data we've previously encrypted with MySQL: ``` sql -CREATE TABLE encryption_test -( - input String, - key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), - iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), - key32 String DEFAULT substring(key, 1, 32), - key24 String DEFAULT substring(key, 1, 24), - key16 String DEFAULT substring(key, 1, 16) -) Engine = Memory; -``` +mysql> SET block_encryption_mode='aes-256-cfb128'; +Query OK, 0 rows affected (0.00 sec) -Insert this data: - -Query: - -``` sql -INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext; ++------------------------+ +| ciphertext | ++------------------------+ +| 0x24E9E4966469 | ++------------------------+ +1 row in set (0.00 sec) ``` Query: - ``` sql -SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test; +SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext ``` Result: - ``` text -┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐ -│ aes-128-cbc │ │ -│ aes-128-cbc │ text │ -│ aes-128-cbc │ What Is ClickHouse? │ -└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘ +┌─plaintext─┐ +│ Secret │ +└───────────┘ ``` [Original article](https://clickhouse.tech/docs/en/sql-reference/functions/encryption_functions/) diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md index 7df6ef54f2a..834fcdf8282 100644 --- a/docs/en/sql-reference/functions/ext-dict-functions.md +++ b/docs/en/sql-reference/functions/ext-dict-functions.md @@ -19,7 +19,7 @@ dictGet('dict_name', 'attr_name', id_expr) dictGetOrDefault('dict_name', 'attr_name', id_expr, default_value_expr) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). @@ -108,7 +108,7 @@ Checks whether a key is present in a dictionary. dictHas('dict_name', id_expr) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. @@ -130,7 +130,7 @@ Creates an array, containing all the parents of a key in the [hierarchical dicti dictGetHierarchy('dict_name', key) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. 
@@ -149,7 +149,7 @@ Checks the ancestor of a key through the whole hierarchical chain in the diction dictIsIn('dict_name', child_id_expr, ancestor_id_expr) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `child_id_expr` — Key to be checked. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. @@ -185,7 +185,7 @@ dictGet[Type]('dict_name', 'attr_name', id_expr) dictGet[Type]OrDefault('dict_name', 'attr_name', id_expr, default_value_expr) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index c32af7194fb..df75e96c8fb 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -13,7 +13,7 @@ Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal isNull(x) ``` -**Parameters** +**Arguments** - `x` — A value with a non-compound data type. @@ -53,7 +53,7 @@ Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal isNotNull(x) ``` -**Parameters:** +**Arguments:** - `x` — A value with a non-compound data type. @@ -93,7 +93,7 @@ Checks from left to right whether `NULL` arguments were passed and returns the f coalesce(x,...) ``` -**Parameters:** +**Arguments:** - Any number of parameters of a non-compound type. All parameters must be compatible by data type. @@ -136,7 +136,7 @@ Returns an alternative value if the main argument is `NULL`. ifNull(x,alt) ``` -**Parameters:** +**Arguments:** - `x` — The value to check for `NULL`. - `alt` — The value that the function returns if `x` is `NULL`. @@ -176,7 +176,7 @@ Returns `NULL` if the arguments are equal. nullIf(x, y) ``` -**Parameters:** +**Arguments:** `x`, `y` — Values for comparison. They must be compatible types, or ClickHouse will generate an exception. @@ -215,7 +215,7 @@ Results in a value of type [Nullable](../../sql-reference/data-types/nullable.md assumeNotNull(x) ``` -**Parameters:** +**Arguments:** - `x` — The original value. @@ -277,7 +277,7 @@ Converts the argument type to `Nullable`. toNullable(x) ``` -**Parameters:** +**Arguments:** - `x` — The value of any non-compound type. diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md index 6f288a7687d..c27eab0b421 100644 --- a/docs/en/sql-reference/functions/geo/geohash.md +++ b/docs/en/sql-reference/functions/geo/geohash.md @@ -72,7 +72,7 @@ Returns an array of [geohash](#geohash)-encoded strings of given precision that geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precision) ``` -**Parameters** +**Arguments** - `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md). - `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md). 
diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 4ed651e4e9e..9dda947b3a7 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -162,7 +162,7 @@ Returns [H3](#h3index) point index `(lon, lat)` with specified resolution. geoToH3(lon, lat, resolution) ``` -**Parameters** +**Arguments** - `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md). - `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -201,7 +201,7 @@ Result: h3kRing(h3index, k) ``` -**Parameters** +**Arguments** - `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). - `k` — Radius. Type: [integer](../../../sql-reference/data-types/int-uint.md) @@ -315,7 +315,7 @@ Returns whether or not the provided [H3](#h3index) indexes are neighbors. h3IndexesAreNeighbors(index1, index2) ``` -**Parameters** +**Arguments** - `index1` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). - `index2` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -353,7 +353,7 @@ Returns an array of child indexes for the given [H3](#h3index) index. h3ToChildren(index, resolution) ``` -**Parameters** +**Arguments** - `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). - `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -390,7 +390,7 @@ Returns the parent (coarser) index containing the given [H3](#h3index) index. h3ToParent(index, resolution) ``` -**Parameters** +**Arguments** - `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). - `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 9394426b20b..465ad01527f 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -18,9 +18,9 @@ halfMD5(par1, ...) The function is relatively slow (5 million short strings per second per processor core). Consider using the [sipHash64](#hash_functions-siphash64) function instead. -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -61,9 +61,9 @@ Function [interprets](../../sql-reference/functions/type-conversion-functions.md 3. Then the function takes the hash value, calculated at the previous step, and the third element of the initial hash array, and calculates a hash for the array of them. 4. The previous step is repeated for all the remaining elements of the initial hash array. -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -97,9 +97,9 @@ cityHash64(par1,...) This is a fast non-cryptographic hash function. 
It uses the CityHash algorithm for string parameters and implementation-specific fast non-cryptographic hash function for parameters with other data types. The function uses the CityHash combinator to get the final results. -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -166,9 +166,9 @@ farmHash64(par1, ...) These functions use the `Fingerprint64` and `Hash64` methods respectively from all [available methods](https://github.com/google/farmhash/blob/master/src/farmhash.h). -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -226,7 +226,7 @@ Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add97 javaHashUTF16LE(stringUtf16le) ``` -**Parameters** +**Arguments** - `stringUtf16le` — a string in UTF-16LE encoding. @@ -292,9 +292,9 @@ Produces a 64-bit [MetroHash](http://www.jandrewrogers.com/2015/05/27/metrohash/ metroHash64(par1, ...) ``` -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -327,9 +327,9 @@ murmurHash2_32(par1, ...) murmurHash2_64(par1, ...) ``` -**Parameters** +**Arguments** -Both functions take a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -358,7 +358,7 @@ Calculates a 64-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash val gccMurmurHash(par1, ...); ``` -**Parameters** +**Arguments** - `par1, ...` — A variable number of parameters that can be any of the [supported data types](../../sql-reference/data-types/index.md#data_types). @@ -395,9 +395,9 @@ murmurHash3_32(par1, ...) murmurHash3_64(par1, ...) ``` -**Parameters** +**Arguments** -Both functions take a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -424,7 +424,7 @@ Produces a 128-bit [MurmurHash3](https://github.com/aappleby/smhasher) hash valu murmurHash3_128( expr ) ``` -**Parameters** +**Arguments** - `expr` — [Expressions](../../sql-reference/syntax.md#syntax-expressions) returning a [String](../../sql-reference/data-types/string.md)-type value. 
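A small sketch for `murmurHash3_128` (an editor's example, not part of the patch) confirming the shape of the 128-bit digest; `hex` is used because the raw result is binary:

```sql
-- murmurHash3_128 returns 16 raw bytes, i.e. a FixedString(16);
-- hex() of 16 bytes yields 32 hexadecimal characters.
SELECT
    toTypeName(murmurHash3_128('example_string')) AS type,
    length(hex(murmurHash3_128('example_string'))) AS hex_length;
```

Expected result: `FixedString(16)` and `32`.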
diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index bfa1998d68a..964265a461b 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -32,7 +32,7 @@ If you use official ClickHouse packages, you need to install the `clickhouse-com addressToLine(address_of_binary_instruction) ``` -**Parameters** +**Arguments** - `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. @@ -123,7 +123,7 @@ Converts virtual memory address inside ClickHouse server process to the symbol f addressToSymbol(address_of_binary_instruction) ``` -**Parameters** +**Arguments** - `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. @@ -220,7 +220,7 @@ Converts a symbol that you can get using the [addressToSymbol](#addresstosymbol) demangle(symbol) ``` -**Parameters** +**Arguments** - `symbol` ([String](../../sql-reference/data-types/string.md)) — Symbol from an object file. @@ -345,7 +345,7 @@ Emits trace log message to server log for each [Block](https://clickhouse.tech/d logTrace('message') ``` -**Parameters** +**Arguments** - `message` — Message that is emitted to server log. [String](../../sql-reference/data-types/string.md#string). diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index faf551601ac..eaea5e250fb 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -115,9 +115,20 @@ LIMIT 10 ## IPv6StringToNum(s) {#ipv6stringtonums} -The reverse function of IPv6NumToString. If the IPv6 address has an invalid format, it returns a string of null bytes. +The reverse function of IPv6NumToString. If the IPv6 address has an invalid format, it returns a string of null bytes. +If the IP address is a valid IPv4 address then the IPv6 equivalent of the IPv4 address is returned. HEX can be uppercase or lowercase. +``` sql +SELECT cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0); +``` + +``` text +┌─cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0)─┐ +│ ::ffff:127.0.0.1 │ +└─────────────────────────────────────────────┘ +``` + ## IPv4ToIPv6(x) {#ipv4toipv6x} Takes a `UInt32` number. Interprets it as an IPv4 address in [big endian](https://en.wikipedia.org/wiki/Endianness). Returns a `FixedString(16)` value containing the IPv6 address in binary format. Examples: @@ -214,6 +225,7 @@ SELECT ## toIPv6(string) {#toipv6string} An alias to `IPv6StringToNum()` that takes a string form of IPv6 address and returns value of [IPv6](../../sql-reference/data-types/domains/ipv6.md) type, which is binary equal to value returned by `IPv6StringToNum()`. +If the IP address is a valid IPv4 address then the IPv6 equivalent of the IPv4 address is returned. ``` sql WITH @@ -243,33 +255,91 @@ SELECT └───────────────────────────────────┴──────────────────────────────────┘ ``` - -## isIPv4String - -Determines if the input string is an IPv4 address or not. Returns `1` if true `0` otherwise. - ``` sql -SELECT isIPv4String('127.0.0.1') +SELECT toIPv6('127.0.0.1') ``` ``` text -┌─isIPv4String('127.0.0.1')─┐ -│ 1 │ -└───────────────────────────┘ +┌─toIPv6('127.0.0.1')─┐ +│ ::ffff:127.0.0.1 │ +└─────────────────────┘ ``` -## isIPv6String +## isIPv4String {#isipv4string} -Determines if the input string is an IPv6 address or not. 
Returns `1` if true `0` otherwise. +Determines whether the input string is an IPv4 address or not. If `string` is IPv6 address returns `0`. -``` sql -SELECT isIPv6String('2001:438:ffff::407d:1bc1') +**Syntax** + +```sql +isIPv4String(string) ``` +**Arguments** + +- `string` — IP address. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- `1` if `string` is IPv4 address, `0` otherwise. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Examples** + +Query: + +```sql +SELECT addr, isIPv4String(addr) FROM ( SELECT ['0.0.0.0', '127.0.0.1', '::ffff:127.0.0.1'] AS addr ) ARRAY JOIN addr +``` + +Result: + ``` text -┌─isIPv6String('2001:438:ffff::407d:1bc1')─┐ -│ 1 │ -└──────────────────────────────────────────┘ +┌─addr─────────────┬─isIPv4String(addr)─┐ +│ 0.0.0.0 │ 1 │ +│ 127.0.0.1 │ 1 │ +│ ::ffff:127.0.0.1 │ 0 │ +└──────────────────┴────────────────────┘ +``` + +## isIPv6String {#isipv6string} + +Determines whether the input string is an IPv6 address or not. If `string` is IPv4 address returns `0`. + +**Syntax** + +```sql +isIPv6String(string) +``` + +**Arguments** + +- `string` — IP address. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- `1` if `string` is IPv6 address, `0` otherwise. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Examples** + +Query: + +``` sql +SELECT addr, isIPv6String(addr) FROM ( SELECT ['::', '1111::ffff', '::ffff:127.0.0.1', '127.0.0.1'] AS addr ) ARRAY JOIN addr +``` + +Result: + +``` text +┌─addr─────────────┬─isIPv6String(addr)─┐ +│ :: │ 1 │ +│ 1111::ffff │ 1 │ +│ ::ffff:127.0.0.1 │ 1 │ +│ 127.0.0.1 │ 0 │ +└──────────────────┴────────────────────┘ ``` [Original article](https://clickhouse.tech/docs/en/query_language/functions/ip_address_functions/) diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index 05e755eaddc..edee048eb77 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -236,7 +236,7 @@ Extracts raw data from a JSON object. JSONExtractKeysAndValuesRaw(json[, p, a, t, h]) ``` -**Parameters** +**Arguments** - `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. - `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [string](../../sql-reference/data-types/string.md) to get the field by the key or an [integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. diff --git a/docs/en/sql-reference/functions/machine-learning-functions.md b/docs/en/sql-reference/functions/machine-learning-functions.md index 8627fc26bad..f103a4ea421 100644 --- a/docs/en/sql-reference/functions/machine-learning-functions.md +++ b/docs/en/sql-reference/functions/machine-learning-functions.md @@ -27,7 +27,7 @@ Compares test groups (variants) and calculates for each group the probability to bayesAB(distribution_name, higher_is_better, variant_names, x, y) ``` -**Parameters** +**Arguments** - `distribution_name` — Name of the probability distribution. [String](../../sql-reference/data-types/string.md). 
Possible values: diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 8dc287593c7..f56a721c0c0 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -121,7 +121,7 @@ Accepts a numeric argument and returns a UInt64 number close to 10 to the power cosh(x) ``` -**Parameters** +**Arguments** - `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -157,7 +157,7 @@ Result: acosh(x) ``` -**Parameters** +**Arguments** - `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -197,7 +197,7 @@ Result: sinh(x) ``` -**Parameters** +**Arguments** - `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -233,7 +233,7 @@ Result: asinh(x) ``` -**Parameters** +**Arguments** - `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -273,7 +273,7 @@ Result: atanh(x) ``` -**Parameters** +**Arguments** - `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -309,7 +309,7 @@ The [function](https://en.wikipedia.org/wiki/Atan2) calculates the angle in the atan2(y, x) ``` -**Parameters** +**Arguments** - `y` — y-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64). - `x` — x-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -346,7 +346,7 @@ Calculates the length of the hypotenuse of a right-angle triangle. The [function hypot(x, y) ``` -**Parameters** +**Arguments** - `x` — The first cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64). - `y` — The second cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -383,7 +383,7 @@ Calculates `log(1+x)`. The [function](https://en.wikipedia.org/wiki/Natural_loga log1p(x) ``` -**Parameters** +**Arguments** - `x` — Values from the interval: `-1 < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -423,7 +423,7 @@ The `sign` function can extract the sign of a real number. sign(x) ``` -**Parameters** +**Arguments** - `x` — Values from `-∞` to `+∞`. Support all numeric types in ClickHouse. diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 08d34770f57..04e921b5c55 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -19,7 +19,7 @@ Gets a named value from the [macros](../../operations/server-configuration-param getMacro(name); ``` -**Parameters** +**Arguments** - `name` — Name to retrieve from the `macros` section. [String](../../sql-reference/data-types/string.md#string). @@ -108,7 +108,7 @@ Extracts the trailing part of a string after the last slash or backslash. This f basename( expr ) ``` -**Parameters** +**Arguments** - `expr` — Expression resulting in a [String](../../sql-reference/data-types/string.md) type value. All the backslashes must be escaped in the resulting value. 
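A one-line sketch of `basename` (illustrative, not taken from the patch):

```sql
-- Everything up to and including the last slash is removed.
SELECT basename('some/long/path/to/file') AS name;
```

Expected result: `file`.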
@@ -182,13 +182,102 @@ If `NULL` is passed to the function as input, then it returns the `Nullable(Noth Gets the size of the block. In ClickHouse, queries are always run on blocks (sets of column parts). This function allows getting the size of the block that you called it for. -## byteSize(...) {#function-bytesize} +## byteSize {#function-bytesize} -Get an estimate of uncompressed byte size of its arguments in memory. -E.g. for UInt32 argument it will return constant 4, for String argument - the string length + 9 (terminating zero + length). -The function can take multiple arguments. The typical application is byteSize(*). +Returns an estimation of the uncompressed byte size of its arguments in memory. -Use case: Suppose you have a service that stores data for multiple clients in one table. Users will pay per data volume. So, you need to implement accounting of users data volume. The function will allow to calculate the data size on per-row basis. +**Syntax** + +```sql +byteSize(argument [, ...]) +``` + +**Arguments** + +- `argument` — Value. + +**Returned value** + +- Estimation of byte size of the arguments in memory. + +Type: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Examples** + +For [String](../../sql-reference/data-types/string.md) arguments, the function returns the string length + 9 (terminating zero + length). + +Query: + +```sql +SELECT byteSize('string'); +``` + +Result: + +```text +┌─byteSize('string')─┐ +│ 15 │ +└────────────────────┘ +``` + +Query: + +```sql +CREATE TABLE test +( + `key` Int32, + `u8` UInt8, + `u16` UInt16, + `u32` UInt32, + `u64` UInt64, + `i8` Int8, + `i16` Int16, + `i32` Int32, + `i64` Int64, + `f32` Float32, + `f64` Float64 +) +ENGINE = MergeTree +ORDER BY key; + +INSERT INTO test VALUES(1, 8, 16, 32, 64, -8, -16, -32, -64, 32.32, 64.64); + +SELECT key, byteSize(u8) AS `byteSize(UInt8)`, byteSize(u16) AS `byteSize(UInt16)`, byteSize(u32) AS `byteSize(UInt32)`, byteSize(u64) AS `byteSize(UInt64)`, byteSize(i8) AS `byteSize(Int8)`, byteSize(i16) AS `byteSize(Int16)`, byteSize(i32) AS `byteSize(Int32)`, byteSize(i64) AS `byteSize(Int64)`, byteSize(f32) AS `byteSize(Float32)`, byteSize(f64) AS `byteSize(Float64)` FROM test ORDER BY key ASC FORMAT Vertical; +``` + +Result: + +``` text +Row 1: +────── +key: 1 +byteSize(UInt8): 1 +byteSize(UInt16): 2 +byteSize(UInt32): 4 +byteSize(UInt64): 8 +byteSize(Int8): 1 +byteSize(Int16): 2 +byteSize(Int32): 4 +byteSize(Int64): 8 +byteSize(Float32): 4 +byteSize(Float64): 8 +``` + +If the function takes multiple arguments, it returns their combined byte size. + +Query: + +```sql +SELECT byteSize(NULL, 1, 0.3, ''); +``` + +Result: + +```text +┌─byteSize(NULL, 1, 0.3, '')─┐ +│ 19 │ +└────────────────────────────┘ +``` ## materialize(x) {#materializex} @@ -260,7 +349,7 @@ The function is intended for development, debugging and demonstration. isConstant(x) ``` -**Parameters** +**Arguments** - `x` — Expression to check. @@ -331,7 +420,7 @@ Checks whether floating point value is finite. ifNotFinite(x,y) -**Parameters** +**Arguments** - `x` — Value to be checked for infinity. Type: [Float\*](../../sql-reference/data-types/float.md). - `y` — Fallback value. Type: [Float\*](../../sql-reference/data-types/float.md). @@ -371,7 +460,7 @@ Allows building a unicode-art diagram. `bar(x, min, max, width)` draws a band with a width proportional to `(x - min)` and equal to `width` characters when `x = max`. -Parameters: +**Arguments** - `x` — Size to display. - `min, max` — Integer constants. The value must fit in `Int64`. 
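A minimal sketch of the band drawing (illustrative, not from the patch): with `x = 5` on a `[0, 10]` scale and `width = 10`, exactly half of the band is filled.

```sql
-- (x - min) / (max - min) * width = 5 full characters out of 10.
SELECT bar(5, 0, 10, 10) AS band;
```

Expected result: `█████`.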
@@ -556,7 +645,7 @@ Accepts the time delta in seconds. Returns a time delta with (year, month, day, formatReadableTimeDelta(column[, maximum_unit]) ``` -**Parameters** +**Arguments** - `column` — A column with numeric time delta. - `maximum_unit` — Optional. Maximum unit to show. Acceptable values: seconds, minutes, hours, days, months, years. @@ -641,7 +730,7 @@ The result of the function depends on the affected data blocks and the order of The order of rows used during the calculation of `neighbor` can differ from the order of rows returned to the user. To prevent that, you can make a subquery with ORDER BY and call the function from outside the subquery. -**Parameters** +**Arguments** - `column` — A column name or scalar expression. - `offset` — The number of rows forwards or backwards from the current row of `column`. [Int64](../../sql-reference/data-types/int-uint.md). @@ -820,6 +909,66 @@ WHERE diff != 1 Same as [runningDifference](../../sql-reference/functions/other-functions.md#other_functions-runningdifference), except that the first row returns the value of the first row itself, and each subsequent row returns the difference from the previous row. +## runningConcurrency {#runningconcurrency} + +Given a series of event beginning and ending times, this function calculates the concurrency of the events at each data point, that is, at each beginning time. + +!!! warning "Warning" + Events spanning multiple data blocks will not be processed correctly. The function resets its state for each new data block. + +The result of the function depends on the order of data in the block. It assumes the beginning times are sorted in ascending order. + +**Syntax** + +``` sql +runningConcurrency(begin, end) +``` + +**Arguments** + +- `begin` — A column for the beginning time of events (inclusive). [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `end` — A column for the ending time of events (exclusive). [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). + +Note that the two columns `begin` and `end` must have the same type. + +**Returned values** + +- The concurrency of events at the data point. + +Type: [UInt32](../../sql-reference/data-types/int-uint.md). + +**Example** + +Input table: + +``` text +┌───────────────begin─┬─────────────────end─┐ +│ 2020-12-01 00:00:00 │ 2020-12-01 00:59:59 │ +│ 2020-12-01 00:30:00 │ 2020-12-01 00:59:59 │ +│ 2020-12-01 00:40:00 │ 2020-12-01 01:30:30 │ +│ 2020-12-01 01:10:00 │ 2020-12-01 01:30:30 │ +│ 2020-12-01 01:50:00 │ 2020-12-01 01:59:59 │ +└─────────────────────┴─────────────────────┘ +``` + +Query: + +``` sql +SELECT runningConcurrency(begin, end) FROM example +``` + +Result: + +``` text +┌─runningConcurrency(begin, end)─┐ +│ 1 │ +│ 2 │ +│ 3 │ +│ 2 │ +│ 1 │ +└────────────────────────────────┘ +``` + ## MACNumToString(num) {#macnumtostringnum} Accepts a UInt64 number. Interprets it as a MAC address in big endian. Returns a string containing the corresponding MAC address in the format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form). @@ -840,7 +989,7 @@ Returns the number of fields in [Enum](../../sql-reference/data-types/enum.md). getSizeOfEnumType(value) ``` -**Parameters:** +**Arguments:** - `value` — Value of type `Enum`.
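For example, a minimal query (the `Enum8` literal here is just an illustration):

Query:

```sql
SELECT getSizeOfEnumType( CAST('a' AS Enum8('a' = 1, 'b' = 2)) ) AS x;
```

Result:

```text
┌─x─┐
│ 2 │
└───┘
```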
@@ -869,7 +1018,7 @@ Returns size on disk (without taking into account compression). blockSerializedSize(value[, value[, ...]]) ``` -**Parameters** +**Arguments** - `value` — Any value. @@ -901,7 +1050,7 @@ Returns the name of the class that represents the data type of the column in RAM toColumnTypeName(value) ``` -**Parameters:** +**Arguments:** - `value` — Any type of value. @@ -941,7 +1090,7 @@ Outputs a detailed description of data structures in RAM dumpColumnStructure(value) ``` -**Parameters:** +**Arguments:** - `value` — Any type of value. @@ -971,7 +1120,7 @@ Does not include default values for custom columns set by the user. defaultValueOfArgumentType(expression) ``` -**Parameters:** +**Arguments:** - `expression` — Arbitrary type of value or an expression that results in a value of an arbitrary type. @@ -1013,7 +1162,7 @@ Does not include default values for custom columns set by the user. defaultValueOfTypeName(type) ``` -**Parameters:** +**Arguments:** - `type` — A string representing a type name. @@ -1055,7 +1204,7 @@ Used for internal implementation of [arrayJoin](../../sql-reference/functions/ar SELECT replicate(x, arr); ``` -**Parameters:** +**Arguments:** - `arr` — Original array. ClickHouse creates a new array of the same length as the original and fills it with the value `x`. - `x` — The value that the resulting array will be filled with. @@ -1188,7 +1337,7 @@ Takes state of aggregate function. Returns result of aggregation (or finalized s finalizeAggregation(state) ``` -**Parameters** +**Arguments** - `state` — State of aggregation. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). @@ -1292,7 +1441,7 @@ Accumulates states of an aggregate function for each row of a data block. runningAccumulate(agg_state[, grouping]); ``` -**Parameters** +**Arguments** - `agg_state` — State of the aggregate function. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). - `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../../sql-reference/data-types/index.md) for which the equality operator is defined. @@ -1398,7 +1547,7 @@ Only supports tables created with the `ENGINE = Join(ANY, LEFT, )` st joinGet(join_storage_table_name, `value_column`, join_keys) ``` -**Parameters** +**Arguments** - `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed. The identifier is searched in the default database (see parameter `default_database` in the config file). To override the default database, use the `USE db_name` or specify the database and the table through the separator `db_name.db_table`, see the example. - `value_column` — name of the column of the table that contains the required data. @@ -1502,7 +1651,7 @@ Generates a string with a random set of [ASCII](https://en.wikipedia.org/wiki/AS randomPrintableASCII(length) ``` -**Parameters** +**Arguments** - `length` — Resulting string length. Positive integer. @@ -1538,7 +1687,7 @@ Generates a binary string of the specified length filled with random bytes (incl randomString(length) ``` -**Parameters** +**Arguments** - `length` — String length. Positive integer. @@ -1586,7 +1735,7 @@ Generates a binary string of the specified length filled with random bytes (incl randomFixedString(length); ``` -**Parameters** +**Arguments** - `length` — String length in bytes.
[UInt64](../../sql-reference/data-types/int-uint.md). @@ -1624,7 +1773,7 @@ Generates a random string of a specified length. Result string contains valid UT randomStringUTF8(length); ``` -**Parameters** +**Arguments** - `length` — Required length of the resulting string in code points. [UInt64](../../sql-reference/data-types/int-uint.md). @@ -1696,7 +1845,7 @@ Checks whether the [Decimal](../../sql-reference/data-types/decimal.md) value is isDecimalOverflow(d, [p]) ``` -**Parameters** +**Arguments** - `d` — Value. [Decimal](../../sql-reference/data-types/decimal.md). - `p` — Precision. Optional. If omitted, the initial precision of the first argument is used. Using this parameter could be helpful for data extraction to another DBMS or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). @@ -1733,7 +1882,7 @@ Returns number of decimal digits you need to represent the value. countDigits(x) ``` -**Parameters** +**Arguments** - `x` — [Int](../../sql-reference/data-types/int-uint.md) or [Decimal](../../sql-reference/data-types/decimal.md) value. @@ -1792,7 +1941,7 @@ Returns [native interface](../../interfaces/tcp.md) TCP port number listened by tcpPort() ``` -**Parameters** +**Arguments** - None. diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 68998928398..2b9846344e4 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -32,7 +32,7 @@ Produces a constant column with a random value. randConstant([x]) ``` -**Parameters** +**Arguments** - `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself is used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. @@ -81,7 +81,7 @@ fuzzBits([s], [prob]) Inverts bits of `s`, each with probability `prob`. -**Parameters** +**Arguments** - `s` - `String` or `FixedString` - `prob` - constant `Float32/64` diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index 922cf7374d7..83db1975366 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -35,7 +35,7 @@ The function returns the nearest number of the specified order. In case when giv round(expression [, decimal_places]) ``` -**Parameters:** +**Arguments:** - `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). - `decimal_places` — An integer value. @@ -114,7 +114,7 @@ For example, sum numbers 1.5, 2.5, 3.5, 4.5 with different rounding: roundBankers(expression [, decimal_places]) ``` -**Parameters** +**Arguments** - `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). - `decimal_places` — Decimal places. An integer number.
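A quick sketch of banker's rounding on `.5` ties (the input values are arbitrary):

Query:

```sql
SELECT roundBankers(0.5), roundBankers(1.5), roundBankers(2.5), roundBankers(3.5);
```

Result:

```text
┌─roundBankers(0.5)─┬─roundBankers(1.5)─┬─roundBankers(2.5)─┬─roundBankers(3.5)─┐
│                 0 │                 2 │                 2 │                 4 │
└───────────────────┴───────────────────┴───────────────────┴───────────────────┘
```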
diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 25f41211b47..c70ee20f076 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -16,7 +16,7 @@ Returns an array of selected substrings. Empty substrings may be selected if the splitByChar(<separator>, <s>) ``` -**Parameters** +**Arguments** - `separator` — The separator which should contain exactly one character. [String](../../sql-reference/data-types/string.md). - `s` — The string to split. [String](../../sql-reference/data-types/string.md). @@ -53,7 +53,7 @@ Splits a string into substrings separated by a string. It uses a constant string splitByString(<separator>, <s>) ``` -**Parameters** +**Arguments** - `separator` — The separator. [String](../../sql-reference/data-types/string.md). - `s` — The string to split. [String](../../sql-reference/data-types/string.md). @@ -121,7 +121,7 @@ Extracts all groups from non-overlapping substrings matched by a regular express extractAllGroups(text, regexp) ``` -**Parameters** +**Arguments** - `text` — [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 2b93dd924a3..dc5304b39aa 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -76,7 +76,7 @@ Replaces invalid UTF-8 characters by the `�` (U+FFFD) character. All running i toValidUTF8( input_string ) ``` -Parameters: +**Arguments** - `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object. @@ -104,7 +104,7 @@ Repeats a string as many times as specified and concatenates the replicated valu repeat(s, n) ``` -**Parameters** +**Arguments** - `s` — The string to repeat. [String](../../sql-reference/data-types/string.md). - `n` — The number of times to repeat the string. [UInt](../../sql-reference/data-types/int-uint.md). @@ -173,7 +173,7 @@ Concatenates the strings listed in the arguments, without a separator. concat(s1, s2, ...) ``` -**Parameters** +**Arguments** Values of type String or FixedString. @@ -211,7 +211,7 @@ The function is named “injective” if it always returns different result for concatAssumeInjective(s1, s2, ...) ``` -**Parameters** +**Arguments** Values of type String or FixedString. @@ -328,7 +328,7 @@ By default removes all consecutive occurrences of common whitespace (ASCII chara trim([[LEADING|TRAILING|BOTH] trim_character FROM] input_string) ``` -**Parameters** +**Arguments** - `trim_character` — Specified characters to trim. [String](../../sql-reference/data-types/string.md). - `input_string` — String to trim. [String](../../sql-reference/data-types/string.md). @@ -367,7 +367,7 @@ trimLeft(input_string) Alias: `ltrim(input_string)`. -**Parameters** +**Arguments** - `input_string` — String to trim. [String](../../sql-reference/data-types/string.md). @@ -405,7 +405,7 @@ trimRight(input_string) Alias: `rtrim(input_string)`. -**Parameters** +**Arguments** - `input_string` — String to trim. [String](../../sql-reference/data-types/string.md). @@ -443,7 +443,7 @@ trimBoth(input_string) Alias: `trim(input_string)`.
-**Parameters** +**Arguments** - `input_string` — String to trim. [String](../../sql-reference/data-types/string.md). @@ -496,7 +496,7 @@ Replaces literals, sequences of literals and complex aliases with placeholders. normalizeQuery(x) ``` -**Parameters** +**Arguments** - `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). @@ -532,7 +532,7 @@ Returns identical 64bit hash values without the values of literals for similar q normalizedQueryHash(x) ``` -**Parameters** +**Arguments** - `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). @@ -570,7 +570,7 @@ The following five XML predefined entities will be replaced: `<`, `&`, `>`, `"`, encodeXMLComponent(x) ``` -**Parameters** +**Arguments** - `x` — The sequence of characters. [String](../../sql-reference/data-types/string.md). diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 4036974dd37..83b0edea438 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -14,8 +14,6 @@ The search is case-sensitive by default in all these functions. There are separa Returns the position (in bytes) of the found substring in the string, starting from 1. -Works under the assumption that the string contains a set of bytes representing a single-byte encoded text. If this assumption is not met and a character can’t be represented using a single byte, the function doesn’t throw an exception and returns some unexpected result. If character can be represented using two bytes, it will use two bytes and so on. - For a case-insensitive search, use the function [positionCaseInsensitive](#positioncaseinsensitive). **Syntax** ``` sql position(haystack, needle[, start_pos]) ``` Alias: `locate(haystack, needle[, start_pos])`. -**Parameters** +**Arguments** - `haystack` — String in which the substring will be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -97,7 +95,7 @@ Works under the assumption that the string contains a set of bytes representing positionCaseInsensitive(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — String in which the substring will be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -140,7 +138,7 @@ For a case-insensitive search, use the function [positionCaseInsensitiveUTF8](#p positionUTF8(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — String in which the substring will be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -213,7 +211,7 @@ Works under the assumption that the string contains a set of bytes representing positionCaseInsensitiveUTF8(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — String in which the substring will be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
@@ -258,7 +256,7 @@ The search is performed on sequences of bytes without respect to string encoding multiSearchAllPositions(haystack, [needle1, needle2, ..., needlen]) ``` -**Parameters** +**Arguments** - `haystack` — String in which the substring will be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -373,7 +371,7 @@ Matches all groups of the `haystack` string using the `pattern` regular expressi extractAllGroupsHorizontal(haystack, pattern) ``` -**Parameters** +**Arguments** - `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md). - `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). @@ -414,7 +412,7 @@ Matches all groups of the `haystack` string using the `pattern` regular expressi extractAllGroupsVertical(haystack, pattern) ``` -**Parameters** +**Arguments** - `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md). - `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). @@ -473,7 +471,7 @@ Case insensitive variant of [like](https://clickhouse.tech/docs/en/sql-reference ilike(haystack, pattern) ``` -**Parameters** +**Arguments** - `haystack` — Input string. [String](../../sql-reference/syntax.md#syntax-string-literal). - `pattern` — If `pattern` doesn't contain percent signs or underscores, then the `pattern` only represents the string itself. An underscore (`_`) in `pattern` stands for (matches) any single character. A percent sign (`%`) matches any sequence of zero or more characters. @@ -550,7 +548,7 @@ For a case-insensitive search, use [countSubstringsCaseInsensitive](../../sql-re countSubstrings(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -616,7 +614,7 @@ Returns the number of substring occurrences case-insensitive. countSubstringsCaseInsensitive(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -682,7 +680,7 @@ Returns the number of substring occurrences in `UTF-8` case-insensitive. countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -734,7 +732,7 @@ Returns the number of regular expression matches for a `pattern` in a `haystack` countMatches(haystack, pattern) ``` -**Parameters** +**Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md). diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index dcbcd3e374b..1006b68b8ee 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -45,7 +45,7 @@ untuple(x) You can use the `EXCEPT` expression to skip columns as a result of the query. -**Parameters** +**Arguments** - `x` - A `tuple` function, column, or tuple of elements. [Tuple](../../sql-reference/data-types/tuple.md). diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index a46c36395b8..2b3a9d9103f 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -5,6 +5,68 @@ toc_title: Working with maps # Functions for maps {#functions-for-working-with-tuple-maps} +## map {#function-map} + +Arranges `key:value` pairs into a [Map(key, value)](../../sql-reference/data-types/map.md) data type. + +**Syntax** + +``` sql +map(key1, value1[, key2, value2, ...]) +``` + +**Arguments** + +- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md). +- `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- Data structure as `key:value` pairs. + +Type: [Map(key, value)](../../sql-reference/data-types/map.md). + +**Examples** + +Query: + +``` sql +SELECT map('key1', number, 'key2', number * 2) FROM numbers(3); +``` + +Result: + +``` text +┌─map('key1', number, 'key2', multiply(number, 2))─┐ +│ {'key1':0,'key2':0} │ +│ {'key1':1,'key2':2} │ +│ {'key1':2,'key2':4} │ +└──────────────────────────────────────────────────┘ +``` + +Query: + +``` sql +CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a; +INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3); +SELECT a['key2'] FROM table_map; +``` + +Result: + +``` text +┌─arrayElement(a, 'key2')─┐ +│ 0 │ +│ 2 │ +│ 4 │ +└─────────────────────────┘ +``` + +**See Also** + +- [Map(key, value)](../../sql-reference/data-types/map.md) data type + + ## mapAdd {#function-mapadd} Collects all the keys and sums corresponding values. **Syntax** ``` sql mapAdd(Tuple(Array, Array), Tuple(Array, Array) [, ...]) ``` -**Parameters** +**Arguments** Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for each key. All key arrays should have the same type, and all value arrays should contain items that can be promoted to one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as the type for the result array. @@ -49,7 +111,7 @@ Collect all the keys and subtract corresponding values.
mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...]) ``` -**Parameters** +**Arguments** Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for each key. All key arrays should have the same type, and all value arrays should contain items that can be promoted to one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as the type for the result array. @@ -87,7 +149,7 @@ Generates a map, where keys are a series of numbers, from minimum to maximum key The number of elements in `keys` and `values` must be the same for each row. -**Parameters** +**Arguments** - `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). - `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). @@ -112,4 +174,4 @@ Result: └──────────────────────────────┴───────────────────────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/query_language/functions/tuple-map-functions/) +[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/tuple-map-functions/) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 6237cd6a976..450945a5ab9 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -22,7 +22,7 @@ Converts an input value to the [Int](../../sql-reference/data-types/int-uint.md) - `toInt128(expr)` — Results in the `Int128` data type. - `toInt256(expr)` — Results in the `Int256` data type. -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. @@ -88,7 +88,7 @@ Converts an input value to the [UInt](../../sql-reference/data-types/int-uint.md - `toUInt64(expr)` — Results in the `UInt64` data type. - `toUInt256(expr)` — Results in the `UInt256` data type. -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. @@ -154,7 +154,7 @@ Converts an input string to a [Nullable(Decimal(P,S))](../../sql-reference/data- These functions should be used instead of `toDecimal*()` functions if you prefer to get a `NULL` value instead of an exception in the event of an input value parsing error. -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value.
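For instance, a minimal sketch of this behavior (the input literal is arbitrary; `toString` is used only to produce a string input, and a string that cannot be parsed as a decimal yields `NULL` instead of an exception):

Query:

```sql
SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val);
```

Result:

```text
┌────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐
│ -1.111 │ Nullable(Decimal(9, 5))                            │
└────────┴────────────────────────────────────────────────────┘
```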
@@ -199,7 +199,7 @@ Converts an input value to the [Decimal(P,S)](../../sql-reference/data-types/dec These functions should be used instead of `toDecimal*()` functions if you prefer to get a `0` value instead of an exception in the event of an input value parsing error. -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. @@ -303,9 +303,30 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut └────────────┴───────┘ ``` -## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264} +## reinterpretAs(x, T) {#type_conversion_function-reinterpretAs} -## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264} +Performs byte reinterpretation of `x` as the `T` data type. + +The following reinterpretations are allowed: +1. Any type that has a fixed size and whose value can be represented as a contiguous sequence of bytes, into `FixedString`. +2. Any type whose value can be represented as a contiguous sequence of bytes, into `String`. Null bytes are dropped from the end. For example, a `UInt32` value of 255 becomes a string that is one byte long. +3. `FixedString`, `String`, or a type that can be interpreted as numeric (integers, `Float`, `Date`, `DateTime`, `UUID`), into a type that can be interpreted as numeric, or into `FixedString`. + +``` sql +SELECT reinterpretAs(toInt8(-1), 'UInt8') as int_to_uint, + reinterpretAs(toInt8(1), 'Float32') as int_to_float, + reinterpretAs('1', 'UInt32') as string_to_int; +``` + +``` text +┌─int_to_uint─┬─int_to_float─┬─string_to_int─┐ +│ 255 │ 1e-45 │ 49 │ +└─────────────┴──────────────┴───────────────┘ +``` + +## reinterpretAsUInt(8\|16\|32\|64\|256) {#reinterpretasuint8163264256} + +## reinterpretAsInt(8\|16\|32\|64\|128\|256) {#reinterpretasint8163264128256} ## reinterpretAsFloat(32\|64) {#reinterpretasfloat3264} @@ -313,71 +334,13 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut ## reinterpretAsDateTime {#reinterpretasdatetime} -These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). If the string isn’t long enough, the functions work as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. A date is interpreted as the number of days since the beginning of the Unix Epoch, and a date with time is interpreted as the number of seconds since the beginning of the Unix Epoch. - ## reinterpretAsString {#type_conversion_functions-reinterpretAsString} -This function accepts a number or date or date with time, and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long. - ## reinterpretAsFixedString {#reinterpretasfixedstring} -This function accepts a number or date or date with time, and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long.
- ## reinterpretAsUUID {#reinterpretasuuid} -This function accepts 16 bytes string, and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the functions work as if the string is padded with the necessary number of null bytes to the end. If the string longer than 16 bytes, the extra bytes at the end are ignored. - -**Syntax** - -``` sql -reinterpretAsUUID(fixed_string) -``` - -**Parameters** - -- `fixed_string` — Big-endian byte string. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring). - -**Returned value** - -- The UUID type value. [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type). - -**Examples** - -String to UUID. - -Query: - -``` sql -SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f'))) -``` - -Result: - -``` text -┌─reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))─┐ -│ 08090a0b-0c0d-0e0f-0001-020304050607 │ -└───────────────────────────────────────────────────────────────────────┘ -``` - -Going back and forth from String to UUID. - -Query: - -``` sql -WITH - generateUUIDv4() AS uuid, - identity(lower(hex(reverse(reinterpretAsString(uuid))))) AS str, - reinterpretAsUUID(reverse(unhex(str))) AS uuid2 -SELECT uuid = uuid2; -``` - -Result: - -``` text -┌─equals(uuid, uuid2)─┐ -│ 1 │ -└─────────────────────┘ -``` +These functions are aliases for the `reinterpretAs` function. ## CAST(x, T) {#type_conversion_function-cast} @@ -504,7 +467,7 @@ toIntervalQuarter(number) toIntervalYear(number) ``` -**Parameters** +**Arguments** - `number` — Duration of interval. Positive integer number. @@ -542,7 +505,7 @@ The function parses [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 112 parseDateTimeBestEffort(time_string [, time_zone]); ``` -**Parameters** +**Arguments** - `time_string` — String containing a date and time to convert. [String](../../sql-reference/data-types/string.md). - `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../../sql-reference/data-types/string.md). @@ -654,7 +617,7 @@ This function is similar to [‘parseDateTimeBestEffort’](#parsedatetimebestef parseDateTimeBestEffortUS(time_string [, time_zone]); ``` -**Parameters** +**Arguments** - `time_string` — String containing a date and time to convert. [String](../../sql-reference/data-types/string.md). - `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../../sql-reference/data-types/string.md). @@ -738,7 +701,7 @@ To convert data from the `LowCardinality` data type use the [CAST](#type_convers toLowCardinality(expr) ``` -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../sql-reference/data-types/index.md#data_types). @@ -778,7 +741,7 @@ Converts a `DateTime64` to a `Int64` value with fixed sub-second precision. Inpu toUnixTimestamp64Milli(value) ``` -**Parameters** +**Arguments** - `value` — DateTime64 value with any precision. @@ -830,7 +793,7 @@ Converts an `Int64` to a `DateTime64` value with fixed sub-second precision and fromUnixTimestamp64Milli(value [, timezone]) ``` -**Parameters** +**Arguments** - `value` — `Int64` value with any precision. - `timezone` — `String` (optional) timezone name of the result. @@ -862,7 +825,7 @@ Converts arbitrary expressions into a string via given format. formatRow(format, x, y, ...)
``` -**Parameters** +**Arguments** - `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated). - `x`,`y`, ... — Expressions. @@ -903,7 +866,7 @@ Converts arbitrary expressions into a string via given format. The function trim formatRowNoNewline(format, x, y, ...) ``` -**Parameters** +**Arguments** - `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated). - `x`,`y`, ... — Expressions. diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index 006542f494a..9e79ef2d0cb 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -25,7 +25,7 @@ Extracts the hostname from a URL. domain(url) ``` -**Parameters** +**Arguments** - `url` — URL. Type: [String](../../sql-reference/data-types/string.md). @@ -76,7 +76,7 @@ Extracts the top-level domain from a URL. topLevelDomain(url) ``` -**Parameters** +**Arguments** - `url` — URL. Type: [String](../../sql-reference/data-types/string.md). @@ -133,10 +133,9 @@ For example: ### cutToFirstSignificantSubdomainCustom {#cuttofirstsignificantsubdomaincustom} -Same as `cutToFirstSignificantSubdomain` but accept custom TLD list name, useful if: +Returns the part of the domain that includes top-level subdomains up to the first significant subdomain. Accepts a custom [TLD list](https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains) name. -- you need fresh TLD list, -- or you have custom. +Can be useful if you need a fresh TLD list, or if you have a custom one. Configuration example: ```xml <top_level_domains_lists> <public_suffix_list>public_suffix_list.dat</public_suffix_list> </top_level_domains_lists> ``` -Example: +**Syntax** -- `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/', 'public_suffix_list') = 'yandex.com.tr'`. +``` sql +cutToFirstSignificantSubdomainCustom(URL, TLD) +``` + +**Arguments** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Part of the domain that includes top-level subdomains up to the first significant subdomain. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +```sql +SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Result: + +```text +┌─cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo.there-is-no-such-domain │ +└───────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww} -Same as `cutToFirstSignificantSubdomainWithWWW` but accept custom TLD list name. +Returns the part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. Accepts a custom TLD list name. + +Can be useful if you need a fresh TLD list, or if you have a custom one. + +Configuration example: + +```xml +<top_level_domains_lists> + <public_suffix_list>public_suffix_list.dat</public_suffix_list> +</top_level_domains_lists> +``` + +**Syntax** + +```sql +cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) +``` + +**Arguments** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md).
+ +**Returned value** + +- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +```sql +SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list'); +``` + +Result: + +```text +┌─cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')─┐ +│ www.foo │ +└──────────────────────────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### firstSignificantSubdomainCustom {#firstsignificantsubdomaincustom} -Same as `firstSignificantSubdomain` but accept custom TLD list name. +Returns the first significant subdomain. Accepts a custom TLD list name. -### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww} +Can be useful if you need a fresh TLD list, or if you have a custom one. -Same as `cutToFirstSignificantSubdomainWithWWW` but accept custom TLD list name. +Configuration example: + +```xml +<top_level_domains_lists> + <public_suffix_list>public_suffix_list.dat</public_suffix_list> +</top_level_domains_lists> +``` + +**Syntax** + +```sql +firstSignificantSubdomainCustom(URL, TLD) +``` + +**Arguments** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- First significant subdomain. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +```sql +SELECT firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Result: + +```text +┌─firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo │ +└──────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### port(URL\[, default_port = 0\]) {#port} @@ -242,7 +370,7 @@ Extracts network locality (`username:password@host:port`) from a URL. netloc(URL) ``` -**Parameters** +**Arguments** - `url` — URL. [String](../../sql-reference/data-types/string.md). diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index f70532252c7..56530b5e83b 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -115,7 +115,7 @@ Finds the highest continent in the hierarchy for the region. regionToTopContinent(id[, geobase]); ``` -**Parameters** +**Arguments** - `id` — Region ID from the Yandex geobase. [UInt32](../../sql-reference/data-types/int-uint.md). - `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../../sql-reference/data-types/string.md). Optional. diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 0ea4d4b3dc5..0fa2c492bee 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -20,6 +20,7 @@ The following actions are supported: - [ADD COLUMN](#alter_add-column) — Adds a new column to the table. - [DROP COLUMN](#alter_drop-column) — Deletes the column. +- [RENAME COLUMN](#alter_rename-column) — Renames the column. - [CLEAR COLUMN](#alter_clear-column) — Resets column values. - [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column.
- [MODIFY COLUMN](#alter_modify-column) — Changes column’s type, default expression and TTL. @@ -78,6 +79,22 @@ Example: ALTER TABLE visits DROP COLUMN browser ``` +## RENAME COLUMN {#alter_rename-column} + +``` sql +RENAME COLUMN [IF EXISTS] name TO new_name +``` + +Renames the column `name` to `new_name`. If the `IF EXISTS` clause is specified, the query won’t return an error if the column doesn’t exist. Since renaming does not involve the underlying data, the query is completed almost instantly. + +**NOTE**: Columns specified in the key expression of the table (either with `ORDER BY` or `PRIMARY KEY`) cannot be renamed. Trying to change these columns will produce `SQL Error [524]`. + +Example: + +``` sql +ALTER TABLE visits RENAME COLUMN webBrowser TO browser +``` + ## CLEAR COLUMN {#alter_clear-column} ``` sql diff --git a/docs/en/sql-reference/statements/alter/quota.md b/docs/en/sql-reference/statements/alter/quota.md index 905c57503fc..a43b5255598 100644 --- a/docs/en/sql-reference/statements/alter/quota.md +++ b/docs/en/sql-reference/statements/alter/quota.md @@ -5,7 +5,7 @@ toc_title: QUOTA # ALTER QUOTA {#alter-quota-statement} -Changes [quotas](../../../operations/access-rights.md#quotas-management). +Changes quotas. Syntax: ``` sql ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name] [RENAME TO new_name] [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED] [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year} - {MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] | + {MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] | NO LIMITS | TRACKING ONLY} [,...]] [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] ``` Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table. -Parameters `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table. +Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table. `ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md). diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md index ec980af921f..71416abf588 100644 --- a/docs/en/sql-reference/statements/create/quota.md +++ b/docs/en/sql-reference/statements/create/quota.md @@ -13,14 +13,14 @@ Syntax: CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name] [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED] [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year} - {MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] | + {MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...]
| NO LIMITS | TRACKING ONLY} [,...]] [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] ``` -Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table. +Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table. -Parameters `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table. +Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table. `ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md). diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 4370735b8d9..95d66175021 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -59,6 +59,10 @@ A `SELECT` query can contain `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`… Note The execution of [ALTER](../../../sql-reference/statements/alter/index.md) queries on materialized views has limitations, so they might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached (`DETACH`) view. +Note that a materialized view is influenced by the [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged before the insertion into the view. + Views look the same as normal tables. For example, they are listed in the result of the `SHOW TABLES` query. There isn’t a separate query for deleting views. To delete a view, use [DROP TABLE](../../../sql-reference/statements/drop.md). + +[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/create/view/) diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index 2928e50224d..c517a515ab7 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -13,7 +13,7 @@ Basic query format: INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` -You can specify a list of columns to insert using the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). +You can specify a list of columns to insert using `(c1, c2, c3)`.
You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). For example, consider the table: @@ -62,8 +62,6 @@ If a list of columns doesn't include all existing columns, the rest of the colum - The values calculated from the `DEFAULT` expressions specified in the table definition. - Zeros and empty strings, if `DEFAULT` expressions are not defined. -If [strict\_insert\_defaults=1](../../operations/settings/settings.md), columns that do not have `DEFAULT` defined must be listed in the query. - Data can be passed to the INSERT in any [format](../../interfaces/formats.md#formats) supported by ClickHouse. The format must be specified explicitly in the query: ``` sql diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index 7c13772ffdf..e99ebef838c 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -278,5 +278,4 @@ Other ways to make settings see [here](../../../operations/settings/index.md). SELECT * FROM some_table SETTINGS optimize_read_in_order=1, cast_keep_nullable=1; ``` -[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/) - +[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/) diff --git a/docs/en/sql-reference/table-functions/generate.md b/docs/en/sql-reference/table-functions/generate.md index 5bbd22dfe4e..be6ba2b8bc4 100644 --- a/docs/en/sql-reference/table-functions/generate.md +++ b/docs/en/sql-reference/table-functions/generate.md @@ -13,7 +13,7 @@ Supports all data types that can be stored in table except `LowCardinality` and generateRandom('name TypeName[, name TypeName]...'[, 'random_seed'[, 'max_string_length'[, 'max_array_length']]]); ``` -**Parameters** +**Arguments** - `name` — Name of corresponding column. - `TypeName` — Type of corresponding column. diff --git a/docs/en/sql-reference/table-functions/mysql.md b/docs/en/sql-reference/table-functions/mysql.md index eec4a1d0c46..14cd4369285 100644 --- a/docs/en/sql-reference/table-functions/mysql.md +++ b/docs/en/sql-reference/table-functions/mysql.md @@ -13,7 +13,7 @@ Allows `SELECT` and `INSERT` queries to be performed on data that is stored on a mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']) ``` -**Parameters** +**Arguments** - `host:port` — MySQL server address. diff --git a/docs/en/sql-reference/table-functions/view.md b/docs/en/sql-reference/table-functions/view.md index 9997971af65..08096c2b019 100644 --- a/docs/en/sql-reference/table-functions/view.md +++ b/docs/en/sql-reference/table-functions/view.md @@ -13,7 +13,7 @@ Turns a subquery into a table. The function implements views (see [CREATE VIEW]( view(subquery) ``` -**Parameters** +**Arguments** - `subquery` — `SELECT` query.
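As a usage sketch (the `months` table is hypothetical, created here only to feed the subquery):

Query:

```sql
CREATE TABLE months (name String, days UInt8) ENGINE = Memory;
INSERT INTO months VALUES ('January', 31), ('February', 29), ('March', 31);

SELECT * FROM view(SELECT name FROM months WHERE days = 31);
```

Result:

```text
┌─name────┐
│ January │
│ March   │
└─────────┘
```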
diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md new file mode 100644 index 00000000000..cbf03a44d46 --- /dev/null +++ b/docs/en/sql-reference/window-functions/index.md @@ -0,0 +1,60 @@ +--- +toc_priority: 62 +toc_title: Window Functions +--- + +# [experimental] Window Functions + +!!! warning "Warning" +    This is an experimental feature that is currently in development and is not ready +    for general use. It will change in unpredictable backwards-incompatible ways in +    future releases. Set `allow_experimental_window_functions = 1` to enable it. + +ClickHouse supports the standard grammar for defining windows and window functions. The following features are currently supported: + +| Feature | Support or workaround | +| --------| ----------| +| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | supported | +| expressions involving window functions, e.g. `(count(*) over ()) / 2` | not supported, wrap in a subquery ([feature request](https://github.com/ClickHouse/ClickHouse/issues/19857)) | +| `WINDOW` clause (`select ... from table window w as (partition by id)`) | supported | +| `ROWS` frame | supported | +| `RANGE` frame | supported, the default | +| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | not supported, specify the number of seconds instead | +| `GROUPS` frame | not supported | +| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported | +| `rank()`, `dense_rank()`, `row_number()` | supported | +| `lag/lead(value, offset)` | not supported, replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`| + +## References + +### GitHub Issues + +The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097). + +All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag. + +### Tests + +These tests contain examples of the currently supported grammar: + +https://github.com/ClickHouse/ClickHouse/blob/master/tests/performance/window_functions.xml + +https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/01591_window_functions.sql + +### Postgres Docs + +https://www.postgresql.org/docs/current/sql-select.html#SQL-WINDOW + +https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS + +https://www.postgresql.org/docs/devel/functions-window.html + +https://www.postgresql.org/docs/devel/tutorial-window.html + +### MySQL Docs + +https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html + +https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html + +https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html diff --git a/docs/en/whats-new/changelog/2020.md b/docs/en/whats-new/changelog/2020.md index 5975edd3c6c..bf4e4fb0fcc 100644 --- a/docs/en/whats-new/changelog/2020.md +++ b/docs/en/whats-new/changelog/2020.md @@ -5,6 +5,22 @@ toc_title: '2020' ### ClickHouse release 20.12 +### ClickHouse release v20.12.5.14-stable, 2020-12-28 + +#### Bug Fix + +* Disable write with AIO during merges because it can lead to extremely rare data corruption of primary key columns during merge. [#18481](https://github.com/ClickHouse/ClickHouse/pull/18481) ([alesapin](https://github.com/alesapin)).
+* Fixed `value is too short` error when executing `toType(...)` functions (`toDate`, `toUInt32`, etc) with argument of type `Nullable(String)`. Now such functions return `NULL` on parsing errors instead of throwing exception. Fixes [#7673](https://github.com/ClickHouse/ClickHouse/issues/7673). [#18445](https://github.com/ClickHouse/ClickHouse/pull/18445) ([tavplubix](https://github.com/tavplubix)). +* Restrict merges from wide to compact parts. In case of vertical merge it led to broken result part. [#18381](https://github.com/ClickHouse/ClickHouse/pull/18381) ([Anton Popov](https://github.com/CurtizJ)). +* Fix filling table `system.settings_profile_elements`. This PR fixes [#18231](https://github.com/ClickHouse/ClickHouse/issues/18231). [#18379](https://github.com/ClickHouse/ClickHouse/pull/18379) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix possible crashes in aggregate functions with combinator `Distinct`, while using two-level aggregation. Fixes [#17682](https://github.com/ClickHouse/ClickHouse/issues/17682). [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365) ([Anton Popov](https://github.com/CurtizJ)). +* Fix error when query `MODIFY COLUMN ... REMOVE TTL` doesn't actually remove column TTL. [#18130](https://github.com/ClickHouse/ClickHouse/pull/18130) ([alesapin](https://github.com/alesapin)). + +#### Build/Testing/Packaging Improvement + +* Update timezones info to 2020e. [#18531](https://github.com/ClickHouse/ClickHouse/pull/18531) ([alesapin](https://github.com/alesapin)). + + ### ClickHouse release v20.12.4.5-stable, 2020-12-24 #### Bug Fix @@ -142,6 +158,70 @@ toc_title: '2020' ## ClickHouse release 20.11 +### ClickHouse release v20.11.7.16-stable, 2021-03-02 + +#### Improvement + +* Explicitly set uid / gid of clickhouse user & group to the fixed values (101) in clickhouse-server images. [#19096](https://github.com/ClickHouse/ClickHouse/pull/19096) ([filimonov](https://github.com/filimonov)). + +#### Bug Fix + +* BloomFilter index crash fix. Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). [#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)). +* Deadlock was possible if system.text_log is enabled. This fixes [#19874](https://github.com/ClickHouse/ClickHouse/issues/19874). [#19875](https://github.com/ClickHouse/ClickHouse/pull/19875) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* In previous versions, unusual arguments for function arrayEnumerateUniq may cause crash or infinite loop. This closes [#19787](https://github.com/ClickHouse/ClickHouse/issues/19787). [#19788](https://github.com/ClickHouse/ClickHouse/pull/19788) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed stack overflow when using accurate comparison of arithmetic type with string type. [#19773](https://github.com/ClickHouse/ClickHouse/pull/19773) ([tavplubix](https://github.com/tavplubix)). +* Fix a segmentation fault in `bitmapAndnot` function. Fixes [#19668](https://github.com/ClickHouse/ClickHouse/issues/19668). [#19713](https://github.com/ClickHouse/ClickHouse/pull/19713) ([Maksim Kita](https://github.com/kitaisreal)). +* Some functions with big integers may cause segfault. Big integers is experimental feature. This closes [#19667](https://github.com/ClickHouse/ClickHouse/issues/19667). [#19672](https://github.com/ClickHouse/ClickHouse/pull/19672) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix wrong result of function `neighbor` for `LowCardinality` argument. 
Fixes [#10333](https://github.com/ClickHouse/ClickHouse/issues/10333). [#19617](https://github.com/ClickHouse/ClickHouse/pull/19617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix use-after-free of the CompressedWriteBuffer in Connection after disconnect. [#19599](https://github.com/ClickHouse/ClickHouse/pull/19599) ([Azat Khuzhin](https://github.com/azat)).
+* `DROP/DETACH TABLE table ON CLUSTER cluster SYNC` query might hang, it's fixed. Fixes [#19568](https://github.com/ClickHouse/ClickHouse/issues/19568). [#19572](https://github.com/ClickHouse/ClickHouse/pull/19572) ([tavplubix](https://github.com/tavplubix)).
+* Fix `CREATE DICTIONARY` queries with an `id` expression. [#19571](https://github.com/ClickHouse/ClickHouse/pull/19571) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix SIGSEGV with merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read=0/UINT64_MAX. [#19528](https://github.com/ClickHouse/ClickHouse/pull/19528) ([Azat Khuzhin](https://github.com/azat)).
+* Buffer overflow (on memory read) was possible if `addMonth` function was called with specifically crafted arguments. This fixes [#19441](https://github.com/ClickHouse/ClickHouse/issues/19441). This fixes [#19413](https://github.com/ClickHouse/ClickHouse/issues/19413). [#19472](https://github.com/ClickHouse/ClickHouse/pull/19472) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Mark distributed batch as broken in case of empty data block in one of the files. [#19449](https://github.com/ClickHouse/ClickHouse/pull/19449) ([Azat Khuzhin](https://github.com/azat)).
+* Fix possible buffer overflow in Uber H3 library. See https://github.com/uber/h3/issues/392. This closes [#19219](https://github.com/ClickHouse/ClickHouse/issues/19219). [#19383](https://github.com/ClickHouse/ClickHouse/pull/19383) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix `system.parts` `_state` column (LOGICAL_ERROR when querying this column, due to incorrect order). [#19346](https://github.com/ClickHouse/ClickHouse/pull/19346) ([Azat Khuzhin](https://github.com/azat)).
+* Fix error `Cannot convert column now64() because it is constant but values of constants are different in source and result`. Continuation of [#7156](https://github.com/ClickHouse/ClickHouse/issues/7156). [#19316](https://github.com/ClickHouse/ClickHouse/pull/19316) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix bug when concurrent `ALTER` and `DROP` queries may hang while processing a ReplicatedMergeTree table. [#19237](https://github.com/ClickHouse/ClickHouse/pull/19237) ([alesapin](https://github.com/alesapin)).
+* Fix infinite reading from file in `ORC` format (was introduced in [#10580](https://github.com/ClickHouse/ClickHouse/issues/10580)). Fixes [#19095](https://github.com/ClickHouse/ClickHouse/issues/19095). [#19134](https://github.com/ClickHouse/ClickHouse/pull/19134) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix startup bug when clickhouse was not able to read compression codec from `LowCardinality(Nullable(...))` and threw the exception `Attempt to read after EOF`. Fixes [#18340](https://github.com/ClickHouse/ClickHouse/issues/18340). [#19101](https://github.com/ClickHouse/ClickHouse/pull/19101) ([alesapin](https://github.com/alesapin)).
+* Fixed `There is no checkpoint` error when inserting data through the HTTP interface using `Template` or `CustomSeparated` format. Fixes [#19021](https://github.com/ClickHouse/ClickHouse/issues/19021).
[#19072](https://github.com/ClickHouse/ClickHouse/pull/19072) ([tavplubix](https://github.com/tavplubix)).
+* Restrict `MODIFY TTL` queries for `MergeTree` tables created in old syntax. Previously the query succeeded, but actually it had no effect. [#19064](https://github.com/ClickHouse/ClickHouse/pull/19064) ([Anton Popov](https://github.com/CurtizJ)).
+* Make sure `groupUniqArray` returns correct type for argument of Enum type. This closes [#17875](https://github.com/ClickHouse/ClickHouse/issues/17875). [#19019](https://github.com/ClickHouse/ClickHouse/pull/19019) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix possible error `Expected single dictionary argument for function` when using function `ignore` with `LowCardinality` argument. Fixes [#14275](https://github.com/ClickHouse/ClickHouse/issues/14275). [#19016](https://github.com/ClickHouse/ClickHouse/pull/19016) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix inserting a `LowCardinality` column into a table with the `TinyLog` engine. Fixes [#18629](https://github.com/ClickHouse/ClickHouse/issues/18629). [#19010](https://github.com/ClickHouse/ClickHouse/pull/19010) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Disable `optimize_move_functions_out_of_any` because optimization is not always correct. This closes [#18051](https://github.com/ClickHouse/ClickHouse/issues/18051). This closes [#18973](https://github.com/ClickHouse/ClickHouse/issues/18973). [#18981](https://github.com/ClickHouse/ClickHouse/pull/18981) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed very rare deadlock at shutdown. [#18977](https://github.com/ClickHouse/ClickHouse/pull/18977) ([tavplubix](https://github.com/tavplubix)).
+* Fix bug when mutation with some escaped text (like `ALTER ... UPDATE e = CAST('foo', 'Enum8(\'foo\' = 1')` was serialized incorrectly. Fixes [#18878](https://github.com/ClickHouse/ClickHouse/issues/18878). [#18944](https://github.com/ClickHouse/ClickHouse/pull/18944) ([alesapin](https://github.com/alesapin)).
+* Attach partition should reset the mutation. [#18804](https://github.com/ClickHouse/ClickHouse/issues/18804). [#18935](https://github.com/ClickHouse/ClickHouse/pull/18935) ([fastio](https://github.com/fastio)).
+* Fix possible hang at shutdown in clickhouse-local. This fixes [#18891](https://github.com/ClickHouse/ClickHouse/issues/18891). [#18893](https://github.com/ClickHouse/ClickHouse/pull/18893) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix *If combinator with unary function and Nullable types. [#18806](https://github.com/ClickHouse/ClickHouse/pull/18806) ([Azat Khuzhin](https://github.com/azat)).
+* Asynchronous distributed INSERTs can be rejected by the server if the setting `network_compression_method` is globally set to a non-default value. This fixes [#18741](https://github.com/ClickHouse/ClickHouse/issues/18741). [#18776](https://github.com/ClickHouse/ClickHouse/pull/18776) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fixed `Attempt to read after eof` error when trying to `CAST` `NULL` from `Nullable(String)` to `Nullable(Decimal(P, S))`. Now function `CAST` returns `NULL` when it cannot parse decimal from nullable string. Fixes [#7690](https://github.com/ClickHouse/ClickHouse/issues/7690). [#18718](https://github.com/ClickHouse/ClickHouse/pull/18718) ([Winter Zhang](https://github.com/zhang2014)).
+* Fix Logger with unmatched arg size. [#18717](https://github.com/ClickHouse/ClickHouse/pull/18717) ([sundyli](https://github.com/sundy-li)).
+* Add `FixedString` data type support. Previously, replicating data from MySQL to ClickHouse failed with the exception "Code: 50, e.displayText() = DB::Exception: Unsupported type FixedString(1)". This patch fixes bug [#18450](https://github.com/ClickHouse/ClickHouse/issues/18450). Also fixes [#6556](https://github.com/ClickHouse/ClickHouse/issues/6556). [#18553](https://github.com/ClickHouse/ClickHouse/pull/18553) ([awesomeleo](https://github.com/awesomeleo)).
+* Fix possible `Pipeline stuck` error while using `ORDER BY` after subquery with `RIGHT` or `FULL` join. [#18550](https://github.com/ClickHouse/ClickHouse/pull/18550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix bug which may lead to `ALTER` queries hanging after the corresponding mutation is killed. Found by thread fuzzer. [#18518](https://github.com/ClickHouse/ClickHouse/pull/18518) ([alesapin](https://github.com/alesapin)).
+* Disable write with AIO during merges because it can lead to extremely rare data corruption of primary key columns during merge. [#18481](https://github.com/ClickHouse/ClickHouse/pull/18481) ([alesapin](https://github.com/alesapin)).
+* Disable constant folding for subqueries on the analysis stage, when the result cannot be calculated. [#18446](https://github.com/ClickHouse/ClickHouse/pull/18446) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed `value is too short` error when executing `toType(...)` functions (`toDate`, `toUInt32`, etc) with argument of type `Nullable(String)`. Now such functions return `NULL` on parsing errors instead of throwing exception. Fixes [#7673](https://github.com/ClickHouse/ClickHouse/issues/7673). [#18445](https://github.com/ClickHouse/ClickHouse/pull/18445) ([tavplubix](https://github.com/tavplubix)).
+* Restrict merges from wide to compact parts. In case of vertical merge it led to broken result part. [#18381](https://github.com/ClickHouse/ClickHouse/pull/18381) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix filling table `system.settings_profile_elements`. This PR fixes [#18231](https://github.com/ClickHouse/ClickHouse/issues/18231). [#18379](https://github.com/ClickHouse/ClickHouse/pull/18379) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix index analysis of binary functions with constant argument which leads to wrong query results. This fixes [#18364](https://github.com/ClickHouse/ClickHouse/issues/18364). [#18373](https://github.com/ClickHouse/ClickHouse/pull/18373) ([Amos Bird](https://github.com/amosbird)).
+* Fix possible crashes in aggregate functions with combinator `Distinct`, while using two-level aggregation. Fixes [#17682](https://github.com/ClickHouse/ClickHouse/issues/17682). [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365) ([Anton Popov](https://github.com/CurtizJ)).
+* `SELECT count() FROM table` can now be executed if at least one column can be selected from the `table`. This PR fixes [#10639](https://github.com/ClickHouse/ClickHouse/issues/10639). [#18233](https://github.com/ClickHouse/ClickHouse/pull/18233) ([Vitaly Baranov](https://github.com/vitlibar)).
+* `SELECT JOIN` now requires the `SELECT` privilege on each of the joined tables. This PR fixes [#17654](https://github.com/ClickHouse/ClickHouse/issues/17654). [#18232](https://github.com/ClickHouse/ClickHouse/pull/18232) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix possible incomplete query result while reading from `MergeTree*` in case of read backoff (message ` MergeTreeReadPool: Will lower number of threads` in logs).
Was introduced in [#16423](https://github.com/ClickHouse/ClickHouse/issues/16423). Fixes [#18137](https://github.com/ClickHouse/ClickHouse/issues/18137). [#18216](https://github.com/ClickHouse/ClickHouse/pull/18216) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix error when query `MODIFY COLUMN ... REMOVE TTL` doesn't actually remove column TTL. [#18130](https://github.com/ClickHouse/ClickHouse/pull/18130) ([alesapin](https://github.com/alesapin)).
+* Fix non-deterministic functions with predicate optimizer. This fixes [#17244](https://github.com/ClickHouse/ClickHouse/issues/17244). [#17273](https://github.com/ClickHouse/ClickHouse/pull/17273) ([Winter Zhang](https://github.com/zhang2014)).
+* Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)).
+
+#### Build/Testing/Packaging Improvement
+
+* Update timezones info to 2020e. [#18531](https://github.com/ClickHouse/ClickHouse/pull/18531) ([alesapin](https://github.com/alesapin)).
+
+
 ### ClickHouse release v20.11.6.6-stable, 2020-12-24

 #### Bug Fix
@@ -588,6 +668,60 @@ toc_title: '2020'

 ## ClickHouse release 20.9

+### ClickHouse release v20.9.7.11-stable, 2020-12-07
+
+#### Performance Improvement
+
+* Fix performance of reading from `Merge` tables over a huge number of `MergeTree` tables. Fixes [#7748](https://github.com/ClickHouse/ClickHouse/issues/7748). [#16988](https://github.com/ClickHouse/ClickHouse/pull/16988) ([Anton Popov](https://github.com/CurtizJ)).
+
+#### Bug Fix
+
+* Do not restore parts from WAL if `in_memory_parts_enable_wal` is disabled. [#17802](https://github.com/ClickHouse/ClickHouse/pull/17802) ([detailyang](https://github.com/detailyang)).
+* Fixed segfault when there is not enough space when inserting into `Distributed` table. [#17737](https://github.com/ClickHouse/ClickHouse/pull/17737) ([tavplubix](https://github.com/tavplubix)).
+* Fixed problem when ClickHouse fails to resume connection to MySQL servers. [#17681](https://github.com/ClickHouse/ClickHouse/pull/17681) ([Alexander Kazakov](https://github.com/Akazz)).
+* Fixed `Function not implemented` error when executing `RENAME` query in `Atomic` database with ClickHouse running on Windows Subsystem for Linux. Fixes [#17661](https://github.com/ClickHouse/ClickHouse/issues/17661). [#17664](https://github.com/ClickHouse/ClickHouse/pull/17664) ([tavplubix](https://github.com/tavplubix)).
+* When clickhouse-client is used in interactive mode with multiline queries, a single-line comment was erroneously extended to the end of the query. This fixes [#13654](https://github.com/ClickHouse/ClickHouse/issues/13654). [#17565](https://github.com/ClickHouse/ClickHouse/pull/17565) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix the issue when server can stop accepting connections in very rare cases. [#17542](https://github.com/ClickHouse/ClickHouse/pull/17542) ([alexey-milovidov](https://github.com/alexey-milovidov)).
+* Fix alter query hang when the corresponding mutation was killed on a different replica. Fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). [#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)).
+* Fix bug when mark cache size was underestimated by clickhouse. It may happen when there are a lot of tiny files with marks.
[#17496](https://github.com/ClickHouse/ClickHouse/pull/17496) ([alesapin](https://github.com/alesapin)).
+* Fix `ORDER BY` with enabled setting `optimize_redundant_functions_in_order_by`. [#17471](https://github.com/ClickHouse/ClickHouse/pull/17471) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix duplicates after `DISTINCT` which were possible because of incorrect optimization. Fixes [#17294](https://github.com/ClickHouse/ClickHouse/issues/17294). [#17296](https://github.com/ClickHouse/ClickHouse/pull/17296) ([li chengxiang](https://github.com/chengxianglibra)). [#17439](https://github.com/ClickHouse/ClickHouse/pull/17439) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix crash while reading from `JOIN` table with `LowCardinality` types. Fixes [#17228](https://github.com/ClickHouse/ClickHouse/issues/17228). [#17397](https://github.com/ClickHouse/ClickHouse/pull/17397) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix set index invalidation when there are const columns in the subquery. This fixes [#17246](https://github.com/ClickHouse/ClickHouse/issues/17246). [#17249](https://github.com/ClickHouse/ClickHouse/pull/17249) ([Amos Bird](https://github.com/amosbird)).
+* Fix ColumnConst comparison which leads to crash. This fixed [#17088](https://github.com/ClickHouse/ClickHouse/issues/17088). [#17135](https://github.com/ClickHouse/ClickHouse/pull/17135) ([Amos Bird](https://github.com/amosbird)).
+* Fixed crash on `CREATE TABLE ... AS some_table` query when `some_table` was created `AS table_function()`. Fixes [#16944](https://github.com/ClickHouse/ClickHouse/issues/16944). [#17072](https://github.com/ClickHouse/ClickHouse/pull/17072) ([tavplubix](https://github.com/tavplubix)).
+* Bug fix for function `fuzzBits`, related issue: [#16980](https://github.com/ClickHouse/ClickHouse/issues/16980). [#17051](https://github.com/ClickHouse/ClickHouse/pull/17051) ([hexiaoting](https://github.com/hexiaoting)).
+* Avoid unnecessary network errors for remote queries which may be cancelled during execution, like queries with `LIMIT`. [#17006](https://github.com/ClickHouse/ClickHouse/pull/17006) ([Azat Khuzhin](https://github.com/azat)).
+* TODO. [#16866](https://github.com/ClickHouse/ClickHouse/pull/16866) ([tavplubix](https://github.com/tavplubix)).
+* Return number of affected rows for INSERT queries via MySQL protocol. Previously ClickHouse used to always return 0, it's fixed. Fixes [#16605](https://github.com/ClickHouse/ClickHouse/issues/16605). [#16715](https://github.com/ClickHouse/ClickHouse/pull/16715) ([Winter Zhang](https://github.com/zhang2014)).
+
+#### Build/Testing/Packaging Improvement
+
+* Update embedded timezone data to version 2020d (also update cctz to the latest master). [#17204](https://github.com/ClickHouse/ClickHouse/pull/17204) ([filimonov](https://github.com/filimonov)).
+
+
+### ClickHouse release v20.9.6.14-stable, 2020-11-20
+
+#### Improvement
+
+* Make it possible to connect to `clickhouse-server` secure endpoint which requires SNI. This is possible when `clickhouse-server` is hosted behind TLS proxy. [#16938](https://github.com/ClickHouse/ClickHouse/pull/16938) ([filimonov](https://github.com/filimonov)).
+* Conditional aggregate functions (for example: `avgIf`, `sumIf`, `maxIf`) now return `NULL` when no rows match and the arguments are nullable; see the sketch after this list. [#13964](https://github.com/ClickHouse/ClickHouse/pull/13964) ([Winter Zhang](https://github.com/zhang2014)).
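+
+For illustration, a minimal sketch of the nullable behavior described in the last item above; the table name and data are hypothetical, not taken from the release notes:
+
+``` sql
+CREATE TABLE salaries (dept String, salary Nullable(UInt32)) ENGINE = Memory;
+INSERT INTO salaries VALUES ('eng', 100), ('eng', NULL);
+
+-- No row matches the condition and the argument is nullable,
+-- so the aggregate is expected to return NULL rather than 0.
+SELECT avgIf(salary, dept = 'sales') FROM salaries;
+```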
+
+#### Bug Fix
+
+* Fix bug when `ON CLUSTER` queries may hang forever for non-leader `ReplicatedMergeTree` tables. [#17089](https://github.com/ClickHouse/ClickHouse/pull/17089) ([alesapin](https://github.com/alesapin)).
+* Re-resolve the IP of the `format_avro_schema_registry_url` in case of errors. [#16985](https://github.com/ClickHouse/ClickHouse/pull/16985) ([filimonov](https://github.com/filimonov)).
+* Fix possible server crash after `ALTER TABLE ... MODIFY COLUMN ... NewType` when a `SELECT` has a `WHERE` expression on the column being altered and the alter hasn't finished yet. [#16968](https://github.com/ClickHouse/ClickHouse/pull/16968) ([Amos Bird](https://github.com/amosbird)).
+* Install script should always create subdirs in config folders. This is only relevant for Docker build with custom config. [#16936](https://github.com/ClickHouse/ClickHouse/pull/16936) ([filimonov](https://github.com/filimonov)).
+* Fix possible error `Illegal type of argument` for queries with `ORDER BY`. Fixes [#16580](https://github.com/ClickHouse/ClickHouse/issues/16580). [#16928](https://github.com/ClickHouse/ClickHouse/pull/16928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Abort multipart upload if no data was written to WriteBufferFromS3. [#16840](https://github.com/ClickHouse/ClickHouse/pull/16840) ([Pavel Kovalenko](https://github.com/Jokser)).
+* Fix crash when using `any` without any arguments. This is for [#16803](https://github.com/ClickHouse/ClickHouse/issues/16803). cc @azat. [#16826](https://github.com/ClickHouse/ClickHouse/pull/16826) ([Amos Bird](https://github.com/amosbird)).
+* Fix `IN` operator over several columns and tuples with enabled `transform_null_in` setting. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix `optimize_read_in_order`/`optimize_aggregation_in_order` with `max_threads > 0` and an expression in `ORDER BY`. [#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)).
+* Fix remote query failure when using an `-If` suffix aggregate function. Fixes [#16574](https://github.com/ClickHouse/ClickHouse/issues/16574), fixes [#16231](https://github.com/ClickHouse/ClickHouse/issues/16231). [#16610](https://github.com/ClickHouse/ClickHouse/pull/16610) ([Winter Zhang](https://github.com/zhang2014)).
+* Query is finished faster in case of exception. Cancel execution on remote replicas if an exception happens. [#15578](https://github.com/ClickHouse/ClickHouse/pull/15578) ([Azat Khuzhin](https://github.com/azat)).
+
 ### ClickHouse release v20.9.5.5-stable, 2020-11-13

 #### Bug Fix
@@ -744,6 +878,23 @@ toc_title: '2020'

 ## ClickHouse release 20.8

+### ClickHouse release v20.8.12.2-lts, 2021-01-16
+
+#### Bug Fix
+
+* Fix *If combinator with unary function and Nullable types. [#18806](https://github.com/ClickHouse/ClickHouse/pull/18806) ([Azat Khuzhin](https://github.com/azat)).
+* Restrict merges from wide to compact parts. In case of vertical merge it led to broken result part. [#18381](https://github.com/ClickHouse/ClickHouse/pull/18381) ([Anton Popov](https://github.com/CurtizJ)).
+
+
+### ClickHouse release v20.8.11.17-lts, 2020-12-25
+
+#### Bug Fix
+
+* Disable write with AIO during merges because it can lead to extremely rare data corruption of primary key columns during merge. [#18481](https://github.com/ClickHouse/ClickHouse/pull/18481) ([alesapin](https://github.com/alesapin)).
+* Fixed `value is too short` error when executing `toType(...)` functions (`toDate`, `toUInt32`, etc) with argument of type `Nullable(String)`. Now such functions return `NULL` on parsing errors instead of throwing exception. Fixes [#7673](https://github.com/ClickHouse/ClickHouse/issues/7673). [#18445](https://github.com/ClickHouse/ClickHouse/pull/18445) ([tavplubix](https://github.com/tavplubix)). +* Fix possible crashes in aggregate functions with combinator `Distinct`, while using two-level aggregation. Fixes [#17682](https://github.com/ClickHouse/ClickHouse/issues/17682). [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365) ([Anton Popov](https://github.com/CurtizJ)). + + ### ClickHouse release v20.8.10.13-lts, 2020-12-24 #### Bug Fix diff --git a/docs/es/operations/backup.md b/docs/es/operations/backup.md index a6297070663..be33851574a 100644 --- a/docs/es/operations/backup.md +++ b/docs/es/operations/backup.md @@ -5,7 +5,7 @@ toc_title: Copia de seguridad de datos # Copia de seguridad de datos {#data-backup} -Mientras que la [replicación](../engines/table-engines/mergetree-family/replication.md) proporciona protección contra fallos de hardware, no protege de errores humanos: el borrado accidental de datos, elminar la tabla equivocada o una tabla en el clúster equivocado, y bugs de software que dan como resultado un procesado incorrecto de los datos o la corrupción de los datos. En muchos casos, errores como estos afectarán a todas las réplicas. ClickHouse dispone de salvaguardas para prevenir algunos tipos de errores — por ejemplo, por defecto [no se puede simplemente eliminar tablas con un motor similar a MergeTree que contenga más de 50 Gb de datos](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Sin embargo, estas salvaguardas no cubren todos los casos posibles y pueden eludirse. +Mientras que la [replicación](../engines/table-engines/mergetree-family/replication.md) proporciona protección contra fallos de hardware, no protege de errores humanos: el borrado accidental de datos, elminar la tabla equivocada o una tabla en el clúster equivocado, y bugs de software que dan como resultado un procesado incorrecto de los datos o la corrupción de los datos. En muchos casos, errores como estos afectarán a todas las réplicas. ClickHouse dispone de salvaguardas para prevenir algunos tipos de errores — por ejemplo, por defecto [no se puede simplemente eliminar tablas con un motor similar a MergeTree que contenga más de 50 Gb de datos](server-configuration-parameters/settings.md#max-table-size-to-drop). Sin embargo, estas salvaguardas no cubren todos los casos posibles y pueden eludirse. Para mitigar eficazmente los posibles errores humanos, debe preparar cuidadosamente una estrategia para realizar copias de seguridad y restaurar sus datos **previamente**. diff --git a/docs/fr/operations/backup.md b/docs/fr/operations/backup.md index 9a463372947..953a96a04eb 100644 --- a/docs/fr/operations/backup.md +++ b/docs/fr/operations/backup.md @@ -7,7 +7,7 @@ toc_title: "La Sauvegarde Des Donn\xE9es" # La Sauvegarde Des Données {#data-backup} -Alors [réplication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. 
ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [vous ne pouvez pas simplement supprimer des tables avec un moteur de type MergeTree contenant plus de 50 Go de données](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Toutefois, ces garanties ne couvrent pas tous les cas possibles et peuvent être contournés. +Alors [réplication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [vous ne pouvez pas simplement supprimer des tables avec un moteur de type MergeTree contenant plus de 50 Go de données](server-configuration-parameters/settings.md#max-table-size-to-drop). Toutefois, ces garanties ne couvrent pas tous les cas possibles et peuvent être contournés. Afin d'atténuer efficacement les erreurs humaines possibles, vous devez préparer soigneusement une stratégie de sauvegarde et de restauration de vos données **préalablement**. diff --git a/docs/ja/operations/backup.md b/docs/ja/operations/backup.md index 994271371a4..b0cde00e23c 100644 --- a/docs/ja/operations/backup.md +++ b/docs/ja/operations/backup.md @@ -7,7 +7,7 @@ toc_title: "\u30C7\u30FC\u30BF\u30D0\u30C3\u30AF\u30A2" # データバックア {#data-backup} -ながら [複製](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [50Gbを超えるデータを含むMergeTreeのようなエンジンでは、テーブルを削除することはできません](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). しかし、これらの保障措置がカバーしないすべてのケースで回避. +ながら [複製](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [50Gbを超えるデータを含むMergeTreeのようなエンジンでは、テーブルを削除することはできません](server-configuration-parameters/settings.md#max-table-size-to-drop). しかし、これらの保障措置がカバーしないすべてのケースで回避. ヒューマンエラーを効果的に軽減するには、データのバックアップと復元のための戦略を慎重に準備する必要があります **事前に**. diff --git a/docs/ru/development/style.md b/docs/ru/development/style.md index 4d71dca46a7..1b211259bbb 100644 --- a/docs/ru/development/style.md +++ b/docs/ru/development/style.md @@ -714,6 +714,7 @@ auto s = std::string{"Hello"}; ### Пользовательская ошибка {#error-messages-user-error} Такая ошибка вызвана действиями пользователя (неверный синтаксис запроса) или конфигурацией внешних систем (кончилось место на диске). Предполагается, что пользователь может устранить её самостоятельно. 
Для этого в сообщении об ошибке должна содержаться следующая информация:
+
 * что произошло. Это должно объясняться в пользовательских терминах (`Function pow() is not supported for data type UInt128`), а не загадочными конструкциями из кода (`runtime overload resolution failed in DB::BinaryOperationBuilder::Impl, UInt128, Int8>::kaboongleFastPath()`).
 * почему/где/когда -- любой контекст, который помогает отладить проблему. Представьте, как бы её отлаживали вы (программировать и пользоваться отладчиком нельзя).
 * что можно предпринять для устранения ошибки. Здесь можно перечислить типичные причины проблемы, настройки, влияющие на это поведение, и так далее.
diff --git a/docs/ru/engines/database-engines/materialize-mysql.md b/docs/ru/engines/database-engines/materialize-mysql.md
index f23ac0cddd6..3022542e294 100644
--- a/docs/ru/engines/database-engines/materialize-mysql.md
+++ b/docs/ru/engines/database-engines/materialize-mysql.md
@@ -93,6 +93,7 @@ DDL-запросы в MySQL конвертируются в соответств
 - Каскадные запросы `UPDATE/DELETE` не поддерживаются движком `MaterializeMySQL`.
 - Репликация может быть легко нарушена.
 - Прямые операции изменения данных в таблицах и базах данных `MaterializeMySQL` запрещены.
+- На работу `MaterializeMySQL` влияет настройка [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert). Когда таблица на MySQL сервере меняется, происходит слияние данных в соответствующей таблице в базе данных `MaterializeMySQL`.

 ## Примеры использования {#examples-of-use}
@@ -156,4 +157,4 @@ SELECT * FROM mysql.test;
 └───┴─────┴──────┘
 ```

-[Оригинальная статья](https://clickhouse.tech/docs/ru/database_engines/materialize-mysql/)
+[Оригинальная статья](https://clickhouse.tech/docs/ru/engines/database-engines/materialize-mysql/)
diff --git a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
new file mode 100644
index 00000000000..9b68bcfc770
--- /dev/null
+++ b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md
@@ -0,0 +1,44 @@
+---
+toc_priority: 6
+toc_title: EmbeddedRocksDB
+---
+
+# Движок EmbeddedRocksDB {#EmbeddedRocksDB-engine}
+
+Этот движок позволяет интегрировать ClickHouse с [rocksdb](http://rocksdb.org/).
+
+## Создание таблицы {#table_engine-EmbeddedRocksDB-creating-a-table}
+
+``` sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
+(
+    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
+    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
+    ...
+) ENGINE = EmbeddedRocksDB
+PRIMARY KEY(primary_key_name);
+```
+
+Обязательные параметры:
+
+- `primary_key_name` может быть любое имя столбца из списка столбцов.
+- Указание первичного ключа `primary key` является обязательным. Он будет сериализован в двоичном формате как ключ `rocksdb`.
+- Поддерживается только один столбец в первичном ключе.
+- Столбцы, которые отличаются от первичного ключа, будут сериализованы в двоичном формате как значение `rocksdb` в соответствующем порядке.
+- Запросы с фильтрацией по ключу `equals` или `in` оптимизируются для поиска по нескольким ключам из `rocksdb` (набросок такого запроса приведён сразу после этого списка).
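+
+Например, примерный запрос с поиском по нескольким ключам (гипотетический набросок, не из оригинальной статьи; предполагается таблица `test` из примера ниже):
+
+``` sql
+-- Фильтр по первичному ключу выполняется как точечный поиск по ключам rocksdb.
+SELECT key, v1 FROM test WHERE key IN ('k1', 'k2');
+```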
+
+Пример:
+
+``` sql
+CREATE TABLE test
+(
+    `key` String,
+    `v1` UInt32,
+    `v2` String,
+    `v3` Float32
+)
+ENGINE = EmbeddedRocksDB
+PRIMARY KEY key;
+```
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/embedded-rocksdb/)
\ No newline at end of file
diff --git a/docs/ru/engines/table-engines/integrations/index.md b/docs/ru/engines/table-engines/integrations/index.md
index 02189cf9e55..db7e527442e 100644
--- a/docs/ru/engines/table-engines/integrations/index.md
+++ b/docs/ru/engines/table-engines/integrations/index.md
@@ -12,7 +12,10 @@ toc_priority: 30
 - [ODBC](../../../engines/table-engines/integrations/odbc.md)
 - [JDBC](../../../engines/table-engines/integrations/jdbc.md)
 - [MySQL](../../../engines/table-engines/integrations/mysql.md)
+- [MongoDB](../../../engines/table-engines/integrations/mongodb.md)
 - [HDFS](../../../engines/table-engines/integrations/hdfs.md)
 - [Kafka](../../../engines/table-engines/integrations/kafka.md)
+- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md)
+- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)

 [Оригинальная статья](https://clickhouse.tech/docs/ru/engines/table-engines/integrations/)
diff --git a/docs/ru/engines/table-engines/integrations/mongodb.md b/docs/ru/engines/table-engines/integrations/mongodb.md
new file mode 100644
index 00000000000..0765b3909de
--- /dev/null
+++ b/docs/ru/engines/table-engines/integrations/mongodb.md
@@ -0,0 +1,57 @@
+---
+toc_priority: 7
+toc_title: MongoDB
+---
+
+# MongoDB {#mongodb}
+
+Движок таблиц MongoDB позволяет читать данные из коллекций СУБД MongoDB. В таблицах допустимы только плоские (не вложенные) типы данных. Запись (`INSERT`-запросы) не поддерживается.
+
+## Создание таблицы {#creating-a-table}
+
+``` sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name
+(
+    name1 [type1],
+    name2 [type2],
+    ...
+) ENGINE = MongoDB(host:port, database, collection, user, password);
+```
+
+**Параметры движка**
+
+- `host:port` — адрес сервера MongoDB.
+
+- `database` — имя базы данных на удалённом сервере.
+
+- `collection` — имя коллекции на удалённом сервере.
+
+- `user` — пользователь MongoDB.
+
+- `password` — пароль пользователя.
+
+## Примеры использования {#usage-example}
+
+Таблица в ClickHouse для чтения данных из коллекции MongoDB:
+
+``` sql
+CREATE TABLE mongo_table
+(
+    key UInt64,
+    data String
+) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse');
+```
+
+Запрос к таблице:
+
+``` sql
+SELECT COUNT() FROM mongo_table;
+```
+
+``` text
+┌─count()─┐
+│       4 │
+└─────────┘
+```
+
+[Original article](https://clickhouse.tech/docs/ru/operations/table_engines/integrations/mongodb/)
diff --git a/docs/ru/engines/table-engines/integrations/rabbitmq.md b/docs/ru/engines/table-engines/integrations/rabbitmq.md
index dedb5842d68..2a44e085ede 100644
--- a/docs/ru/engines/table-engines/integrations/rabbitmq.md
+++ b/docs/ru/engines/table-engines/integrations/rabbitmq.md
@@ -52,10 +52,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
   - `rabbitmq_max_block_size`
   - `rabbitmq_flush_interval_ms`

-Требуемая конфигурация:
 Конфигурация сервера RabbitMQ добавляется с помощью конфигурационного файла ClickHouse.
+Требуемая конфигурация:
+
``` xml
 <rabbitmq>
    <username>root</username>
@@ -63,6 +64,14 @@
 </rabbitmq>
 ```

+Дополнительная конфигурация:
+
+``` xml
+<rabbitmq>
+    <vhost>clickhouse</vhost>
+</rabbitmq>
+```
+
 Example:

``` sql
diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
index 9b2a5eafca3..6fc566b7c31 100644
--- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
@@ -37,7 +37,10 @@ ORDER BY expr
 [PARTITION BY expr]
 [PRIMARY KEY expr]
 [SAMPLE BY expr]
-[TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...]
+[TTL expr
+    [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx' [, ...] ]
+    [WHERE conditions]
+    [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ]
 [SETTINGS name=value, ...]
 ```

@@ -71,7 +74,7 @@ ORDER BY expr
 Выражение должно возвращать столбец `Date` или `DateTime`. Пример: `TTL date + INTERVAL 1 DAY`.

-   Тип правила `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'` указывает действие, которое будет выполнено с частью, удаление строк (прореживание), перемещение (при выполнении условия для всех строк части) на определённый диск (`TO DISK 'xxx'`) или том (`TO VOLUME 'xxx'`). Поведение по умолчанию соответствует удалению строк (`DELETE`). В списке правил может быть указано только одно выражение с поведением `DELETE`.
+   Тип правила `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` указывает действие, которое будет выполнено с частью: удаление строк (прореживание), перемещение (при выполнении условия для всех строк части) на определённый диск (`TO DISK 'xxx'`) или том (`TO VOLUME 'xxx'`), или агрегирование данных в устаревших строках. Поведение по умолчанию соответствует удалению строк (`DELETE`). В списке правил может быть указано только одно выражение с поведением `DELETE`.

 Дополнительные сведения смотрите в разделе [TTL для столбцов и таблиц](#table_engine-mergetree-ttl)

@@ -91,6 +94,7 @@ ORDER BY expr
 - `max_parts_in_total` — максимальное количество кусков во всех партициях.
 - `max_compress_block_size` — максимальный размер блоков несжатых данных перед сжатием для записи в таблицу. Вы также можете задать этот параметр в глобальных настройках (смотрите [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная.
 - `min_compress_block_size` — минимальный размер блоков несжатых данных, необходимых для сжатия при записи следующей засечки. Вы также можете задать этот параметр в глобальных настройках (смотрите [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная.
+ - `max_partitions_to_read` — Ограничивает максимальное число партиций для чтения в одном запросе. Также возможно указать настройку [max_partitions_to_read](../../../operations/settings/merge-tree-settings.md#max-partitions-to-read) в глобальных настройках.

 **Пример задания секций**

@@ -443,16 +447,28 @@ ALTER TABLE example_table

 Для таблицы можно задать одно выражение для устаревания данных, а также несколько выражений, по срабатывании которых данные переместятся на [некоторый диск или том](#table_engine-mergetree-multiple-volumes). Когда некоторые данные в таблице устаревают, ClickHouse удаляет все соответствующие строки.

``` sql
-TTL expr [DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'], ...
+TTL expr
+    [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ...
+    [WHERE conditions]
+    [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ]
 ```

 За каждым TTL выражением может следовать тип действия, которое выполняется после достижения времени, соответствующего результату TTL выражения:

 - `DELETE` - удалить данные (действие по умолчанию);
 - `TO DISK 'aaa'` - переместить данные на диск `aaa`;
-- `TO VOLUME 'bbb'` - переместить данные на том `bbb`.
+- `TO VOLUME 'bbb'` - переместить данные на том `bbb`;
+- `GROUP BY` - агрегировать данные.

-Примеры:
+В секции `WHERE` можно задать условие удаления или агрегирования устаревших строк (для перемещения условие `WHERE` не применимо).
+
+Колонки, по которым агрегируются данные в `GROUP BY`, должны являться префиксом первичного ключа таблицы.
+
+Если колонка не является частью выражения `GROUP BY` и не задается напрямую в секции `SET`, в результирующих строках она будет содержать случайное значение, взятое из одной из сгруппированных строк (как будто к ней применяется агрегирующая функция `any`).
+
+**Примеры**
+
+Создание таблицы с TTL:

``` sql
 CREATE TABLE example_table
@@ -468,13 +484,43 @@ TTL d + INTERVAL 1 MONTH [DELETE],
     d + INTERVAL 2 WEEK TO DISK 'bbb';
 ```

-Изменение TTL
+Изменение TTL:

``` sql
 ALTER TABLE example_table
     MODIFY TTL d + INTERVAL 1 DAY;
 ```

+Создание таблицы, в которой строки устаревают через месяц. Устаревшие строки удаляются, если дата выпадает на понедельник:
+
+``` sql
+CREATE TABLE table_with_where
+(
+    d DateTime,
+    a Int
+)
+ENGINE = MergeTree
+PARTITION BY toYYYYMM(d)
+ORDER BY d
+TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1;
+```
+
+Создание таблицы, где устаревшие строки агрегируются. В результирующих строках колонка `x` содержит максимальное значение по сгруппированным строкам, `y` — минимальное значение, а `d` — случайное значение из одной из сгруппированных строк.
+
+``` sql
+CREATE TABLE table_for_aggregation
+(
+    d DateTime,
+    k1 Int,
+    k2 Int,
+    x Int,
+    y Int
+)
+ENGINE = MergeTree
+ORDER BY k1, k2
+TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
+```
+
 **Удаление данных**

 Данные с истекшим TTL удаляются, когда ClickHouse мёржит куски данных.
@@ -666,4 +712,4 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'

 После выполнения фоновых слияний или мутаций старые куски не удаляются сразу, а через некоторое время (табличная настройка `old_parts_lifetime`). Также они не перемещаются на другие тома или диски, поэтому до момента удаления они продолжают учитываться при подсчёте занятого дискового пространства.

-[Оригинальная статья](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/)
+[Оригинальная статья](https://clickhouse.tech/docs/ru/engines/table-engines/mergetree-family/mergetree/)
diff --git a/docs/ru/operations/backup.md b/docs/ru/operations/backup.md
index 0dcb6fd307d..165b54d9b62 100644
--- a/docs/ru/operations/backup.md
+++ b/docs/ru/operations/backup.md
@@ -5,7 +5,7 @@ toc_title: "\u0420\u0435\u0437\u0435\u0440\u0432\u043d\u043e\u0435\u0020\u043a\u

 # Резервное копирование данных {#rezervnoe-kopirovanie-dannykh}

-[Репликация](../engines/table-engines/mergetree-family/replication.md) обеспечивает защиту от аппаратных сбоев, но не защищает от человеческих ошибок: случайного удаления данных, удаления не той таблицы, которую надо было, или таблицы на не том кластере, а также программных ошибок, которые приводят к неправильной обработке данных или их повреждению.
Во многих случаях подобные ошибки влияют на все реплики. ClickHouse имеет встроенные средства защиты для предотвращения некоторых типов ошибок — например, по умолчанию [не получится удалить таблицы \*MergeTree, содержащие более 50 Гб данных, одной командой](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Однако эти средства защиты не охватывают все возможные случаи и могут быть обойдены. +[Репликация](../engines/table-engines/mergetree-family/replication.md) обеспечивает защиту от аппаратных сбоев, но не защищает от человеческих ошибок: случайного удаления данных, удаления не той таблицы, которую надо было, или таблицы на не том кластере, а также программных ошибок, которые приводят к неправильной обработке данных или их повреждению. Во многих случаях подобные ошибки влияют на все реплики. ClickHouse имеет встроенные средства защиты для предотвращения некоторых типов ошибок — например, по умолчанию [не получится удалить таблицы \*MergeTree, содержащие более 50 Гб данных, одной командой](server-configuration-parameters/settings.md#max-table-size-to-drop). Однако эти средства защиты не охватывают все возможные случаи и могут быть обойдены. Для того чтобы эффективно уменьшить возможные человеческие ошибки, следует тщательно подготовить стратегию резервного копирования и восстановления данных **заранее**. diff --git a/docs/ru/operations/settings/merge-tree-settings.md b/docs/ru/operations/settings/merge-tree-settings.md index e78d4c98683..bfc0b0a2644 100644 --- a/docs/ru/operations/settings/merge-tree-settings.md +++ b/docs/ru/operations/settings/merge-tree-settings.md @@ -181,4 +181,16 @@ Eсли суммарное число активных кусков во все При старте ClickHouse читает все куски всех таблиц (читает файлы с метаданными кусков), чтобы построить в ОЗУ список всех кусков. В некоторых системах с большим количеством кусков этот процесс может занимать длительное время, и это время можно сократить, увеличив `max_part_loading_threads` (если при этом процессе есть недозагруженность CPU и диска). -{## [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/merge-tree-settings/) ##} +## max_partitions_to_read {#max-partitions-to-read} + +Ограничивает максимальное число партиций для чтения в одном запросе. + +Указанное при создании таблицы значение настройки может быть переназначено настройкой на уровне запроса. + +Возможные значения: + +- Любое положительное целое число. + +Значение по умолчанию: -1 (неограниченно). + +[Original article](https://clickhouse.tech/docs/ru/operations/settings/merge_tree_settings/) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 73dc0b9d944..7322b6c9184 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -283,12 +283,10 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), ( ## input_format_tsv_empty_as_default {#settings-input-format-tsv-empty-as-default} -Если эта настройка включена, замените пустые поля ввода в TSV значениями по умолчанию. Для сложных выражений по умолчанию также должна быть включена настройка `input_format_defaults_for_omitted_fields`. +Если эта настройка включена, все пустые поля во входящем TSV заменяются значениями по умолчанию. Для сложных выражений по умолчанию также должна быть включена настройка `input_format_defaults_for_omitted_fields`. По умолчанию отключена. -Disabled by default. 
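+
+Например, примерный сценарий (гипотетический набросок, не из оригинальной статьи): при включенной настройке пустое поле TSV заменяется значением по умолчанию из определения таблицы.
+
+``` sql
+CREATE TABLE tsv_demo (s String, n UInt32 DEFAULT 42) ENGINE = Memory;
+
+SET input_format_tsv_empty_as_default = 1;
+SET input_format_defaults_for_omitted_fields = 1;
+
+-- Во входной строке ниже второе поле пустое (после 'hello' идёт символ табуляции),
+-- поэтому в колонку n запишется значение по умолчанию 42.
+INSERT INTO tsv_demo FORMAT TSV
+hello	
+```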
-
 ## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number}

 Включает или отключает парсинг значений перечислений как идентификаторов перечислений для входного формата TSV.
@@ -406,21 +404,46 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102	2;

 Возможные значения:

-- `'best_effort'` — включает расширенный парсинг.
+- `best_effort` — включает расширенный парсинг.

-ClickHouse может парсить базовый формат `YYYY-MM-DD HH:MM:SS` и все форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). Например, `'2018-06-08T01:02:03.000Z'`.
+ClickHouse может парсить базовый формат `YYYY-MM-DD HH:MM:SS` и все форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). Например, `2018-06-08T01:02:03.000Z`.

-- `'basic'` — используется базовый парсер.
+- `basic` — используется базовый парсер.

-ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS` или `YYYY-MM-DD`. Например, `'2019-08-20 10:18:56'` или `2019-08-20`.
+ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS` или `YYYY-MM-DD`. Например, `2019-08-20 10:18:56` или `2019-08-20`.

-Значение по умолчанию: `'basic'`.
+Значение по умолчанию: `basic`.

 См. также:

 - [Тип данных DateTime.](../../sql-reference/data-types/datetime.md)
 - [Функции для работы с датой и временем.](../../sql-reference/functions/date-time-functions.md)

+## date_time_output_format {#settings-date_time_output_format}
+
+Позволяет выбрать разные выходные форматы текстового представления даты и времени.
+
+Возможные значения:
+
+- `simple` - простой выходной формат.
+
+    Дата и время выводятся в формате `YYYY-MM-DD hh:mm:ss`. Например, `2019-08-20 10:18:56`. Расчет выполняется в соответствии с часовым поясом типа данных (если он есть) или часовым поясом сервера.
+
+- `iso` - выходной формат ISO.
+
+    Дата и время выводятся в формате [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ`. Например, `2019-08-20T10:18:56Z`. Обратите внимание, что выходные данные отображаются в формате UTC (`Z` означает UTC).
+
+- `unix_timestamp` - выходной формат Unix.
+
+    Дата и время выводятся в формате [Unix](https://en.wikipedia.org/wiki/Unix_time). Например, `1566285536`.
+
+Значение по умолчанию: `simple`.
+
+См. также:
+
+- [Тип данных DateTime](../../sql-reference/data-types/datetime.md)
+- [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md)
+
 ## join_default_strictness {#settings-join_default_strictness}

 Устанавливает строгость по умолчанию для [JOIN](../../sql-reference/statements/select/join.md#select-join).
@@ -683,7 +706,7 @@ ClickHouse использует этот параметр при чтении д

 Установка логирования запроса.

-Запросы, переданные в ClickHouse с этой установкой, логируются согласно правилам конфигурационного параметра сервера [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log).
+Запросы, переданные в ClickHouse с этой настройкой, логируются согласно правилам конфигурационного параметра сервера [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log).

 Пример:

@@ -1496,7 +1519,7 @@ ClickHouse генерирует исключение

 - Тип: секунды
 - Значение по умолчанию: 60 секунд

-Управляет скоростью обнуления ошибок в распределенных таблицах. Если реплика недоступна в течение некоторого времени, накапливает 5 ошибок, а distributed_replica_error_half_life установлена на 1 секунду, то реплика считается нормальной через 3 секунды после последней ошибки.
+Управляет скоростью обнуления счетчика ошибок в распределенных таблицах. Предположим, реплика остается недоступна в течение какого-то времени, и за этот период накопилось 5 ошибок. Если настройка `distributed_replica_error_half_life` установлена в значение 1 секунда, то реплика снова будет считаться доступной через 3 секунды после последней ошибки.

 См. также:

@@ -1648,7 +1671,7 @@ ClickHouse генерирует исключение

 - Тип: bool
 - Значение по умолчанию: True

-Обеспечивает параллельный анализ форматов данных с сохранением порядка. Поддерживается только для форматов TSV, TKSV, CSV и JSONEachRow.
+Включает режим, при котором входящие данные парсятся параллельно, но с сохранением исходного порядка следования. Поддерживается только для форматов TSV, TSKV, CSV и JSONEachRow.

 ## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing}

@@ -1962,7 +1985,7 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;

 ## output_format_pretty_grid_charset {#output-format-pretty-grid-charset}

-Позволяет изменить кодировку, которая используется для печати грид-границ. Доступны следующие кодировки: UTF-8, ASCII.
+Позволяет изменить кодировку, которая используется для отрисовки таблицы при выводе результатов запросов. Доступны следующие кодировки: UTF-8, ASCII.

 **Пример**

@@ -2448,4 +2471,70 @@ SELECT SUM(-1), MAX(0) FROM system.one WHERE 0;

 Значение по умолчанию: `16`.

+## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability}
+
+Задает вероятность того, что ClickHouse начнет трассировку для выполненных запросов (если не указан [входящий контекст](https://www.w3.org/TR/trace-context/) трассировки).
+
+Возможные значения:
+
+- 0 — трассировка для выполненных запросов отключена (если не указан входящий контекст трассировки).
+- Положительное число с плавающей точкой в диапазоне [0..1]. Например, при значении настройки, равном `0.5`, ClickHouse начнет трассировку в среднем для половины запросов.
+- 1 — трассировка для всех выполненных запросов включена.
+
+Значение по умолчанию: `0`.
+
+## optimize_on_insert {#optimize-on-insert}
+
+Включает или выключает преобразование данных перед добавлением в таблицу, как будто над добавляемым блоком предварительно было произведено слияние (в соответствии с движком таблицы).
+
+Возможные значения:
+
+- 0 — выключена.
+- 1 — включена.
+
+Значение по умолчанию: 1.
+
+**Пример**
+
+Сравните добавление данных при включенной и выключенной настройке:
+
+Запрос:
+
+```sql
+SET optimize_on_insert = 1;
+
+CREATE TABLE test1 (`FirstTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY FirstTable;
+
+INSERT INTO test1 SELECT number % 2 FROM numbers(5);
+
+SELECT * FROM test1;
+
+SET optimize_on_insert = 0;
+
+CREATE TABLE test2 (`SecondTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY SecondTable;
+
+INSERT INTO test2 SELECT number % 2 FROM numbers(5);
+
+SELECT * FROM test2;
+```
+
+Результат:
+
+``` text
+┌─FirstTable─┐
+│          0 │
+│          1 │
+└────────────┘
+
+┌─SecondTable─┐
+│           0 │
+│           0 │
+│           0 │
+│           1 │
+│           1 │
+└─────────────┘
+```
+
+Обратите внимание на то, что эта настройка влияет на поведение [материализованных представлений](../../sql-reference/statements/create/view.md#materialized) и БД [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md).
+
 [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/)
diff --git a/docs/ru/operations/system-tables/distributed_ddl_queue.md b/docs/ru/operations/system-tables/distributed_ddl_queue.md
new file mode 100644
index 00000000000..058ed06f639
--- /dev/null
+++ b/docs/ru/operations/system-tables/distributed_ddl_queue.md
@@ -0,0 +1,65 @@
+# system.distributed_ddl_queue {#system_tables-distributed_ddl_queue}
+
+Содержит информацию о [распределенных DDL-запросах (секция ON CLUSTER)](../../sql-reference/distributed-ddl.md), которые были выполнены на кластере.
+
+Столбцы:
+
+- `entry` ([String](../../sql-reference/data-types/string.md)) — идентификатор запроса.
+- `host_name` ([String](../../sql-reference/data-types/string.md)) — имя хоста.
+- `host_address` ([String](../../sql-reference/data-types/string.md)) — IP-адрес хоста.
+- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — порт для соединения с сервером.
+- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — состояние запроса.
+- `cluster` ([String](../../sql-reference/data-types/string.md)) — имя кластера.
+- `query` ([String](../../sql-reference/data-types/string.md)) — выполненный запрос.
+- `initiator` ([String](../../sql-reference/data-types/string.md)) — узел, выполнивший запрос.
+- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время начала запроса.
+- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время окончания запроса.
+- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — продолжительность выполнения запроса (в миллисекундах).
+- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — код исключения из [ZooKeeper](../../operations/tips.md#zookeeper).
+
+**Пример**
+
+``` sql
+SELECT *
+FROM system.distributed_ddl_queue
+WHERE cluster = 'test_cluster'
+LIMIT 2
+FORMAT Vertical
+
+Query id: f544e72a-6641-43f1-836b-24baa1c9632a
+
+Row 1:
+──────
+entry: query-0000000000
+host_name: clickhouse01
+host_address: 172.23.0.11
+port: 9000
+status: Finished
+cluster: test_cluster
+query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster
+initiator: clickhouse01:9000
+query_start_time: 2020-12-30 13:07:51
+query_finish_time: 2020-12-30 13:07:51
+query_duration_ms: 6
+exception_code: ZOK
+
+Row 2:
+──────
+entry: query-0000000000
+host_name: clickhouse02
+host_address: 172.23.0.12
+port: 9000
+status: Finished
+cluster: test_cluster
+query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster
+initiator: clickhouse01:9000
+query_start_time: 2020-12-30 13:07:51
+query_finish_time: 2020-12-30 13:07:51
+query_duration_ms: 6
+exception_code: ZOK
+
+2 rows in set. Elapsed: 0.025 sec.
+```
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/distributed_ddl_queue)
 
\ No newline at end of file
diff --git a/docs/ru/operations/system-tables/opentelemetry_span_log.md b/docs/ru/operations/system-tables/opentelemetry_span_log.md
new file mode 100644
index 00000000000..96555064b0e
--- /dev/null
+++ b/docs/ru/operations/system-tables/opentelemetry_span_log.md
@@ -0,0 +1,49 @@
+# system.opentelemetry_span_log {#system_tables-opentelemetry_span_log}
+
+Содержит информацию о [trace spans](https://opentracing.io/docs/overview/spans/) для выполненных запросов.
+
+Столбцы:
+
+- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md)) — идентификатор трассировки для выполненного запроса.
+
+- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор `trace span`.
+
+- `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор родительского `trace span`.
+
+- `operation_name` ([String](../../sql-reference/data-types/string.md)) — имя операции.
+
+- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — время начала `trace span` (в микросекундах).
+
+- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — время окончания `trace span` (в микросекундах).
+
+- `finish_date` ([Date](../../sql-reference/data-types/date.md)) — дата окончания `trace span`.
+
+- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — имена [атрибутов](https://opentelemetry.io/docs/go/instrumentation/#attributes) в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте [OpenTelemetry](https://opentelemetry.io/).
+
+- `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — значения атрибутов в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте `OpenTelemetry`.
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT * FROM system.opentelemetry_span_log LIMIT 1 FORMAT Vertical;
+```
+
+Результат:
+
+``` text
+Row 1:
+──────
+trace_id: cdab0847-0d62-61d5-4d38-dd65b19a1914
+span_id: 701487461015578150
+parent_span_id: 2991972114672045096
+operation_name: DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl()
+start_time_us: 1612374594529090
+finish_time_us: 1612374594529108
+finish_date: 2021-02-03
+attribute.names: []
+attribute.values: []
+```
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/opentelemetry_span_log)
diff --git a/docs/ru/operations/system-tables/part_log.md b/docs/ru/operations/system-tables/part_log.md
index 255ece76ee2..bba4fda6135 100644
--- a/docs/ru/operations/system-tables/part_log.md
+++ b/docs/ru/operations/system-tables/part_log.md
@@ -6,29 +6,62 @@

 Столбцы:

-- `event_type` (Enum) — тип события. Столбец может содержать одно из следующих значений:
+- `query_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор запроса `INSERT`, создавшего этот кусок.
+- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — тип события. Столбец может содержать одно из следующих значений:
     - `NEW_PART` — вставка нового куска.
     - `MERGE_PARTS` — слияние кусков.
     - `DOWNLOAD_PART` — загрузка с реплики.
     - `REMOVE_PART` — удаление или отсоединение из таблицы с помощью [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition).
     - `MUTATE_PART` — изменение куска.
     - `MOVE_PART` — перемещение куска между дисками.
-- `event_date` (Date) — дата события.
-- `event_time` (DateTime) — время события.
-- `duration_ms` (UInt64) — длительность.
-- `database` (String) — имя базы данных, в которой находится кусок.
-- `table` (String) — имя таблицы, в которой находится кусок.
-- `part_name` (String) — имя куска.
-- `partition_id` (String) — идентификатор партиции, в которую был добавлен кусок. В столбце будет значение ‘all’, если таблица партициируется по выражению `tuple()`.
-- `rows` (UInt64) — число строк в куске.
-- `size_in_bytes` (UInt64) — размер куска данных в байтах.
-- `merged_from` (Array(String)) — массив имён кусков, из которых образован текущий кусок в результате слияния (также столбец заполняется в случае скачивания уже смерженного куска). -- `bytes_uncompressed` (UInt64) — количество прочитанных разжатых байт. -- `read_rows` (UInt64) — сколько было прочитано строк при слиянии кусков. -- `read_bytes` (UInt64) — сколько было прочитано байт при слиянии кусков. -- `error` (UInt16) — код ошибки, возникшей при текущем событии. -- `exception` (String) — текст ошибки. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата события. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время события. +- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — длительность. +- `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных, в которой находится кусок. +- `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы, в которой находится кусок. +- `part_name` ([String](../../sql-reference/data-types/string.md)) — имя куска. +- `partition_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор партиции, в которую был добавлен кусок. В столбце будет значение `all`, если таблица партициируется по выражению `tuple()`. +- `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — абсолютный путь к папке с файлами кусков данных. +- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — число строк в куске. +- `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер куска данных в байтах. +- `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — массив имён кусков, из которых образован текущий кусок в результате слияния (также столбец заполняется в случае скачивания уже смерженного куска). +- `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — количество прочитанных несжатых байт. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — сколько было прочитано строк при слиянии кусков. +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — сколько было прочитано байт при слиянии кусков. +- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — максимальная разница между выделенной и освобождённой памятью в контексте потока. +- `error` ([UInt16](../../sql-reference/data-types/int-uint.md)) — код ошибки, возникшей при текущем событии. +- `exception` ([String](../../sql-reference/data-types/string.md)) — текст ошибки. Системная таблица `system.part_log` будет создана после первой вставки данных в таблицу `MergeTree`.
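+Чтобы воспроизвести пример ниже, достаточно создать таблицу семейства `MergeTree` и вставить в неё данные (схема и движок таблицы — условные, в примере они не приводятся):
+
+``` sql
+CREATE TABLE log_mt_2 (n UInt64) ENGINE = MergeTree ORDER BY n;
+INSERT INTO log_mt_2 SELECT number FROM numbers(115418);
+```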
+**Пример** + +``` sql +SELECT * FROM system.part_log LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +query_id: 983ad9c7-28d5-4ae1-844e-603116b7de31 +event_type: NewPart +event_date: 2021-02-02 +event_time: 2021-02-02 11:14:28 +duration_ms: 35 +database: default +table: log_mt_2 +part_name: all_1_1_0 +partition_id: all +path_on_disk: db/data/default/log_mt_2/all_1_1_0/ +rows: 115418 +size_in_bytes: 1074311 +merged_from: [] +bytes_uncompressed: 0 +read_rows: 0 +read_bytes: 0 +peak_memory_usage: 0 +error: 0 +exception: +``` + [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/part_log) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md index 97edd5773c8..f44e65831a9 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md @@ -4,8 +4,63 @@ toc_priority: 106 # argMax {#agg-function-argmax} -Синтаксис: `argMax(arg, val)` +Вычисляет значение `arg` при максимальном значении `val`. Если есть несколько разных значений `arg` для максимальных значений `val`, возвращает первое попавшееся из таких значений. -Вычисляет значение arg при максимальном значении val. Если есть несколько разных значений arg для максимальных значений val, то выдаётся первое попавшееся из таких значений. +Если функции передан кортеж, то будет выведен кортеж с максимальным значением `val`. Удобно использовать для работы с [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md). -[Оригинальная статья](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmax/) +**Синтаксис** + +``` sql +argMax(arg, val) +``` + +или + +``` sql +argMax(tuple(arg, val)) +``` + +**Параметры** + +- `arg` — аргумент. +- `val` — значение. + +**Возвращаемое значение** + +- Значение `arg`, соответствующее максимальному значению `val`. + +Тип: соответствует типу `arg`. + +Если передан кортеж: + +- Кортеж `(arg, val)` c максимальным значением `val` и соответствующим ему `arg`. + +Тип: [Tuple](../../../sql-reference/data-types/tuple.md). + +**Пример** + +Исходная таблица: + +``` text +┌─user─────┬─salary─┐ +│ director │ 5000 │ +│ manager │ 3000 │ +│ worker │ 1000 │ +└──────────┴────────┘ +``` + +Запрос: + +``` sql +SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary; +``` + +Результат: + +``` text +┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐ +│ director │ ('director',5000) │ +└──────────────────────┴─────────────────────────────┘ +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/argmax/) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md index 58161cd226a..8c25b79f92a 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md @@ -4,11 +4,42 @@ toc_priority: 105 # argMin {#agg-function-argmin} -Синтаксис: `argMin(arg, val)` +Вычисляет значение `arg` при минимальном значении `val`. Если есть несколько разных значений `arg` для минимальных значений `val`, возвращает первое попавшееся из таких значений. -Вычисляет значение arg при минимальном значении val. Если есть несколько разных значений arg для минимальных значений val, то выдаётся первое попавшееся из таких значений. 
+Если функции передан кортеж, то будет выведен кортеж с минимальным значением `val`. Удобно использовать для работы с [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md). -**Пример:** +**Синтаксис** + +``` sql +argMin(arg, val) +``` + +или + +``` sql +argMin(tuple(arg, val)) +``` + +**Параметры** + +- `arg` — аргумент. +- `val` — значение. + +**Возвращаемое значение** + +- Значение `arg`, соответствующее минимальному значению `val`. + +Тип: соответствует типу `arg`. + +Если передан кортеж: + +- Кортеж `(arg, val)` с минимальным значением `val` и соответствующим ему `arg`. + +Тип: [Tuple](../../../sql-reference/data-types/tuple.md). + +**Пример** + +Исходная таблица: ``` text ┌─user─────┬─salary─┐ @@ -18,14 +49,18 @@ toc_priority: 105 └──────────┴────────┘ ``` +Запрос: + ``` sql -SELECT argMin(user, salary) FROM salary +SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary; ``` +Результат: + ``` text -┌─argMin(user, salary)─┐ -│ worker │ -└──────────────────────┘ +┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐ +│ worker │ ('worker',1000) │ +└──────────────────────┴─────────────────────────────┘ ``` -[Оригинальная статья](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmin/) +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/argmin/) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest.md new file mode 100644 index 00000000000..fb73fff5f00 --- /dev/null +++ b/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest.md @@ -0,0 +1,71 @@ +--- +toc_priority: 310 +toc_title: mannWhitneyUTest +--- + +# mannWhitneyUTest {#mannwhitneyutest} + +Вычисляет U-критерий Манна — Уитни для выборок из двух генеральных совокупностей. + +**Синтаксис** + +``` sql +mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_index) +``` + +Значения выборок берутся из столбца `sample_data`. Если `sample_index` равно 0, то значение из этой строки принадлежит первой выборке. Во всех остальных случаях значение принадлежит второй выборке. +Проверяется нулевая гипотеза, что генеральные совокупности стохастически равны. Наряду с двусторонней гипотезой могут быть проверены и односторонние. +Для применения U-критерия Манна — Уитни закон распределения генеральных совокупностей не обязан быть нормальным. + +**Параметры** + +- `alternative` — альтернативная гипотеза. (Необязательный параметр, по умолчанию: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md). + - `'two-sided'`; + - `'greater'`; + - `'less'`. +- `continuity_correction` — если не 0, то при вычислении p-значения применяется коррекция непрерывности. (Необязательный параметр, по умолчанию: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md). +- `sample_data` — данные выборок. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) или [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — индексы выборок. [Integer](../../../sql-reference/data-types/int-uint.md). + + +**Возвращаемые значения** + +[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами: +- вычисленное значение критерия Манна — Уитни. [Float64](../../../sql-reference/data-types/float.md). +- вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md).
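+Таблицу `mww_ttest` из примера ниже можно создать, например, так (схема и движок — условные, в документации они не указаны):
+
+``` sql
+CREATE TABLE mww_ttest (sample_data Float64, sample_index UInt8) ENGINE = Memory;
+INSERT INTO mww_ttest VALUES (10, 0), (11, 0), (12, 0), (1, 1), (2, 1), (3, 1);
+```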
+ + +**Пример** + +Таблица: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 10 │ 0 │ +│ 11 │ 0 │ +│ 12 │ 0 │ +│ 1 │ 1 │ +│ 2 │ 1 │ +│ 3 │ 1 │ +└─────────────┴──────────────┘ +``` + +Запрос: + +``` sql +SELECT mannWhitneyUTest('greater')(sample_data, sample_index) FROM mww_ttest; +``` + +Результат: + +``` text +┌─mannWhitneyUTest('greater')(sample_data, sample_index)─┐ +│ (9,0.04042779918503192) │ +└────────────────────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [U-критерий Манна — Уитни](https://ru.wikipedia.org/wiki/U-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%9C%D0%B0%D0%BD%D0%BD%D0%B0_%E2%80%94_%D0%A3%D0%B8%D1%82%D0%BD%D0%B8) + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest/) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/studentttest.md b/docs/ru/sql-reference/aggregate-functions/reference/studentttest.md new file mode 100644 index 00000000000..5361e06c5e2 --- /dev/null +++ b/docs/ru/sql-reference/aggregate-functions/reference/studentttest.md @@ -0,0 +1,65 @@ +--- +toc_priority: 300 +toc_title: studentTTest +--- + +# studentTTest {#studentttest} + +Вычисляет t-критерий Стьюдента для выборок из двух генеральных совокупностей. + +**Синтаксис** + +``` sql +studentTTest(sample_data, sample_index) +``` + +Значения выборок берутся из столбца `sample_data`. Если `sample_index` равно 0, то значение из этой строки принадлежит первой выборке. Во всех остальных случаях значение принадлежит второй выборке. +Проверяется нулевая гипотеза, что средние значения генеральных совокупностей совпадают. Для применения t-критерия Стьюдента распределение в генеральных совокупностях должно быть нормальным и дисперсии должны совпадать. + +**Параметры** + +- `sample_data` — данные выборок. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) или [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — индексы выборок. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Возвращаемые значения** + +[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами: +- вычисленное значение критерия Стьюдента. [Float64](../../../sql-reference/data-types/float.md). +- вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md).
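+Аналогично, таблицу `student_ttest` из примера ниже можно создать так (условный вариант):
+
+``` sql
+CREATE TABLE student_ttest (sample_data Float64, sample_index UInt8) ENGINE = Memory;
+INSERT INTO student_ttest VALUES (20.3, 0), (21.1, 0), (21.9, 1), (21.7, 0), (19.9, 1), (21.8, 1);
+```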
+ + +**Пример** + +Таблица: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 20.3 │ 0 │ +│ 21.1 │ 0 │ +│ 21.9 │ 1 │ +│ 21.7 │ 0 │ +│ 19.9 │ 1 │ +│ 21.8 │ 1 │ +└─────────────┴──────────────┘ +``` + +Запрос: + +``` sql +SELECT studentTTest(sample_data, sample_index) FROM student_ttest; +``` + +Результат: + +``` text +┌─studentTTest(sample_data, sample_index)───┐ +│ (-0.21739130434783777,0.8385421208415731) │ +└───────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [t-критерий Стьюдента](https://ru.wikipedia.org/wiki/T-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%A1%D1%82%D1%8C%D1%8E%D0%B4%D0%B5%D0%BD%D1%82%D0%B0) +- [welchTTest](welchttest.md#welchttest) + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/studentttest/) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/welchttest.md b/docs/ru/sql-reference/aggregate-functions/reference/welchttest.md new file mode 100644 index 00000000000..1f36b2d04ee --- /dev/null +++ b/docs/ru/sql-reference/aggregate-functions/reference/welchttest.md @@ -0,0 +1,65 @@ +--- +toc_priority: 301 +toc_title: welchTTest +--- + +# welchTTest {#welchttest} + +Вычисляет t-критерий Уэлча для выборок из двух генеральных совокупностей. + +**Синтаксис** + +``` sql +welchTTest(sample_data, sample_index) +``` + +Значения выборок берутся из столбца `sample_data`. Если `sample_index` равно 0, то значение из этой строки принадлежит первой выборке. Во всех остальных случаях значение принадлежит второй выборке. +Проверяется нулевая гипотеза, что средние значения генеральных совокупностей совпадают. Для применения t-критерия Уэлча распределение в генеральных совокупностях должно быть нормальным. Дисперсии могут не совпадать. + +**Параметры** + +- `sample_data` — данные выборок. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) или [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — индексы выборок. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Возвращаемые значения** + +[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами: +- вычисленное значение критерия Уэлча. [Float64](../../../sql-reference/data-types/float.md). +- вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md).
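+Таблицу `welch_ttest` из примера ниже можно создать аналогичным образом (условный вариант):
+
+``` sql
+CREATE TABLE welch_ttest (sample_data Float64, sample_index UInt8) ENGINE = Memory;
+INSERT INTO welch_ttest VALUES (20.3, 0), (22.1, 0), (21.9, 0), (18.9, 1), (20.3, 1), (19, 1);
+```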
+ + +**Пример** + +Таблица: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 20.3 │ 0 │ +│ 22.1 │ 0 │ +│ 21.9 │ 0 │ +│ 18.9 │ 1 │ +│ 20.3 │ 1 │ +│ 19 │ 1 │ +└─────────────┴──────────────┘ +``` + +Запрос: + +``` sql +SELECT welchTTest(sample_data, sample_index) FROM welch_ttest; +``` + +Результат: + +``` text +┌─welchTTest(sample_data, sample_index)─────┐ +│ (2.7988719532211235,0.051807360348581945) │ +└───────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [t-критерий Уэлча](https://ru.wikipedia.org/wiki/T-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%A3%D1%8D%D0%BB%D1%87%D0%B0) +- [studentTTest](studentttest.md#studentttest) + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/welchTTest/) diff --git a/docs/ru/sql-reference/data-types/array.md b/docs/ru/sql-reference/data-types/array.md index 906246b66ee..86a23ed041b 100644 --- a/docs/ru/sql-reference/data-types/array.md +++ b/docs/ru/sql-reference/data-types/array.md @@ -47,6 +47,8 @@ SELECT [1, 2] AS x, toTypeName(x) ## Особенности работы с типами данных {#osobennosti-raboty-s-tipami-dannykh} +Максимальный размер массива ограничен одним миллионом элементов. + При создании массива «на лету» ClickHouse автоматически определяет тип аргументов как наиболее узкий тип данных, в котором можно хранить все перечисленные аргументы. Если среди аргументов есть [NULL](../../sql-reference/data-types/array.md#null-literal) или аргумент типа [Nullable](nullable.md#data_type-nullable), то тип элементов массива — [Nullable](nullable.md). Если ClickHouse не смог подобрать тип данных, то он сгенерирует исключение. Это произойдёт, например, при попытке создать массив одновременно со строками и числами `SELECT array(1, 'a')`. diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index 9894fa2802b..ffdf83e5bd0 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -27,7 +27,7 @@ DateTime([timezone]) Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`. -ClickHouse отображает значения типа `DateTime` в формате `YYYY-MM-DD hh:mm:ss`. Отображение можно поменять с помощью функции [formatDateTime](../../sql-reference/data-types/datetime.md#formatdatetime). +ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/settings.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format). diff --git a/docs/ru/sql-reference/data-types/map.md b/docs/ru/sql-reference/data-types/map.md new file mode 100644 index 00000000000..6cb8ccf1143 --- /dev/null +++ b/docs/ru/sql-reference/data-types/map.md @@ -0,0 +1,69 @@ +--- +toc_priority: 65 +toc_title: Map(key, value) +--- + +# Map(key, value) {#data_type-map} + +Тип данных `Map(key, value)` хранит пары `ключ:значение`. 
+ +**Параметры** +- `key` — ключ. [String](../../sql-reference/data-types/string.md) или [Integer](../../sql-reference/data-types/int-uint.md). +- `value` — значение. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) или [Array](../../sql-reference/data-types/array.md). + +!!! warning "Предупреждение" + Сейчас использование типа данных `Map` является экспериментальной возможностью. Чтобы использовать этот тип данных, включите настройку `allow_experimental_map_type = 1`. + +Чтобы получить значение из колонки `a Map('key', 'value')`, используйте синтаксис `a['key']`. В настоящее время такая подстановка работает по алгоритму с линейной сложностью. + +**Примеры** + +Рассмотрим таблицу: + +``` sql +CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory; +INSERT INTO table_map VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30}); +``` + +Выборка всех значений ключа `key2`: + +```sql +SELECT a['key2'] FROM table_map; +``` +Результат: + +```text +┌─arrayElement(a, 'key2')─┐ +│ 10 │ +│ 20 │ +│ 30 │ +└─────────────────────────┘ +``` + +Если для какого-то ключа `key` в колонке с типом `Map()` нет значения, запрос возвращает нули для числовых колонок, пустые строки или пустые массивы. + +```sql +INSERT INTO table_map VALUES ({'key3':100}), ({}); +SELECT a['key3'] FROM table_map; +``` + +Результат: + +```text +┌─arrayElement(a, 'key3')─┐ +│ 100 │ +│ 0 │ +└─────────────────────────┘ +┌─arrayElement(a, 'key3')─┐ +│ 0 │ +│ 0 │ +│ 0 │ +└─────────────────────────┘ +``` + +**См. также** + +- функция [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) +- функция [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) + +[Original article](https://clickhouse.tech/docs/ru/data-types/map/) diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index fc4a3ac7285..f6b8b670563 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -205,8 +205,8 @@ RANGE(MIN first MAX last) Особенности алгоритма: - Если не найден `id` или для найденного `id` не найден диапазон, то возвращается значение по умолчанию для словаря. -- Если есть перекрывающиеся диапазоны, то можно использовать любой подходящий. -- Если граница диапазона `NULL` или некорректная дата (1900-01-01, 2039-01-01), то диапазон считается открытым. Диапазон может быть открытым с обеих сторон. +- Если есть перекрывающиеся диапазоны, то возвращается значение из любого (случайного) подходящего диапазона. +- Если граница диапазона `NULL` или некорректная дата (1900-01-01), то диапазон считается открытым. Диапазон может быть открытым с обеих сторон. Пример конфигурации: diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 015d14b9de5..80057e6f0e0 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -1135,11 +1135,225 @@ SELECT Функция `arrayFirstIndex` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. 
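+Например, индекс первого чётного элемента массива:
+
+```sql
+SELECT arrayFirstIndex(x -> x % 2 = 0, [1, 3, 4, 6]) AS res;
+```
+
+```text
+┌─res─┐
+│   3 │
+└─────┘
+```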
-## arraySum(\[func,\] arr1, …) {#array-sum} +## arrayMin {#array-min} -Возвращает сумму значений функции `func`. Если функция не указана - просто возвращает сумму элементов массива. +Возвращает значение минимального элемента в исходном массиве. -Функция `arraySum` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) - в качестве первого аргумента ей можно передать лямбда-функцию. +Если передана функция `func`, возвращается минимум из элементов массива, преобразованных этой функцией. + +Функция `arrayMin` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию. + +**Синтаксис** + +```sql +arrayMin([func,] arr) +``` + +**Параметры** + +- `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — массив. [Array](../../sql-reference/data-types/array.md). + +**Возвращаемое значение** + +- Минимальное значение функции (или минимальный элемент массива). + +Тип: если передана `func`, соответствует типу ее возвращаемого значения, иначе соответствует типу элементов массива. + +**Примеры** + +Запрос: + +```sql +SELECT arrayMin([1, 2, 4]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ 1 │ +└─────┘ +``` + +Запрос: + +```sql +SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ -4 │ +└─────┘ +``` + +## arrayMax {#array-max} + +Возвращает значение максимального элемента в исходном массиве. + +Если передана функция `func`, возвращается максимум из элементов массива, преобразованных этой функцией. + +Функция `arrayMax` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию. + +**Синтаксис** + +```sql +arrayMax([func,] arr) +``` + +**Параметры** + +- `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — массив. [Array](../../sql-reference/data-types/array.md). + +**Возвращаемое значение** + +- Максимальное значение функции (или максимальный элемент массива). + +Тип: если передана `func`, соответствует типу ее возвращаемого значения, иначе соответствует типу элементов массива. + +**Примеры** + +Запрос: + +```sql +SELECT arrayMax([1, 2, 4]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ 4 │ +└─────┘ +``` + +Запрос: + +```sql +SELECT arrayMax(x -> (-x), [1, 2, 4]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ -1 │ +└─────┘ +``` + +## arraySum {#array-sum} + +Возвращает сумму элементов в исходном массиве. + +Если передана функция `func`, возвращается сумма элементов массива, преобразованных этой функцией. + +Функция `arraySum` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию. + +**Синтаксис** + +```sql +arraySum([func,] arr) +``` + +**Параметры** + +- `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — массив. [Array](../../sql-reference/data-types/array.md). + +**Возвращаемое значение** + +- Сумма значений функции (или сумма элементов массива). 
+ +Тип: для Decimal чисел в исходном массиве (если функция `func` была передана, то для чисел, преобразованных ею) — [Decimal128](../../sql-reference/data-types/decimal.md), для чисел с плавающей точкой — [Float64](../../sql-reference/data-types/float.md), для беззнаковых целых чисел — [UInt64](../../sql-reference/data-types/int-uint.md), для целых чисел со знаком — [Int64](../../sql-reference/data-types/int-uint.md). + +**Примеры** + +Запрос: + +```sql +SELECT arraySum([2, 3]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ 5 │ +└─────┘ +``` + +Запрос: + +```sql +SELECT arraySum(x -> x*x, [2, 3]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ 13 │ +└─────┘ +``` + +## arrayAvg {#array-avg} + +Возвращает среднее значение элементов в исходном массиве. + +Если передана функция `func`, возвращается среднее значение элементов массива, преобразованных этой функцией. + +Функция `arrayAvg` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию. + +**Синтаксис** + +```sql +arrayAvg([func,] arr) +``` + +**Параметры** + +- `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — массив. [Array](../../sql-reference/data-types/array.md). + +**Возвращаемое значение** + +- Среднее значение функции (или среднее значение элементов массива). + +Тип: [Float64](../../sql-reference/data-types/float.md). + +**Примеры** + +Запрос: + +```sql +SELECT arrayAvg([1, 2, 4]) AS res; +``` + +Результат: + +```text +┌────────────────res─┐ +│ 2.3333333333333335 │ +└────────────────────┘ +``` + +Запрос: + +```sql +SELECT arrayAvg(x -> (x * x), [2, 4]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ 10 │ +└─────┘ +``` ## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} diff --git a/docs/ru/sql-reference/functions/ip-address-functions.md index 724fb97c0d5..52f0a92bc9f 100644 --- a/docs/ru/sql-reference/functions/ip-address-functions.md +++ b/docs/ru/sql-reference/functions/ip-address-functions.md @@ -243,4 +243,81 @@ SELECT └───────────────────────────────────┴──────────────────────────────────┘ ``` +## isIPv4String {#isipv4string} + +Определяет, является ли строка адресом IPv4 или нет. Также вернет `0`, если `string` — адрес IPv6. + +**Синтаксис** + +```sql +isIPv4String(string) +``` + +**Параметры** + +- `string` — IP-адрес. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- `1`, если `string` является адресом IPv4, иначе — `0`. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Примеры** + +Запрос: + +```sql +SELECT addr, isIPv4String(addr) FROM ( SELECT ['0.0.0.0', '127.0.0.1', '::ffff:127.0.0.1'] AS addr ) ARRAY JOIN addr +``` + +Результат: + +``` text +┌─addr─────────────┬─isIPv4String(addr)─┐ +│ 0.0.0.0 │ 1 │ +│ 127.0.0.1 │ 1 │ +│ ::ffff:127.0.0.1 │ 0 │ +└──────────────────┴────────────────────┘ +``` + +## isIPv6String {#isipv6string} + +Определяет, является ли строка адресом IPv6 или нет. Также вернет `0`, если `string` — адрес IPv4. + +**Синтаксис** + +```sql +isIPv6String(string) +``` + +**Параметры** + +- `string` — IP-адрес. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- `1`, если `string` является адресом IPv6, иначе — `0`. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md).
+ +**Примеры** + +Запрос: + +``` sql +SELECT addr, isIPv6String(addr) FROM ( SELECT ['::', '1111::ffff', '::ffff:127.0.0.1', '127.0.0.1'] AS addr ) ARRAY JOIN addr +``` + +Результат: + +``` text +┌─addr─────────────┬─isIPv6String(addr)─┐ +│ :: │ 1 │ +│ 1111::ffff │ 1 │ +│ ::ffff:127.0.0.1 │ 1 │ +│ 127.0.0.1 │ 0 │ +└──────────────────┴────────────────────┘ +``` + [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/ip_address_functions/) diff --git a/docs/ru/sql-reference/functions/other-functions.md index 68afb3e24ce..a738ba755b1 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -183,6 +183,103 @@ SELECT visibleWidth(NULL) Получить размер блока. В ClickHouse выполнение запроса всегда идёт по блокам (наборам кусочков столбцов). Функция позволяет получить размер блока, для которого её вызвали. +## byteSize {#function-bytesize} + +Возвращает оценку в байтах размера аргументов в памяти в несжатом виде. + +**Синтаксис** + +```sql +byteSize(argument [, ...]) +``` + +**Параметры** + +- `argument` — значение. + +**Возвращаемое значение** + +- Оценка размера аргументов в памяти в байтах. + +Тип: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Примеры** + +Для аргументов типа [String](../../sql-reference/data-types/string.md) функция возвращает длину строки + 9 (нуль-терминатор + длина). + +Запрос: + +```sql +SELECT byteSize('string'); +``` + +Результат: + +```text +┌─byteSize('string')─┐ +│ 15 │ +└────────────────────┘ +``` + +Запрос: + +```sql +CREATE TABLE test +( + `key` Int32, + `u8` UInt8, + `u16` UInt16, + `u32` UInt32, + `u64` UInt64, + `i8` Int8, + `i16` Int16, + `i32` Int32, + `i64` Int64, + `f32` Float32, + `f64` Float64 +) +ENGINE = MergeTree +ORDER BY key; + +INSERT INTO test VALUES(1, 8, 16, 32, 64, -8, -16, -32, -64, 32.32, 64.64); + +SELECT key, byteSize(u8) AS `byteSize(UInt8)`, byteSize(u16) AS `byteSize(UInt16)`, byteSize(u32) AS `byteSize(UInt32)`, byteSize(u64) AS `byteSize(UInt64)`, byteSize(i8) AS `byteSize(Int8)`, byteSize(i16) AS `byteSize(Int16)`, byteSize(i32) AS `byteSize(Int32)`, byteSize(i64) AS `byteSize(Int64)`, byteSize(f32) AS `byteSize(Float32)`, byteSize(f64) AS `byteSize(Float64)` FROM test ORDER BY key ASC FORMAT Vertical; +``` + +Результат: + +``` text +Row 1: +────── +key: 1 +byteSize(UInt8): 1 +byteSize(UInt16): 2 +byteSize(UInt32): 4 +byteSize(UInt64): 8 +byteSize(Int8): 1 +byteSize(Int16): 2 +byteSize(Int32): 4 +byteSize(Int64): 8 +byteSize(Float32): 4 +byteSize(Float64): 8 +``` + +Если функция принимает несколько аргументов, то она возвращает их совокупный размер в байтах. + +Запрос: + +```sql +SELECT byteSize(NULL, 1, 0.3, ''); +``` + +Результат: + +```text +┌─byteSize(NULL, 1, 0.3, '')─┐ +│ 19 │ +└────────────────────────────┘ +``` + ## materialize(x) {#materializex} Превращает константу в полноценный столбец, содержащий только одно значение. diff --git a/docs/ru/sql-reference/functions/string-search-functions.md index e8cbb8deec4..b7193da6f33 100644 --- a/docs/ru/sql-reference/functions/string-search-functions.md +++ b/docs/ru/sql-reference/functions/string-search-functions.md @@ -13,8 +13,6 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u043f\u043e\u0438\u Возвращает позицию (в байтах) найденной подстроки в строке, начиная с 1, или 0, если подстрока не найдена.
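+Например:
+
+``` sql
+SELECT position('Hello, world!', 'world') AS pos;
+```
+
+``` text
+┌─pos─┐
+│   8 │
+└─────┘
+```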
-Работает при допущении, что строка содержит набор байт, представляющий текст в однобайтовой кодировке. Если допущение не выполнено — то возвращает неопределенный результат (не кидает исключение). Если символ может быть представлен с помощью двух байтов, он будет представлен двумя байтами и так далее. - Для поиска без учета регистра используйте функцию [positionCaseInsensitive](#positioncaseinsensitive). **Синтаксис** diff --git a/docs/ru/sql-reference/functions/tuple-map-functions.md index a2b25e68fe5..a36613280a1 100644 --- a/docs/ru/sql-reference/functions/tuple-map-functions.md +++ b/docs/ru/sql-reference/functions/tuple-map-functions.md @@ -5,6 +5,66 @@ toc_title: Работа с контейнерами map # Функции для работы с контейнерами map {#functions-for-working-with-tuple-maps} +## map {#function-map} + +Преобразовывает пары `ключ:значение` в тип данных [Map(key, value)](../../sql-reference/data-types/map.md). + +**Синтаксис** + +``` sql +map(key1, value1[, key2, value2, ...]) +``` + +**Параметры** + +- `key` — ключ. [String](../../sql-reference/data-types/string.md) или [Integer](../../sql-reference/data-types/int-uint.md). +- `value` — значение. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) или [Array](../../sql-reference/data-types/array.md). + +**Возвращаемое значение** + +- Структура данных в виде пар `ключ:значение`. + +Тип: [Map(key, value)](../../sql-reference/data-types/map.md). + +**Примеры** + +Запрос: + +``` sql +SELECT map('key1', number, 'key2', number * 2) FROM numbers(3); +``` + +Результат: + +``` text +┌─map('key1', number, 'key2', multiply(number, 2))─┐ +│ {'key1':0,'key2':0} │ +│ {'key1':1,'key2':2} │ +│ {'key1':2,'key2':4} │ +└──────────────────────────────────────────────────┘ +``` + +Запрос: + +``` sql +CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a; +INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3); +SELECT a['key2'] FROM table_map; +``` + +Результат: + +``` text +┌─arrayElement(a, 'key2')─┐ +│ 0 │ +│ 2 │ +│ 4 │ +└─────────────────────────┘ +``` + +**См. также** + +- тип данных [Map(key, value)](../../sql-reference/data-types/map.md) ## mapAdd {#function-mapadd} Собирает все ключи и суммирует соответствующие значения. diff --git a/docs/ru/sql-reference/functions/url-functions.md index 1008e2a359c..7541e16bed4 100644 --- a/docs/ru/sql-reference/functions/url-functions.md +++ b/docs/ru/sql-reference/functions/url-functions.md @@ -115,6 +115,168 @@ SELECT topLevelDomain('svn+ssh://www.some.svn-hosting.com:80/repo/trunk') Например, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`. +### cutToFirstSignificantSubdomainCustom {#cuttofirstsignificantsubdomaincustom} + +Возвращает часть домена, включающую поддомены верхнего уровня до первого существенного поддомена. Принимает имя пользовательского [списка доменов верхнего уровня](https://ru.wikipedia.org/wiki/Список_доменов_верхнего_уровня). + +Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский. + +Пример конфигурации: + +```xml +<top_level_domains_lists> + <public_suffix_list>public_suffix_list.dat</public_suffix_list> +</top_level_domains_lists> +``` + +**Синтаксис** + +``` sql +cutToFirstSignificantSubdomainCustom(URL, TLD) +``` + +**Параметры** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — имя пользовательского списка доменов верхнего уровня.
[String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Часть домена, включающая поддомены верхнего уровня до первого существенного поддомена. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +```sql +SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Результат: + +```text +┌─cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo.there-is-no-such-domain │ +└───────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). + +### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww} + +Возвращает часть домена, включающую поддомены верхнего уровня до первого существенного поддомена, не опуская `www`. Принимает имя пользовательского списка доменов верхнего уровня. + +Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский. + +Пример конфигурации: + +```xml +<top_level_domains_lists> + <public_suffix_list>public_suffix_list.dat</public_suffix_list> +</top_level_domains_lists> +``` + +**Синтаксис** + +```sql +cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) +``` + +**Параметры** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — имя пользовательского списка доменов верхнего уровня. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Часть домена, включающая поддомены верхнего уровня до первого существенного поддомена, без удаления `www`. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +```sql +SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list'); +``` + +Результат: + +```text +┌─cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')─┐ +│ www.foo │ +└──────────────────────────────────────────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). + +### firstSignificantSubdomainCustom {#firstsignificantsubdomaincustom} + +Возвращает первый существенный поддомен. Принимает имя пользовательского списка доменов верхнего уровня. + +Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский. + +Пример конфигурации: + +```xml +<top_level_domains_lists> + <public_suffix_list>public_suffix_list.dat</public_suffix_list> +</top_level_domains_lists> +``` + +**Синтаксис** + +```sql +firstSignificantSubdomainCustom(URL, TLD) +``` + +**Параметры** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — имя пользовательского списка доменов верхнего уровня. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Первый существенный поддомен. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +```sql +SELECT firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Результат: + +```text +┌─firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo │ +└──────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [firstSignificantSubdomain](#firstsignificantsubdomain).
+ ### port(URL[, default_port = 0]) {#port} Возвращает порт или значение `default_port`, если в URL-адресе нет порта (или передан невалидный URL) diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md index 09026874948..f4b91b5ae17 100644 --- a/docs/ru/sql-reference/statements/create/view.md +++ b/docs/ru/sql-reference/statements/create/view.md @@ -56,9 +56,10 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na Недоработано выполнение запросов `ALTER` над материализованными представлениями, поэтому они могут быть неудобными для использования. Если материализованное представление использует конструкцию `TO [db.]name`, то можно выполнить `DETACH` представления, `ALTER` для целевой таблицы и последующий `ATTACH` ранее отсоединенного (`DETACH`) представления. +Обратите внимание, что работа материализованного представления находится под влиянием настройки [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert). Перед вставкой данных в таблицу происходит их слияние. + Представления выглядят так же, как обычные таблицы. Например, они перечисляются в результате запроса `SHOW TABLES`. Отсутствует отдельный запрос для удаления представлений. Чтобы удалить представление, следует использовать `DROP TABLE`. -[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/view) - +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/view) diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index d83f6691f6b..0ad85ed0166 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -13,7 +13,7 @@ toc_title: INSERT INTO INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` -Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как `APPLY`, `EXCEPT`, `REPLACE`. +Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение со [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). В качестве примера рассмотрим таблицу: @@ -63,8 +63,6 @@ SELECT * FROM insert_select_testtable - Значения, вычисляемые из `DEFAULT` выражений, указанных в определении таблицы. - Нули и пустые строки, если `DEFAULT` не определены. -Если [strict_insert_defaults=1](../../operations/settings/settings.md), то столбцы, для которых не определены `DEFAULT`, необходимо перечислить в запросе. - В INSERT можно передавать данные любого [формата](../../interfaces/formats.md#formats), который поддерживает ClickHouse. Для этого формат необходимо указать в запросе в явном виде: ``` sql diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md index c37e82ae0be..b0b6e80d7be 100644 --- a/docs/ru/sql-reference/statements/select/index.md +++ b/docs/ru/sql-reference/statements/select/index.md @@ -162,6 +162,112 @@ Code: 42. DB::Exception: Received from localhost:9000.
DB::Exception: Number of Подробнее смотрите в разделе «Настройки». Присутствует возможность использовать внешнюю сортировку (с сохранением временных данных на диск) и внешнюю агрегацию. +## Модификаторы запроса SELECT {#select-modifiers} + +Вы можете использовать следующие модификаторы в запросах `SELECT`. + +### APPLY {#apply-modifier} + +Вызывает указанную функцию для каждой строки, возвращаемой внешним табличным выражением запроса. + +**Синтаксис:** + +``` sql +SELECT <expr> APPLY( <func> ) FROM [db.]table_name +``` + +**Пример:** + +``` sql +CREATE TABLE columns_transformers (i Int64, j Int16, k Int64) ENGINE = MergeTree ORDER by (i); +INSERT INTO columns_transformers VALUES (100, 10, 324), (120, 8, 23); +SELECT * APPLY(sum) FROM columns_transformers; +``` + +``` +┌─sum(i)─┬─sum(j)─┬─sum(k)─┐ +│ 220 │ 18 │ 347 │ +└────────┴────────┴────────┘ +``` + +### EXCEPT {#except-modifier} + +Исключает из результата запроса один или несколько столбцов. + +**Синтаксис:** + +``` sql +SELECT <expr> EXCEPT ( col_name1 [, col_name2, col_name3, ...] ) FROM [db.]table_name +``` + +**Пример:** + +``` sql +SELECT * EXCEPT (i) from columns_transformers; +``` + +``` +┌──j─┬───k─┐ +│ 10 │ 324 │ +│ 8 │ 23 │ +└────┴─────┘ +``` + +### REPLACE {#replace-modifier} + +Определяет одно или несколько [выражений алиасов](../../../sql-reference/syntax.md#syntax-expression_aliases). Каждый алиас должен соответствовать имени столбца из запроса `SELECT *`. В списке столбцов результата запроса имя столбца, соответствующее алиасу, заменяется выражением в модификаторе `REPLACE`. + +Этот модификатор не изменяет имена или порядок столбцов. Однако он может изменить значение и тип значения. + +**Синтаксис:** + +``` sql +SELECT <expr> REPLACE( <expr> AS col_name) from [db.]table_name +``` + +**Пример:** + +``` sql +SELECT * REPLACE(i + 1 AS i) from columns_transformers; +``` + +``` +┌───i─┬──j─┬───k─┐ +│ 101 │ 10 │ 324 │ +│ 121 │ 8 │ 23 │ +└─────┴────┴─────┘ +``` + +### Комбинации модификаторов {#modifier-combinations} + +Вы можете использовать каждый модификатор отдельно или комбинировать их. + +**Примеры:** + +Использование одного и того же модификатора несколько раз. + +``` sql +SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) APPLY(max) from columns_transformers; +``` + +``` +┌─max(length(toString(j)))─┬─max(length(toString(k)))─┐ +│ 2 │ 3 │ +└──────────────────────────┴──────────────────────────┘ +``` + +Использование нескольких модификаторов в одном запросе. + +``` sql +SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) from columns_transformers; +``` + +``` +┌─sum(plus(i, 1))─┬─sum(k)─┐ +│ 222 │ 347 │ +└─────────────────┴────────┘ +``` + ## SETTINGS в запросе SELECT {#settings-in-select} Вы можете задать значения необходимых настроек непосредственно в запросе `SELECT` в секции `SETTINGS`. Эти настройки действуют только в рамках данного запроса, а после его выполнения сбрасываются до предыдущего значения или значения по умолчанию. @@ -174,5 +280,4 @@ Code: 42. DB::Exception: Received from localhost:9000.
DB::Exception: Number of SELECT * FROM some_table SETTINGS optimize_read_in_order=1, cast_keep_nullable=1; ``` -[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/) - +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/) diff --git a/docs/zh/engines/table-engines/mergetree-family/mergetree.md b/docs/zh/engines/table-engines/mergetree-family/mergetree.md index 2fffcbe7ef3..353dd5f5bc8 100644 --- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md @@ -401,7 +401,7 @@ TTL date_time + INTERVAL 15 HOUR ### 列 TTL {#mergetree-column-ttl} -当列中的值过期时, ClickHouse会将它们替换成该列数据类型的默认值。如果数据片段中列的所有值均已过期,则ClickHouse 会从文件系统中的数据片段中此列。 +当列中的值过期时, ClickHouse会将它们替换成该列数据类型的默认值。如果数据片段中列的所有值均已过期,则ClickHouse 会从文件系统中的数据片段中删除此列。 `TTL`子句不能被用于主键字段。 diff --git a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index 7a0a42fa47c..3b89da9f595 100644 --- a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -37,7 +37,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] VersionedCollapsingMergeTree(sign, version) ``` -- `sign` — 指定行类型的列名: `1` 是一个 “state” 行, `-1` 是一个 “cancel” 划 +- `sign` — 指定行类型的列名: `1` 是一个 “state” 行, `-1` 是一个 “cancel” 行 列数据类型应为 `Int8`. diff --git a/docs/zh/operations/backup.md b/docs/zh/operations/backup.md index 72491bb53ff..1b1993e3ae6 100644 --- a/docs/zh/operations/backup.md +++ b/docs/zh/operations/backup.md @@ -7,7 +7,7 @@ toc_title: "\u6570\u636E\u5907\u4EFD" # 数据备份 {#data-backup} -尽管[副本](../engines/table-engines/mergetree-family/replication.md) 可以预防硬件错误带来的数据丢失, 但是它不能防止人为操作的错误: 意外删除数据, 删除错误的 table 或者删除错误 cluster 上的 table, 可以导致错误数据处理错误或者数据损坏的 bugs. 这类意外可能会影响所有的副本. ClickHouse 有内建的保障措施可以预防一些错误 — 例如, 默认情况下[您不能使用类似MergeTree的引擎删除包含超过50Gb数据的表](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). 但是,这些保障措施不能涵盖所有可能的情况,并且可以规避。 +尽管[副本](../engines/table-engines/mergetree-family/replication.md) 可以预防硬件错误带来的数据丢失, 但是它不能防止人为操作的错误: 意外删除数据, 删除错误的 table 或者删除错误 cluster 上的 table, 可以导致错误数据处理错误或者数据损坏的 bugs. 这类意外可能会影响所有的副本. ClickHouse 有内建的保障措施可以预防一些错误 — 例如, 默认情况下[您不能使用类似MergeTree的引擎删除包含超过50Gb数据的表](server-configuration-parameters/settings.md#max-table-size-to-drop). 但是,这些保障措施不能涵盖所有可能的情况,并且可以规避。 为了有效地减少可能的人为错误,您应该 **提前**准备备份和还原数据的策略. 
diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index f834ab74f5a..64625c19c6a 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -1310,3 +1310,14 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1; **另请参阅** - [IN 运算符中的 NULL 处理](../../sql-reference/operators/in.md#in-null-processing) + +## max_final_threads {#max-final-threads} + +设置使用[FINAL](../../sql-reference/statements/select/from.md#select-from-final) 限定符的`SELECT`查询, 在数据读取阶段的最大并发线程数。 + +可能的值: + +- 正整数。 +- 0 or 1 — 禁用。 此时`SELECT` 查询单线程执行。 + +默认值: `16`。 diff --git a/docs/zh/operations/system-tables/zookeeper.md b/docs/zh/operations/system-tables/zookeeper.md index b66e5262df3..f7e816ccee6 100644 --- a/docs/zh/operations/system-tables/zookeeper.md +++ b/docs/zh/operations/system-tables/zookeeper.md @@ -6,12 +6,16 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 # 系统。动物园管理员 {#system-zookeeper} 如果未配置ZooKeeper,则表不存在。 允许从配置中定义的ZooKeeper集群读取数据。 -查询必须具有 ‘path’ WHERE子句中的平等条件。 这是ZooKeeper中您想要获取数据的孩子的路径。 +查询必须具有 ‘path’ WHERE子句中的相等条件或者在某个集合中的条件。 这是ZooKeeper中您想要获取数据的孩子的路径。 查询 `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` 输出对所有孩子的数据 `/clickhouse` 节点。 要输出所有根节点的数据,write path= ‘/’. 如果在指定的路径 ‘path’ 不存在,将引发异常。 +查询`SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse')` 输出`/` 和 `/clickhouse`节点上所有子节点的数据。 +如果在指定的 ‘path’ 集合中有不存在的路径,将引发异常。 +它可以用来做一批ZooKeeper路径查询。 + 列: - `name` (String) — The name of the node. diff --git a/docs/zh/sql-reference/aggregate-functions/index.md b/docs/zh/sql-reference/aggregate-functions/index.md index 436a8f433ea..2344c3e6dc0 100644 --- a/docs/zh/sql-reference/aggregate-functions/index.md +++ b/docs/zh/sql-reference/aggregate-functions/index.md @@ -1,11 +1,12 @@ --- +toc_folder_title: 聚合函数 toc_priority: 33 -toc_title: 聚合函数 +toc_title: 简介 --- # 聚合函数 {#aggregate-functions} -聚合函数在 [正常](http://www.sql-tutorial.com/sql-aggregate-functions-sql-tutorial) 方式如预期的数据库专家。 +聚合函数如数据库专家预期的方式 [正常](http://www.sql-tutorial.com/sql-aggregate-functions-sql-tutorial) 工作。 ClickHouse还支持: @@ -14,7 +15,7 @@ ClickHouse还支持: ## 空处理 {#null-processing} -在聚合过程中,所有 `NULL`s被跳过。 +在聚合过程中,所有 `NULL` 被跳过。 **例:** @@ -30,7 +31,7 @@ ClickHouse还支持: └───┴──────┘ ``` -比方说,你需要在总的值 `y` 列: +比方说,你需要计算 `y` 列的总数: ``` sql SELECT sum(y) FROM t_null_big @@ -40,9 +41,8 @@ SELECT sum(y) FROM t_null_big │ 7 │ └────────┘ -该 `sum` 函数解释 `NULL` 作为 `0`. 特别是,这意味着,如果函数接收输入的选择,其中所有的值 `NULL`,那么结果将是 `0`,不 `NULL`. 
-现在你可以使用 `groupArray` 函数从创建一个数组 `y` 列: +现在你可以使用 `groupArray` 函数用 `y` 列创建一个数组: ``` sql SELECT groupArray(y) FROM t_null_big @@ -54,6 +54,6 @@ SELECT groupArray(y) FROM t_null_big └───────────────┘ ``` -`groupArray` 不包括 `NULL` 在生成的数组中。 +在 `groupArray` 生成的数组中不包括 `NULL`。 [原始文章](https://clickhouse.tech/docs/en/query_language/agg_functions/) diff --git a/docs/zh/sql-reference/aggregate-functions/reference.md b/docs/zh/sql-reference/aggregate-functions/reference.md index cf7dddb9b7e..3a224886a00 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference.md +++ b/docs/zh/sql-reference/aggregate-functions/reference.md @@ -1,9 +1,9 @@ --- toc_priority: 36 -toc_title: 聚合函数 +toc_title: 参考手册 --- -# 聚合函数引用 {#aggregate-functions-reference} +# 参考手册 {#aggregate-functions-reference} ## count {#agg_function-count} diff --git a/docs/zh/sql-reference/statements/select/from.md b/docs/zh/sql-reference/statements/select/from.md index a8b49febab5..71b7cd319eb 100644 --- a/docs/zh/sql-reference/statements/select/from.md +++ b/docs/zh/sql-reference/statements/select/from.md @@ -25,11 +25,13 @@ toc_title: FROM - [Replicated](../../../engines/table-engines/mergetree-family/replication.md) 版本 `MergeTree` 引擎 - [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md),和 [MaterializedView](../../../engines/table-engines/special/materializedview.md) 在其他引擎上运行的引擎,只要是它们底层是 `MergeTree`-引擎表即可。 +现在使用 `FINAL` 修饰符 的 `SELECT` 查询启用了并发执行, 这会快一点。但是仍然存在缺陷 (见下)。 [max_final_threads](../../../operations/settings/settings.md#max-final-threads) 设置使用的最大线程数限制。 + ### 缺点 {#drawbacks} -使用的查询 `FINAL` 执行速度不如类似的查询那么快,因为: +使用的查询 `FINAL` 执行速度比类似的查询慢一点,因为: -- 查询在单个线程中执行,并在查询执行期间合并数据。 +- 在查询执行期间合并数据。 - 查询与 `FINAL` 除了读取查询中指定的列之外,还读取主键列。 **在大多数情况下,避免使用 `FINAL`.** 常见的方法是使用假设后台进程的不同查询 `MergeTree` 引擎还没有发生,并通过应用聚合(例如,丢弃重复项)来处理它。 {## TODO: examples ##} diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index ae1d16ce402..a0e2ea155ba 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -62,12 +62,12 @@ public: bool randomize_, size_t max_iterations_, double max_time_, const String & json_path_, size_t confidence_, const String & query_id_, const String & query_to_execute_, bool continue_on_errors_, - bool print_stacktrace_, const Settings & settings_) + bool reconnect_, bool print_stacktrace_, const Settings & settings_) : concurrency(concurrency_), delay(delay_), queue(concurrency), randomize(randomize_), cumulative(cumulative_), max_iterations(max_iterations_), max_time(max_time_), json_path(json_path_), confidence(confidence_), query_id(query_id_), - query_to_execute(query_to_execute_), continue_on_errors(continue_on_errors_), + query_to_execute(query_to_execute_), continue_on_errors(continue_on_errors_), reconnect(reconnect_), print_stacktrace(print_stacktrace_), settings(settings_), shared_context(Context::createShared()), global_context(Context::createGlobal(shared_context.get())), pool(concurrency) @@ -155,6 +155,7 @@ private: String query_id; String query_to_execute; bool continue_on_errors; + bool reconnect; bool print_stacktrace; const Settings & settings; SharedContextHolder shared_context; @@ -404,9 +405,14 @@ private: void execute(EntryPtrs & connection_entries, Query & query, size_t connection_index) { Stopwatch watch; + + Connection & connection = **connection_entries[connection_index]; + + if (reconnect) + connection.disconnect(); + 
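+        /// With --reconnect the connection was just dropped; sending the query through
+        /// the stream below re-opens it, so connection setup time is included in the
+        /// measured query time.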
RemoteBlockInputStream stream( - *(*connection_entries[connection_index]), - query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage); + connection, query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage); if (!query_id.empty()) stream.setQueryId(query_id); @@ -589,6 +595,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) ("confidence", value<size_t>()->default_value(5), "set the level of confidence for T-test [0=80%, 1=90%, 2=95%, 3=98%, 4=99%, 5=99.5%(default)") ("query_id", value<std::string>()->default_value(""), "") ("continue_on_errors", "continue testing even if a query fails") + ("reconnect", "establish new connection for every query") ; Settings settings; @@ -638,7 +645,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) options["confidence"].as<size_t>(), options["query_id"].as<std::string>(), options["query"].as<std::string>(), - options.count("continue_on_errors") > 0, + options.count("continue_on_errors"), + options.count("reconnect"), print_stacktrace, settings); return benchmark.run(); diff --git a/programs/client/Client.cpp index 9a8b580407a..3c27908741c 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1374,7 +1374,30 @@ private: { // Probably the server is dead because we found an assertion // failure. Fail fast. - fmt::print(stderr, "Lost connection to the server\n"); + fmt::print(stderr, "Lost connection to the server.\n"); + + // Print the changed settings because they might be needed to + // reproduce the error. + const auto & changes = context.getSettingsRef().changes(); + if (!changes.empty()) + { + fmt::print(stderr, "Changed settings: "); + for (size_t i = 0; i < changes.size(); ++i) + { + if (i) + { + fmt::print(stderr, ", "); + } + fmt::print(stderr, "{} = '{}'", changes[i].name, + toString(changes[i].value)); + } + fmt::print(stderr, "\n"); + } + else + { + fmt::print(stderr, "No changed settings.\n"); + } + return false; } @@ -1719,7 +1742,7 @@ private: } // Remember where the data ended. We use this info later to determine // where the next query begins. - parsed_insert_query->end = data_in.buffer().begin() + data_in.count(); + parsed_insert_query->end = parsed_insert_query->data + data_in.count(); } else if (!is_interactive) { @@ -1900,6 +1923,9 @@ private: switch (packet.type) { + case Protocol::Server::PartUUIDs: + return true; + case Protocol::Server::Data: if (!cancelled) onData(packet.block); diff --git a/programs/client/QueryFuzzer.cpp index ae0de450a10..8d8d8daaf39 100644 --- a/programs/client/QueryFuzzer.cpp +++ b/programs/client/QueryFuzzer.cpp @@ -325,6 +325,61 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) // the generic recursion into IAST.children. } +void QueryFuzzer::fuzzWindowFrame(WindowFrame & frame) +{ + switch (fuzz_rand() % 40) + { + case 0: + { + const auto r = fuzz_rand() % 3; + frame.type = r == 0 ? WindowFrame::FrameType::Rows + : r == 1 ? WindowFrame::FrameType::Range + : WindowFrame::FrameType::Groups; + break; + } + case 1: + { + const auto r = fuzz_rand() % 3; + frame.begin_type = r == 0 ? WindowFrame::BoundaryType::Unbounded + : r == 1 ? WindowFrame::BoundaryType::Current + : WindowFrame::BoundaryType::Offset; + break; + } + case 2: + { + const auto r = fuzz_rand() % 3; + frame.end_type = r == 0 ? WindowFrame::BoundaryType::Unbounded + : r == 1 ?
WindowFrame::BoundaryType::Current
+                : WindowFrame::BoundaryType::Offset;
+            break;
+        }
+        case 3:
+        {
+            frame.begin_offset = getRandomField(0).get<Int64>();
+            break;
+        }
+        case 4:
+        {
+            frame.end_offset = getRandomField(0).get<Int64>();
+            break;
+        }
+        case 5:
+        {
+            frame.begin_preceding = fuzz_rand() % 2;
+            break;
+        }
+        case 6:
+        {
+            frame.end_preceding = fuzz_rand() % 2;
+            break;
+        }
+        default:
+            break;
+    }
+
+    frame.is_default = (frame == WindowFrame{});
+}
+
 void QueryFuzzer::fuzz(ASTs & asts)
 {
     for (auto & ast : asts)
@@ -409,6 +464,7 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
         auto & def = fn->window_definition->as<ASTWindowDefinition &>();
         fuzzColumnLikeExpressionList(def.partition_by.get());
         fuzzOrderByList(def.order_by.get());
+        fuzzWindowFrame(def.frame);
     }

     fuzz(fn->children);
@@ -421,6 +477,23 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
         fuzz(select->children);
     }
+    /*
+     * The time to fuzz the settings has not yet come.
+     * Apparently we don't have any infrastructure to validate the values of
+     * the settings, and the first query with max_block_size = -1 breaks
+     * because of overflows here and there.
+     *//*
+     * else if (auto * set = typeid_cast<ASTSetQuery *>(ast.get()))
+     * {
+     *     for (auto & c : set->changes)
+     *     {
+     *         if (fuzz_rand() % 50 == 0)
+     *         {
+     *             c.value = fuzzField(c.value);
+     *         }
+     *     }
+     * }
+     */
     else if (auto * literal = typeid_cast<ASTLiteral *>(ast.get()))
     {
         // There is a caveat with fuzzing the children: many ASTs also keep the
diff --git a/programs/client/QueryFuzzer.h b/programs/client/QueryFuzzer.h
index e9d3f150283..38714205967 100644
--- a/programs/client/QueryFuzzer.h
+++ b/programs/client/QueryFuzzer.h
@@ -14,6 +14,7 @@ namespace DB

 class ASTExpressionList;
 class ASTOrderByElement;
+struct WindowFrame;

 /*
  * This is an AST-based query fuzzer that makes random modifications to query
@@ -65,6 +66,7 @@ struct QueryFuzzer
     void fuzzOrderByElement(ASTOrderByElement * elem);
     void fuzzOrderByList(IAST * ast);
     void fuzzColumnLikeExpressionList(IAST * ast);
+    void fuzzWindowFrame(WindowFrame & frame);
     void fuzz(ASTs & asts);
     void fuzz(ASTPtr & ast);
     void collectFuzzInfoMain(const ASTPtr ast);
diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp
index 87083c2c27b..dfa7048349e 100644
--- a/programs/client/Suggest.cpp
+++ b/programs/client/Suggest.cpp
@@ -1,5 +1,6 @@
 #include "Suggest.h"

+#include
 #include
 #include

@@ -86,6 +87,9 @@ Suggest::Suggest()

 void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit)
 {
+    /// NOTE: Once you update the completion list,
+    /// do not forget to update 01676_clickhouse_client_autocomplete.sh
+
     std::stringstream query;      // STYLE_CHECK_ALLOW_STD_STRING_STREAM
     query << "SELECT DISTINCT arrayJoin(extractAll(name, '[\\\\w_]{2,}')) AS res FROM ("
         "SELECT name FROM system.functions"
@@ -104,6 +108,18 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo
         " UNION ALL "
         "SELECT cluster FROM system.clusters"
         " UNION ALL "
+        "SELECT name FROM system.errors"
+        " UNION ALL "
+        "SELECT event FROM system.events"
+        " UNION ALL "
+        "SELECT metric FROM system.asynchronous_metrics"
+        " UNION ALL "
+        "SELECT metric FROM system.metrics"
+        " UNION ALL "
+        "SELECT macro FROM system.macros"
+        " UNION ALL "
+        "SELECT policy_name FROM system.storage_policies"
+        " UNION ALL "
         "SELECT concat(func.name, comb.name) FROM system.functions AS func CROSS JOIN system.aggregate_function_combinators AS comb WHERE is_aggregate";

     /// The user may disable loading of databases, tables, columns by setting suggestion_limit to zero.
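`fuzzWindowFrame` above follows the fuzzer's usual shape: one `switch` over `fuzz_rand() % 40` mutates at most one frame field per call, so each of the seven cases fires with probability 1/40 and about 33 of every 40 calls leave the frame untouched; mutations therefore accumulate slowly over repeated fuzzing passes, and `is_default` is recomputed afterwards so it stays consistent with the field values. A minimal standalone sketch of the pattern, using a simplified stand-in for `WindowFrame`:

``` cpp
#include <cstdint>
#include <random>

// Simplified stand-in for WindowFrame: just enough fields to show the pattern.
struct Frame
{
    int type = 0;
    int64_t begin_offset = 0;
    bool is_default = true;
};

static bool equalFields(const Frame & a, const Frame & b)
{
    return a.type == b.type && a.begin_offset == b.begin_offset;
}

void fuzzFrame(Frame & frame, std::mt19937 & rng)
{
    switch (rng() % 40)
    {
        case 0: frame.type = static_cast<int>(rng() % 3); break;
        case 1: frame.begin_offset = static_cast<int64_t>(rng() % 100); break;
        default: break; // 38 out of 40 calls: mutate nothing
    }
    // Keep the "default frame" flag consistent with the field values,
    // mirroring `frame.is_default = (frame == WindowFrame{})` above.
    frame.is_default = equalFields(frame, Frame{});
}
```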
@@ -123,12 +139,17 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo query << ") WHERE notEmpty(res)"; - fetch(connection, timeouts, query.str()); + Settings settings; + /// To show all rows from: + /// - system.errors + /// - system.events + settings.system_events_show_zero_values = true; + fetch(connection, timeouts, query.str(), settings); } -void Suggest::fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query) +void Suggest::fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query, Settings & settings) { - connection.sendQuery(timeouts, query); + connection.sendQuery(timeouts, query, "" /* query_id */, QueryProcessingStage::Complete, &settings); while (true) { diff --git a/programs/client/Suggest.h b/programs/client/Suggest.h index 03332088cbe..0049bc08ebf 100644 --- a/programs/client/Suggest.h +++ b/programs/client/Suggest.h @@ -33,7 +33,7 @@ public: private: void loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit); - void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query); + void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query, Settings & settings); void fillWordsFromBlock(const Block & block); /// Words are fetched asynchronously. diff --git a/programs/client/clickhouse-client.xml b/programs/client/clickhouse-client.xml index c073ab38aea..66e7afd8f8c 100644 --- a/programs/client/clickhouse-client.xml +++ b/programs/client/clickhouse-client.xml @@ -29,4 +29,25 @@ {display_name} \x01\e[1;32m\x02:)\x01\e[0m\x02 {display_name} \x01\e[1;31m\x02:)\x01\e[0m\x02 + + diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index ca09e7c1889..7eea23160b2 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -316,9 +316,6 @@ void ClusterCopier::process(const ConnectionTimeouts & timeouts) } } - /// Delete helping tables in both cases (whole table is done or not) - dropHelpingTables(task_table); - if (!table_is_done) { throw Exception("Too many tries to process table " + task_table.table_id + ". Abort remaining execution", @@ -642,7 +639,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t query_deduplicate_ast_string += " OPTIMIZE TABLE " + getQuotedTable(original_table) + ((partition_name == "'all'") ? 
" PARTITION ID " : " PARTITION ") + partition_name + " DEDUPLICATE;"; - LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: {}", query_alter_ast_string); + LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: {}", query_deduplicate_ast_string); UInt64 num_nodes = executeQueryOnCluster( task_table.cluster_push, @@ -1044,6 +1041,11 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab { LOG_INFO(log, "Table {} is not processed yet.Copied {} of {}, will retry", task_table.table_id, finished_partitions, required_partitions); } + else + { + /// Delete helping tables in case that whole table is done + dropHelpingTables(task_table); + } return table_is_done; } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 2bb5181d348..a96cb2b8973 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -59,7 +59,6 @@ #include #include #include -#include #include "MetricsTransmitter.h" #include #include @@ -94,6 +93,9 @@ # include #endif +#if USE_NURAFT +# include +#endif namespace CurrentMetrics { @@ -842,23 +844,33 @@ int Server::main(const std::vector & /*args*/) listen_try = true; } - for (const auto & listen_host : listen_hosts) + if (config().has("test_keeper_server")) { - /// TCP TestKeeper - const char * port_name = "test_keeper_server.tcp_port"; - createServer(listen_host, port_name, listen_try, [&](UInt16 port) +#if USE_NURAFT + /// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config. + global_context->initializeNuKeeperStorageDispatcher(); + for (const auto & listen_host : listen_hosts) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - servers_to_start_before_tables->emplace_back( - port_name, - std::make_unique( - new TestKeeperTCPHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); + /// TCP NuKeeper + const char * port_name = "test_keeper_server.tcp_port"; + createServer(listen_host, port_name, listen_try, [&](UInt16 port) + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + servers_to_start_before_tables->emplace_back( + port_name, + std::make_unique( + new NuKeeperTCPHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); + + LOG_INFO(log, "Listening for connections to NuKeeper (tcp): {}", address.toString()); + }); + } +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination."); +#endif - LOG_INFO(log, "Listening for connections to fake zookeeper (tcp): {}", address.toString()); - }); } for (auto & server : *servers_to_start_before_tables) @@ -898,6 +910,8 @@ int Server::main(const std::vector & /*args*/) LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections); else LOG_INFO(log, "Closed connections to servers for tables."); + + global_context->shutdownNuKeeperStorageDispatcher(); } /** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available. 
diff --git a/programs/server/config.xml b/programs/server/config.xml
index 849d3dc32ba..ca57987d901 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -421,9 +421,15 @@
-
+
+
+
+
+        default
diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp
index 426c27ea799..80594f66dfc 100644
--- a/src/Access/DiskAccessStorage.cpp
+++ b/src/Access/DiskAccessStorage.cpp
@@ -217,6 +217,7 @@ namespace
         /// Write the file.
         WriteBufferFromFile out{tmp_file_path.string()};
         out.write(file_contents.data(), file_contents.size());
+        out.close();

         /// Rename.
         std::filesystem::rename(tmp_file_path, file_path);
@@ -274,6 +275,7 @@ namespace
             writeStringBinary(name, out);
             writeUUIDText(id, out);
         }
+        out.close();
     }

diff --git a/src/Access/EnabledQuota.cpp b/src/Access/EnabledQuota.cpp
index e9d586a692f..4a77426004d 100644
--- a/src/Access/EnabledQuota.cpp
+++ b/src/Access/EnabledQuota.cpp
@@ -26,10 +26,6 @@ struct EnabledQuota::Impl
         std::chrono::seconds duration,
         std::chrono::system_clock::time_point end_of_interval)
     {
-        std::function<String(UInt64)> amount_to_string = [](UInt64 amount) { return std::to_string(amount); };
-        if (resource_type == Quota::EXECUTION_TIME)
-            amount_to_string = [&](UInt64 amount) { return ext::to_string(std::chrono::nanoseconds(amount)); };
-
         const auto & type_info = Quota::ResourceTypeInfo::get(resource_type);
         throw Exception(
             "Quota for user " + backQuote(user_name) + " for " + ext::to_string(duration) + " has been exceeded: "
@@ -39,35 +35,47 @@ struct EnabledQuota::Impl
     }


+    /// Returns the end of the current interval. If the passed `current_time` is greater than that end,
+    /// the function automatically recalculates the interval's end by adding the interval's duration
+    /// one or more times until the interval's end is greater than `current_time`.
+    /// If that recalculation occurs, the function also resets the amounts of resources used and sets the
+    /// `counters_were_reset` variable.
     static std::chrono::system_clock::time_point getEndOfInterval(
-        const Interval & interval, std::chrono::system_clock::time_point current_time, bool * counters_were_reset = nullptr)
+        const Interval & interval, std::chrono::system_clock::time_point current_time, bool & counters_were_reset)
     {
         auto & end_of_interval = interval.end_of_interval;
         auto end_loaded = end_of_interval.load();
         auto end = std::chrono::system_clock::time_point{end_loaded};
         if (current_time < end)
         {
-            if (counters_were_reset)
-                *counters_were_reset = false;
+            counters_were_reset = false;
             return end;
         }

-        const auto duration = interval.duration;
+        /// We reset counters only if the interval's end has been calculated before.
+        /// If it hasn't, we just calculate the interval's end for the first time and don't reset counters yet.
+        bool need_reset_counters = (end_loaded.count() != 0);

         do
         {
-            end = end + (current_time - end + duration) / duration * duration;
+            /// Calculate the end of the next interval:
+            ///  |                          X                          |
+            /// end                    current_time              next_end = end + duration * n
+            /// where n is an integer number, n >= 1.
+            const auto duration = interval.duration;
+            UInt64 n = static_cast<UInt64>((current_time - end + duration) / duration);
+            end = end + duration * n;
             if (end_of_interval.compare_exchange_strong(end_loaded, end.time_since_epoch()))
-            {
-                boost::range::fill(interval.used, 0);
                 break;
-            }
             end = std::chrono::system_clock::time_point{end_loaded};
         } while (current_time >= end);

-        if (counters_were_reset)
-            *counters_were_reset = true;
+        if (need_reset_counters)
+        {
+            boost::range::fill(interval.used, 0);
+            counters_were_reset = true;
+        }
         return end;
     }

@@ -89,7 +97,7 @@ struct EnabledQuota::Impl
             if (used > max)
             {
                 bool counters_were_reset = false;
-                auto end_of_interval = getEndOfInterval(interval, current_time, &counters_were_reset);
+                auto end_of_interval = getEndOfInterval(interval, current_time, counters_were_reset);
                 if (counters_were_reset)
                 {
                     used = (interval.used[resource_type] += amount);
@@ -116,9 +124,9 @@ struct EnabledQuota::Impl
                 continue;
             if (used > max)
             {
-                bool used_counters_reset = false;
-                std::chrono::system_clock::time_point end_of_interval = getEndOfInterval(interval, current_time, &used_counters_reset);
-                if (!used_counters_reset)
+                bool counters_were_reset = false;
+                std::chrono::system_clock::time_point end_of_interval = getEndOfInterval(interval, current_time, counters_were_reset);
+                if (!counters_were_reset)
                     throwQuotaExceed(user_name, intervals.quota_name, resource_type, used, max, interval.duration, end_of_interval);
             }
         }
@@ -177,7 +185,8 @@ std::optional<QuotaUsage> EnabledQuota::Intervals::getUsage(std::chrono::system_
         auto & out = usage.intervals.back();
         out.duration = in.duration;
         out.randomize_interval = in.randomize_interval;
-        out.end_of_interval = Impl::getEndOfInterval(in, current_time);
+        bool counters_were_reset = false;
+        out.end_of_interval = Impl::getEndOfInterval(in, current_time, counters_were_reset);
         for (auto resource_type : ext::range(MAX_RESOURCE_TYPE))
         {
             if (in.max[resource_type])
diff --git a/src/Access/Quota.h b/src/Access/Quota.h
index b636e83ec40..430bdca29b0 100644
--- a/src/Access/Quota.h
+++ b/src/Access/Quota.h
@@ -31,6 +31,8 @@ struct Quota : public IAccessEntity
     enum ResourceType
     {
         QUERIES,        /// Number of queries.
+        QUERY_SELECTS,  /// Number of select queries.
+        QUERY_INSERTS,  /// Number of insert queries.
         ERRORS,         /// Number of queries with exceptions.
         RESULT_ROWS,    /// Number of rows returned as result.
         RESULT_BYTES,   /// Number of bytes returned as result.
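A worked example (standalone, with assumed values) of the interval-end arithmetic above: since the division of two durations is integer division, `n = (current_time - end + duration) / duration` is the smallest `n >= 1` for which `end + duration * n > current_time`:

``` cpp
#include <chrono>
#include <iostream>

int main()
{
    using namespace std::chrono;

    const seconds duration{3600};      // a one-hour quota interval
    seconds end{10000};                // previously stored end of the interval
    const seconds current_time{25000}; // "now", well past that end

    const auto n = (current_time - end + duration) / duration; // integer division -> 5
    end += duration * n;                                       // 10000 + 5 * 3600 = 28000

    // Prints "n = 5, new end = 28000": the first boundary after current_time
    // reachable in whole multiples of duration from the old end.
    std::cout << "n = " << n << ", new end = " << end.count() << '\n';
}
```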
@@ -152,6 +154,16 @@ inline const Quota::ResourceTypeInfo & Quota::ResourceTypeInfo::get(ResourceType static const auto info = make_info("QUERIES", 1); return info; } + case Quota::QUERY_SELECTS: + { + static const auto info = make_info("QUERY_SELECTS", 1); + return info; + } + case Quota::QUERY_INSERTS: + { + static const auto info = make_info("QUERY_INSERTS", 1); + return info; + } case Quota::ERRORS: { static const auto info = make_info("ERRORS", 1); diff --git a/src/AggregateFunctions/AggregateFunctionAggThrow.cpp b/src/AggregateFunctions/AggregateFunctionAggThrow.cpp index fada039e20a..c699dd4f217 100644 --- a/src/AggregateFunctions/AggregateFunctionAggThrow.cpp +++ b/src/AggregateFunctions/AggregateFunctionAggThrow.cpp @@ -60,7 +60,7 @@ public: return std::make_shared(); } - void create(AggregateDataPtr place) const override + void create(AggregateDataPtr __restrict place) const override { if (std::uniform_real_distribution<>(0.0, 1.0)(thread_local_rng) <= throw_probability) throw Exception("Aggregate function " + getName() + " has thrown exception successfully", ErrorCodes::AGGREGATE_FUNCTION_THROW); @@ -68,7 +68,7 @@ public: new (place) Data; } - void destroy(AggregateDataPtr place) const noexcept override + void destroy(AggregateDataPtr __restrict place) const noexcept override { data(place).~Data(); } diff --git a/src/AggregateFunctions/AggregateFunctionAny.cpp b/src/AggregateFunctions/AggregateFunctionAny.cpp index 0aeb2548af9..8b18abae884 100644 --- a/src/AggregateFunctions/AggregateFunctionAny.cpp +++ b/src/AggregateFunctions/AggregateFunctionAny.cpp @@ -34,6 +34,14 @@ void registerAggregateFunctionsAny(AggregateFunctionFactory & factory) factory.registerFunction("any", { createAggregateFunctionAny, properties }); factory.registerFunction("anyLast", { createAggregateFunctionAnyLast, properties }); factory.registerFunction("anyHeavy", { createAggregateFunctionAnyHeavy, properties }); + + // Synonyms for use as window functions. 
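An aside, before the `first_value`/`last_value` registrations that the comment above introduces: from this point on, most hunks in the patch make one mechanical change, adding the `__restrict` qualifier to `AggregateDataPtr` / `ConstAggregateDataPtr` parameters. With GCC and Clang, `__restrict` promises the compiler that the qualified pointer does not alias the function's other pointer arguments, which allows the aggregation state to be kept in registers inside per-row hot loops. A minimal generic illustration (assumed example, not ClickHouse code):

``` cpp
#include <cstddef>

// Without __restrict, the compiler must assume `sum` may point into `data`
// and re-load/store it on every iteration; with it, `*sum` can legally be
// kept in a register for the whole loop.
static void accumulate(long * __restrict sum, const long * __restrict data, size_t n)
{
    for (size_t i = 0; i < n; ++i)
        *sum += data[i];
}

int main()
{
    long values[] = {1, 2, 3, 4};
    long sum = 0;
    accumulate(&sum, values, 4);
    return sum == 10 ? 0 : 1; // exit code 0 on the expected result
}
```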
+ factory.registerFunction("first_value", + { createAggregateFunctionAny, properties }, + AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("last_value", + { createAggregateFunctionAnyLast, properties }, + AggregateFunctionFactory::CaseInsensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionArgMinMax.h b/src/AggregateFunctions/AggregateFunctionArgMinMax.h index 67f21db0240..b559c1c8a7e 100644 --- a/src/AggregateFunctions/AggregateFunctionArgMinMax.h +++ b/src/AggregateFunctions/AggregateFunctionArgMinMax.h @@ -70,25 +70,25 @@ public: return type_res; } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { if (this->data(place).value.changeIfBetter(*columns[1], row_num, arena)) this->data(place).result.change(*columns[0], row_num, arena); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { if (this->data(place).value.changeIfBetter(this->data(rhs).value, arena)) this->data(place).result.change(this->data(rhs).result, arena); } - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override { this->data(place).result.write(buf, *type_res); this->data(place).value.write(buf, *type_val); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override { this->data(place).result.read(buf, *type_res, arena); this->data(place).value.read(buf, *type_val, arena); @@ -96,7 +96,7 @@ public: bool allocatesMemoryInArena() const override { return Data::allocatesMemoryInArena(); } - void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { if (tuple_argument) { diff --git a/src/AggregateFunctions/AggregateFunctionArray.h b/src/AggregateFunctions/AggregateFunctionArray.h index e72fd3ab6ff..ef16fcde87b 100644 --- a/src/AggregateFunctions/AggregateFunctionArray.h +++ b/src/AggregateFunctions/AggregateFunctionArray.h @@ -47,12 +47,12 @@ public: return nested_func->getReturnType(); } - void create(AggregateDataPtr place) const override + void create(AggregateDataPtr __restrict place) const override { nested_func->create(place); } - void destroy(AggregateDataPtr place) const noexcept override + void destroy(AggregateDataPtr __restrict place) const noexcept override { nested_func->destroy(place); } @@ -77,7 +77,7 @@ public: return nested_func->isState(); } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { const IColumn * nested[num_arguments]; @@ -104,22 +104,22 @@ public: nested_func->add(place, nested, i, arena); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { nested_func->merge(place, rhs, arena); } - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const 
override + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override { nested_func->serialize(place, buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override { nested_func->deserialize(place, buf, arena); } - void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override { nested_func->insertResultInto(place, to, arena); } diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h index e2f912cc320..7bf742294b4 100644 --- a/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/src/AggregateFunctions/AggregateFunctionAvg.h @@ -98,13 +98,13 @@ public: DataTypePtr getReturnType() const final { return std::make_shared>(); } - void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override + void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { this->data(place).numerator += this->data(rhs).numerator; this->data(place).denominator += this->data(rhs).denominator; } - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override { writeBinary(this->data(place).numerator, buf); @@ -114,7 +114,7 @@ public: writeBinary(this->data(place).denominator, buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override { readBinary(this->data(place).numerator, buf); @@ -124,7 +124,7 @@ public: readBinary(this->data(place).denominator, buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { if constexpr (IsDecimalNumber || IsDecimalNumber) assert_cast &>(to).getData().push_back( @@ -148,7 +148,7 @@ class AggregateFunctionAvg final : public AggregateFunctionAvgBase, UInt64, AggregateFunctionAvg>::AggregateFunctionAvgBase; - void NO_SANITIZE_UNDEFINED add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const final + void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const final { this->data(place).numerator += static_cast &>(*columns[0]).getData()[row_num]; ++this->data(place).denominator; diff --git a/src/AggregateFunctions/AggregateFunctionAvgWeighted.h b/src/AggregateFunctions/AggregateFunctionAvgWeighted.h index ab9ce9c2a61..f8b452fc444 100644 --- a/src/AggregateFunctions/AggregateFunctionAvgWeighted.h +++ b/src/AggregateFunctions/AggregateFunctionAvgWeighted.h @@ -28,7 +28,7 @@ public: using ValueT = MaxFieldType; - void NO_SANITIZE_UNDEFINED add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override + void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override { const auto& weights = static_cast &>(*columns[1]); diff --git a/src/AggregateFunctions/AggregateFunctionBitwise.h b/src/AggregateFunctions/AggregateFunctionBitwise.h index 6d9eb3c36e1..3ba8e045069 100644 --- 
a/src/AggregateFunctions/AggregateFunctionBitwise.h +++ b/src/AggregateFunctions/AggregateFunctionBitwise.h @@ -54,27 +54,27 @@ public: return std::make_shared>(); } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override { this->data(place).update(assert_cast &>(*columns[0]).getData()[row_num]); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { this->data(place).update(this->data(rhs).value); } - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override { writeBinary(this->data(place).value, buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override { readBinary(this->data(place).value, buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { assert_cast &>(to).getData().push_back(this->data(place).value); } diff --git a/src/AggregateFunctions/AggregateFunctionBoundingRatio.h b/src/AggregateFunctions/AggregateFunctionBoundingRatio.h index 7c254668f8d..32ae22fd573 100644 --- a/src/AggregateFunctions/AggregateFunctionBoundingRatio.h +++ b/src/AggregateFunctions/AggregateFunctionBoundingRatio.h @@ -127,7 +127,7 @@ public: return std::make_shared(); } - void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num, Arena *) const override + void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override { /// NOTE Slightly inefficient. 
const auto x = columns[0]->getFloat64(row_num);
@@ -135,22 +135,22 @@ public:
         data(place).add(x, y);
     }

-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         data(place).merge(data(rhs));
     }

-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         data(place).serialize(buf);
     }

-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         data(place).deserialize(buf);
     }

-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         assert_cast<ColumnFloat64 &>(to).getData().push_back(getBoundingRatio(data(place)));
     }
diff --git a/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.h b/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.h
index aa205a71c97..ba8acb208ea 100644
--- a/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.h
+++ b/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.h
@@ -33,7 +33,7 @@ public:
         return "categoricalInformationValue";
     }

-    void create(AggregateDataPtr place) const override
+    void create(AggregateDataPtr __restrict place) const override
     {
         memset(place, 0, sizeOfData());
     }
diff --git a/src/AggregateFunctions/AggregateFunctionCount.h b/src/AggregateFunctions/AggregateFunctionCount.h
index 63d3d34a0fd..1b3a0acb528 100644
--- a/src/AggregateFunctions/AggregateFunctionCount.h
+++ b/src/AggregateFunctions/AggregateFunctionCount.h
@@ -38,7 +38,7 @@ public:
         return std::make_shared<DataTypeUInt64>();
     }

-    void add(AggregateDataPtr place, const IColumn **, size_t, Arena *) const override
+    void add(AggregateDataPtr __restrict place, const IColumn **, size_t, Arena *) const override
     {
         ++data(place).count;
     }
@@ -76,28 +76,28 @@ public:
         }
     }

-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         data(place).count += data(rhs).count;
     }

-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
     {
         writeVarUInt(data(place).count, buf);
     }

-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
+    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
     {
         readVarUInt(data(place).count, buf);
     }

-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
+    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         assert_cast<ColumnUInt64 &>(to).getData().push_back(data(place).count);
     }

     /// Reset the state to specified value. This function is not part of the common interface.
- void set(AggregateDataPtr place, UInt64 new_count) + void set(AggregateDataPtr __restrict place, UInt64 new_count) { data(place).count = new_count; } @@ -126,27 +126,27 @@ public: return std::make_shared(); } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override { data(place).count += !assert_cast(*columns[0]).isNullAt(row_num); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { data(place).count += data(rhs).count; } - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override { writeVarUInt(data(place).count, buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override { readVarUInt(data(place).count, buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { assert_cast(to).getData().push_back(data(place).count); } diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp new file mode 100644 index 00000000000..231b730d1aa --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp @@ -0,0 +1,49 @@ +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +AggregateFunctionPtr createAggregateFunctionDeltaSum( + const String & name, + const DataTypes & arguments, + const Array & params) +{ + assertNoParameters(name, params); + + if (arguments.size() != 1) + throw Exception("Incorrect number of arguments for aggregate function " + name, + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + DataTypePtr data_type = arguments[0]; + + if (isInteger(data_type) || isFloat(data_type)) + return AggregateFunctionPtr(createWithNumericType( + *data_type, arguments, params)); + else + throw Exception("Illegal type " + arguments[0]->getName() + " of argument for aggregate function " + name, + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); +} +} + +void registerAggregateFunctionDeltaSum(AggregateFunctionFactory & factory) +{ + AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = true }; + + factory.registerFunction("deltaSum", { createAggregateFunctionDeltaSum, properties }); +} + +} diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.h b/src/AggregateFunctions/AggregateFunctionDeltaSum.h new file mode 100644 index 00000000000..d5760de84ae --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.h @@ -0,0 +1,129 @@ +#pragma once + +#include +#include + +#include +#include + +#include +#include +#include + +#include + + +namespace DB +{ + +template +struct AggregationFunctionDeltaSumData +{ + T sum = 0; + T last = 0; + T first = 0; + bool seen_last = false; + bool seen_first = false; +}; + +template +class AggregationFunctionDeltaSum final + : public IAggregateFunctionDataHelper, AggregationFunctionDeltaSum> +{ +public: + AggregationFunctionDeltaSum(const 
DataTypes & arguments, const Array & params) + : IAggregateFunctionDataHelper, AggregationFunctionDeltaSum>{arguments, params} + {} + + AggregationFunctionDeltaSum() + : IAggregateFunctionDataHelper, AggregationFunctionDeltaSum>{} + {} + + String getName() const override { return "deltaSum"; } + + DataTypePtr getReturnType() const override { return std::make_shared>(); } + + void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override + { + auto value = assert_cast &>(*columns[0]).getData()[row_num]; + + if ((this->data(place).last < value) && this->data(place).seen_last) + { + this->data(place).sum += (value - this->data(place).last); + } + + this->data(place).last = value; + this->data(place).seen_last = true; + + if (!this->data(place).seen_first) + { + this->data(place).first = value; + this->data(place).seen_first = true; + } + } + + void NO_SANITIZE_UNDEFINED ALWAYS_INLINE merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override + { + auto place_data = &this->data(place); + auto rhs_data = &this->data(rhs); + + if ((place_data->last < rhs_data->first) && place_data->seen_last && rhs_data->seen_first) + { + // If the lhs last number seen is less than the first number the rhs saw, the lhs is before + // the rhs, for example [0, 2] [4, 7]. So we want to add the deltasums, but also add the + // difference between lhs last number and rhs first number (the 2 and 4). Then we want to + // take last value from the rhs, so first and last become 0 and 7. + + place_data->sum += rhs_data->sum + (rhs_data->first - place_data->last); + place_data->last = rhs_data->last; + } + else if ((rhs_data->last < place_data->first && rhs_data->seen_last && place_data->seen_first)) + { + // In the opposite scenario, the lhs comes after the rhs, e.g. [4, 6] [1, 2]. Since we + // assume the input interval states are sorted by time, we assume this is a counter + // reset, and therefore do *not* add the difference between our first value and the + // rhs last value. + + place_data->sum += rhs_data->sum; + place_data->first = rhs_data->first; + } + else if (rhs_data->seen_first) + { + // If we're here then the lhs is an empty state and the rhs does have some state, so + // we'll just take that state. + + place_data->first = rhs_data->first; + place_data->seen_first = rhs_data->seen_first; + place_data->last = rhs_data->last; + place_data->seen_last = rhs_data->seen_last; + place_data->sum = rhs_data->sum; + } + + // Otherwise lhs either has data or is uninitialized, so we don't need to modify its values. 
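+        // Worked example of the rules above, derived from this merge logic:
+        // merging lhs = {first = 0, last = 2, sum = 2} (rows [0, 2]) with
+        // rhs = {first = 4, last = 7, sum = 3} (rows [4, 7]) takes the first
+        // branch: sum = 2 + 3 + (4 - 2) = 7, last = 7 -- exactly deltaSum of
+        // the combined sequence [0, 2, 4, 7]. Merged in the opposite order,
+        // the second branch treats the drop from 7 down to 0 as a counter
+        // reset and yields sum = 3 + 2 = 5 with first = 0.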
+ } + + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override + { + writeIntBinary(this->data(place).sum, buf); + writeIntBinary(this->data(place).first, buf); + writeIntBinary(this->data(place).last, buf); + writePODBinary(this->data(place).seen_first, buf); + writePODBinary(this->data(place).seen_last, buf); + } + + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override + { + readIntBinary(this->data(place).sum, buf); + readIntBinary(this->data(place).first, buf); + readIntBinary(this->data(place).last, buf); + readPODBinary(this->data(place).seen_first, buf); + readPODBinary(this->data(place).seen_last, buf); + } + + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override + { + assert_cast &>(to).getData().push_back(this->data(place).sum); + } +}; + +} diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index f9c8f2651dc..b481e2a28e7 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -156,12 +156,12 @@ private: AggregateFunctionPtr nested_func; size_t arguments_num; - AggregateDataPtr getNestedPlace(AggregateDataPtr place) const noexcept + AggregateDataPtr getNestedPlace(AggregateDataPtr __restrict place) const noexcept { return place + prefix_size; } - ConstAggregateDataPtr getNestedPlace(ConstAggregateDataPtr place) const noexcept + ConstAggregateDataPtr getNestedPlace(ConstAggregateDataPtr __restrict place) const noexcept { return place + prefix_size; } @@ -172,27 +172,27 @@ public: , nested_func(nested_func_) , arguments_num(arguments.size()) {} - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { this->data(place).add(columns, arguments_num, row_num, arena); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).merge(this->data(rhs), arena); } - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override { this->data(place).serialize(buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override { this->data(place).deserialize(buf, arena); } - void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override { auto arguments = this->data(place).getArguments(this->argument_types); ColumnRawPtrs arguments_raw(arguments.size()); @@ -209,13 +209,13 @@ public: return prefix_size + nested_func->sizeOfData(); } - void create(AggregateDataPtr place) const override + void create(AggregateDataPtr __restrict place) const override { new (place) Data; nested_func->create(getNestedPlace(place)); } - void destroy(AggregateDataPtr place) const noexcept override + void destroy(AggregateDataPtr __restrict place) const noexcept override { this->data(place).~Data(); nested_func->destroy(getNestedPlace(place)); diff --git 
a/src/AggregateFunctions/AggregateFunctionEntropy.h b/src/AggregateFunctions/AggregateFunctionEntropy.h index 656aca43f60..9bb1bc039c5 100644 --- a/src/AggregateFunctions/AggregateFunctionEntropy.h +++ b/src/AggregateFunctions/AggregateFunctionEntropy.h @@ -103,7 +103,7 @@ public: return std::make_shared>(); } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override { if constexpr (!std::is_same_v) { @@ -117,22 +117,22 @@ public: } } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { this->data(place).merge(this->data(rhs)); } - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override { this->data(const_cast(place)).serialize(buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override { this->data(place).deserialize(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { auto & column = assert_cast &>(to); column.getData().push_back(this->data(place).get()); diff --git a/src/AggregateFunctions/AggregateFunctionForEach.h b/src/AggregateFunctions/AggregateFunctionForEach.h index c3b1b09ab3c..8d99e2e8af3 100644 --- a/src/AggregateFunctions/AggregateFunctionForEach.h +++ b/src/AggregateFunctions/AggregateFunctionForEach.h @@ -50,7 +50,7 @@ private: size_t nested_size_of_data = 0; size_t num_arguments; - AggregateFunctionForEachData & ensureAggregateData(AggregateDataPtr place, size_t new_size, Arena & arena) const + AggregateFunctionForEachData & ensureAggregateData(AggregateDataPtr __restrict place, size_t new_size, Arena & arena) const { AggregateFunctionForEachData & state = data(place); @@ -128,7 +128,7 @@ public: return std::make_shared(nested_func->getReturnType()); } - void destroy(AggregateDataPtr place) const noexcept override + void destroy(AggregateDataPtr __restrict place) const noexcept override { AggregateFunctionForEachData & state = data(place); @@ -145,7 +145,7 @@ public: return nested_func->hasTrivialDestructor(); } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { const IColumn * nested[num_arguments]; @@ -178,7 +178,7 @@ public: } } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { const AggregateFunctionForEachData & rhs_state = data(rhs); AggregateFunctionForEachData & state = ensureAggregateData(place, rhs_state.dynamic_array_size, *arena); @@ -195,7 +195,7 @@ public: } } - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override { const AggregateFunctionForEachData & state = data(place); writeBinary(state.dynamic_array_size, buf); @@ -208,7 +208,7 @@ public: } } - void 
deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override { AggregateFunctionForEachData & state = data(place); @@ -225,7 +225,7 @@ public: } } - void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override { AggregateFunctionForEachData & state = data(place); diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.h b/src/AggregateFunctions/AggregateFunctionGroupArray.h index 27a8cf0b1ee..921274f7d59 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -142,14 +142,14 @@ public: } } - void create(AggregateDataPtr place) const override + void create(AggregateDataPtr __restrict place) const override { [[maybe_unused]] auto a = new (place) Data; if constexpr (Trait::sampler == Sampler::RNG) a->rng.seed(seed); } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { if constexpr (Trait::sampler == Sampler::NONE) { @@ -176,7 +176,7 @@ public: // if constexpr (Trait::sampler == Sampler::DETERMINATOR) } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { if constexpr (Trait::sampler == Sampler::NONE) { @@ -235,7 +235,7 @@ public: // if constexpr (Trait::sampler == Sampler::DETERMINATOR) } - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override { const auto & value = this->data(place).value; size_t size = value.size(); @@ -254,7 +254,7 @@ public: // if constexpr (Trait::sampler == Sampler::DETERMINATOR) } - void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override { size_t size = 0; readVarUInt(size, buf); @@ -283,7 +283,7 @@ public: // if constexpr (Trait::sampler == Sampler::DETERMINATOR) } - void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { const auto & value = this->data(place).value; size_t size = value.size(); @@ -416,8 +416,8 @@ class GroupArrayGeneralImpl final { static constexpr bool limit_num_elems = Trait::has_limit; using Data = GroupArrayGeneralData; - static Data & data(AggregateDataPtr place) { return *reinterpret_cast(place); } - static const Data & data(ConstAggregateDataPtr place) { return *reinterpret_cast(place); } + static Data & data(AggregateDataPtr __restrict place) { return *reinterpret_cast(place); } + static const Data & data(ConstAggregateDataPtr __restrict place) { return *reinterpret_cast(place); } DataTypePtr & data_type; UInt64 max_elems; @@ -450,14 +450,14 @@ public: } } - void create(AggregateDataPtr place) const override + void create(AggregateDataPtr __restrict place) const override { [[maybe_unused]] auto a = new (place) Data; if constexpr (Trait::sampler == Sampler::RNG) a->rng.seed(seed); } - void add(AggregateDataPtr place, const 
IColumn ** columns, size_t row_num, Arena * arena) const override + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { if constexpr (Trait::sampler == Sampler::NONE) { @@ -485,7 +485,7 @@ public: // if constexpr (Trait::sampler == Sampler::DETERMINATOR) } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { if constexpr (Trait::sampler == Sampler::NONE) mergeNoSampler(place, rhs, arena); @@ -495,7 +495,7 @@ public: // else if constexpr (Trait::sampler == Sampler::DETERMINATOR) } - void ALWAYS_INLINE mergeNoSampler(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const + void ALWAYS_INLINE mergeNoSampler(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const { if (data(rhs).value.empty()) /// rhs state is empty return; @@ -517,7 +517,7 @@ public: a.push_back(b[i]->clone(arena), arena); } - void ALWAYS_INLINE mergeWithRNGSampler(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const + void ALWAYS_INLINE mergeWithRNGSampler(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const { if (data(rhs).value.empty()) /// rhs state is empty return; @@ -553,7 +553,7 @@ public: } } - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override { writeVarUInt(data(place).value.size(), buf); @@ -573,7 +573,7 @@ public: // if constexpr (Trait::sampler == Sampler::DETERMINATOR) } - void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override { UInt64 elems; readVarUInt(elems, buf); @@ -606,7 +606,7 @@ public: // if constexpr (Trait::sampler == Sampler::DETERMINATOR) } - void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { auto & column_array = assert_cast(to); @@ -692,8 +692,8 @@ class GroupArrayGeneralListImpl final { static constexpr bool limit_num_elems = Trait::has_limit; using Data = GroupArrayGeneralListData; - static Data & data(AggregateDataPtr place) { return *reinterpret_cast(place); } - static const Data & data(ConstAggregateDataPtr place) { return *reinterpret_cast(place); } + static Data & data(AggregateDataPtr __restrict place) { return *reinterpret_cast(place); } + static const Data & data(ConstAggregateDataPtr __restrict place) { return *reinterpret_cast(place); } DataTypePtr & data_type; UInt64 max_elems; @@ -710,7 +710,7 @@ public: DataTypePtr getReturnType() const override { return std::make_shared(data_type); } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { if (limit_num_elems && data(place).elems >= max_elems) return; @@ -731,7 +731,7 @@ public: ++data(place).elems; } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { /// It is sadly, but rhs's Arena could be destroyed @@ -780,7 +780,7 @@ public: 
data(place).elems = new_elems; } - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override { writeVarUInt(data(place).elems, buf); @@ -792,7 +792,7 @@ public: } } - void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override { UInt64 elems; readVarUInt(elems, buf); @@ -821,7 +821,7 @@ public: data(place).last = prev; } - void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { auto & column_array = assert_cast(to); diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h b/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h index a4800dd715e..42005659a36 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h @@ -102,7 +102,7 @@ public: return std::make_shared(type); } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override { /// TODO Do positions need to be 1-based for this function? size_t position = columns[1]->getUInt(row_num); @@ -126,7 +126,7 @@ public: columns[0]->get(row_num, arr[position]); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { Array & arr_lhs = data(place).value; const Array & arr_rhs = data(rhs).value; @@ -139,7 +139,7 @@ public: arr_lhs[i] = arr_rhs[i]; } - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override { const Array & arr = data(place).value; size_t size = arr.size(); @@ -159,7 +159,7 @@ public: } } - void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override { size_t size = 0; readVarUInt(size, buf); @@ -179,7 +179,7 @@ public: } } - void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { ColumnArray & to_array = assert_cast(to); IColumn & to_data = to_array.getData(); diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h index 3281738e66d..2a713f3aed2 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h @@ -40,7 +40,7 @@ struct MovingData Array value; /// Prefix sums. 
T sum = 0; - void add(T val, Arena * arena) + void NO_SANITIZE_UNDEFINED add(T val, Arena * arena) { sum += val; value.push_back(sum, arena); @@ -114,13 +114,13 @@ public: return std::make_shared(std::make_shared()); } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { auto value = static_cast(*columns[0]).getData()[row_num]; this->data(place).add(static_cast(value), arena); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override + void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { auto & cur_elems = this->data(place); auto & rhs_elems = this->data(rhs); @@ -138,7 +138,7 @@ public: cur_elems.sum += rhs_elems.sum; } - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override { const auto & value = this->data(place).value; size_t size = value.size(); @@ -146,7 +146,7 @@ public: buf.write(reinterpret_cast(value.data()), size * sizeof(value[0])); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override { size_t size = 0; readVarUInt(size, buf); @@ -163,7 +163,7 @@ public: } } - void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { const auto & data = this->data(place); size_t size = data.value.size(); diff --git a/src/AggregateFunctions/AggregateFunctionGroupBitmap.cpp b/src/AggregateFunctions/AggregateFunctionGroupBitmap.cpp index bf1d0af73ff..415ba557ef5 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupBitmap.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupBitmap.cpp @@ -16,6 +16,22 @@ namespace ErrorCodes namespace { + + template