Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-24 08:32:02 +00:00)

Commit 1002148886: Merge remote-tracking branch 'upstream/master' into ast-table-identifier-2

.gitmodules (vendored, 2 changes)
@ -196,7 +196,7 @@
[submodule "contrib/rocksdb"]
    path = contrib/rocksdb
    url = https://github.com/facebook/rocksdb
    branch = v6.11.4
    branch = v6.14.5
[submodule "contrib/xz"]
    path = contrib/xz
    url = https://github.com/xz-mirror/xz
CHANGELOG.md (277 changes)
@ -1,3 +1,12 @@
|
||||
## ClickHouse release 20.11
|
||||
|
||||
### ClickHouse release v20.11.3.3-stable, 2020-11-13
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fix rare silent crashes when query profiler is on and ClickHouse is installed on OS with glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
|
||||
### ClickHouse release v20.11.2.1, 2020-11-11
|
||||
|
||||
#### Backward Incompatible Change
|
||||
@ -119,6 +128,24 @@
|
||||
|
||||
## ClickHouse release 20.10
|
||||
|
||||
### ClickHouse release v20.10.4.1-stable, 2020-11-13
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fix rare silent crashes when query profiler is on and ClickHouse is installed on OS with glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix `IN` operator over several columns and tuples with enabled `transform_null_in` setting. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix `optimize_read_in_order`/`optimize_aggregation_in_order` with `max_threads > 0` and an expression in `ORDER BY`. [#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Now when parsing AVRO from input the LowCardinality is removed from type. Fixes [#16188](https://github.com/ClickHouse/ClickHouse/issues/16188). [#16521](https://github.com/ClickHouse/ClickHouse/pull/16521) ([Mike](https://github.com/myrrc)).
|
||||
* Fix rapid growth of metadata when using MySQL Master -> MySQL Slave -> ClickHouse MaterializeMySQL Engine, and `slave_parallel_worker` enabled on MySQL Slave, by properly shrinking GTID sets. This fixes [#15951](https://github.com/ClickHouse/ClickHouse/issues/15951). [#16504](https://github.com/ClickHouse/ClickHouse/pull/16504) ([TCeason](https://github.com/TCeason)).
|
||||
* Fix DROP TABLE for Distributed (racy with INSERT). [#16409](https://github.com/ClickHouse/ClickHouse/pull/16409) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix processing of very large entries in replication queue. Very large entries may appear in ALTER queries if table structure is extremely large (near 1 MB). This fixes [#16307](https://github.com/ClickHouse/ClickHouse/issues/16307). [#16332](https://github.com/ClickHouse/ClickHouse/pull/16332) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix bug with MySQL database. When MySQL server used as database engine is down some queries raise Exception, because they try to get tables from disabled server, while it's unnecessary. For example, query `SELECT ... FROM system.parts` should work only with MergeTree tables and don't touch MySQL database at all. [#16032](https://github.com/ClickHouse/ClickHouse/pull/16032) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
||||
#### Improvement
|
||||
|
||||
* Workaround for using S3 with an nginx server as proxy. Nginx currently does not accept URLs with an empty path like http://domain.com?delete, but vanilla aws-sdk-cpp produces URLs of this kind. This commit uses a patched aws-sdk-cpp version, which makes URLs with "/" as the path in such cases, like http://domain.com/?delete. [#16813](https://github.com/ClickHouse/ClickHouse/pull/16813) ([ianton-ru](https://github.com/ianton-ru)).
|
||||
|
||||
|
||||
### ClickHouse release v20.10.3.30, 2020-10-28
|
||||
|
||||
#### Backward Incompatible Change
|
||||
@ -331,6 +358,84 @@
|
||||
|
||||
## ClickHouse release 20.9
|
||||
|
||||
### ClickHouse release v20.9.5.5-stable, 2020-11-13
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fix rare silent crashes when query profiler is on and ClickHouse is installed on OS with glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Now when parsing AVRO from input the LowCardinality is removed from type. Fixes [#16188](https://github.com/ClickHouse/ClickHouse/issues/16188). [#16521](https://github.com/ClickHouse/ClickHouse/pull/16521) ([Mike](https://github.com/myrrc)).
|
||||
* Fix rapid growth of metadata when using MySQL Master -> MySQL Slave -> ClickHouse MaterializeMySQL Engine, and `slave_parallel_worker` enabled on MySQL Slave, by properly shrinking GTID sets. This fixes [#15951](https://github.com/ClickHouse/ClickHouse/issues/15951). [#16504](https://github.com/ClickHouse/ClickHouse/pull/16504) ([TCeason](https://github.com/TCeason)).
|
||||
* Fix DROP TABLE for Distributed (racy with INSERT). [#16409](https://github.com/ClickHouse/ClickHouse/pull/16409) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix processing of very large entries in replication queue. Very large entries may appear in ALTER queries if table structure is extremely large (near 1 MB). This fixes [#16307](https://github.com/ClickHouse/ClickHouse/issues/16307). [#16332](https://github.com/ClickHouse/ClickHouse/pull/16332) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed the inconsistent behaviour when a part of return data could be dropped because the set for its filtration wasn't created. [#16308](https://github.com/ClickHouse/ClickHouse/pull/16308) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Fix bug with MySQL database. When MySQL server used as database engine is down some queries raise Exception, because they try to get tables from disabled server, while it's unnecessary. For example, query `SELECT ... FROM system.parts` should work only with MergeTree tables and don't touch MySQL database at all. [#16032](https://github.com/ClickHouse/ClickHouse/pull/16032) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
||||
|
||||
### ClickHouse release v20.9.4.76-stable (2020-10-29)
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fix double free in case of exception in function `dictGet`. It could have happened if dictionary was loaded with error. [#16429](https://github.com/ClickHouse/ClickHouse/pull/16429) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix group by with totals/rollup/cube modifiers and min/max functions over group by keys. Fixes [#16393](https://github.com/ClickHouse/ClickHouse/issues/16393). [#16397](https://github.com/ClickHouse/ClickHouse/pull/16397) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix async Distributed INSERT w/ prefer_localhost_replica=0 and internal_replication. [#16358](https://github.com/ClickHouse/ClickHouse/pull/16358) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix very wrong code in the TwoLevelStringHashTable implementation, which might lead to a memory leak. I'm surprised this bug could lurk for so long. [#16264](https://github.com/ClickHouse/ClickHouse/pull/16264) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix the case when memory can be overallocated regardless of the limit. This closes [#14560](https://github.com/ClickHouse/ClickHouse/issues/14560). [#16206](https://github.com/ClickHouse/ClickHouse/pull/16206) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix `ALTER MODIFY ... ORDER BY` query hang for `ReplicatedVersionedCollapsingMergeTree`. This fixes [#15980](https://github.com/ClickHouse/ClickHouse/issues/15980). [#16011](https://github.com/ClickHouse/ClickHouse/pull/16011) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix collate name & charset name parser and support `length = 0` for string type. [#16008](https://github.com/ClickHouse/ClickHouse/pull/16008) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Allow to use direct layout for dictionaries with complex keys. [#16007](https://github.com/ClickHouse/ClickHouse/pull/16007) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Prevent replica hang for 5-10 mins when replication error happens after a period of inactivity. [#15987](https://github.com/ClickHouse/ClickHouse/pull/15987) ([filimonov](https://github.com/filimonov)).
|
||||
* Fix rare segfaults when inserting into or selecting from MaterializedView and concurrently dropping target table (for Atomic database engine). [#15984](https://github.com/ClickHouse/ClickHouse/pull/15984) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now considered as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes https://github.com/ClickHouse/ClickHouse/issues/15628. [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix a crash when database creation fails. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fixed `DROP TABLE IF EXISTS` failure with `Table ... doesn't exist` error when table is concurrently renamed (for Atomic database engine). Fixed rare deadlock when concurrently executing some DDL queries with multiple tables (like `DROP DATABASE` and `RENAME TABLE`) Fixed `DROP/DETACH DATABASE` failure with `Table ... doesn't exist` when concurrently executing `DROP/DETACH TABLE`. [#15934](https://github.com/ClickHouse/ClickHouse/pull/15934) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix incorrect empty result for query from `Distributed` table if query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix possible deadlocks in RBAC. [#15875](https://github.com/ClickHouse/ClickHouse/pull/15875) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix exception `Block structure mismatch` in `SELECT ... ORDER BY DESC` queries which were executed after `ALTER MODIFY COLUMN` query. Fixes [#15800](https://github.com/ClickHouse/ClickHouse/issues/15800). [#15852](https://github.com/ClickHouse/ClickHouse/pull/15852) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix `select count()` inaccuracy for MaterializeMySQL. [#15767](https://github.com/ClickHouse/ClickHouse/pull/15767) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix some cases of queries, in which only virtual columns are selected. Previously `Not found column _nothing in block` exception may be thrown. Fixes [#12298](https://github.com/ClickHouse/ClickHouse/issues/12298). [#15756](https://github.com/ClickHouse/ClickHouse/pull/15756) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fixed too low default value of `max_replicated_logs_to_keep` setting, which might cause replicas to become lost too often. Improve lost replica recovery process by choosing the most up-to-date replica to clone. Also do not remove old parts from lost replica, detach them instead. [#15701](https://github.com/ClickHouse/ClickHouse/pull/15701) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix error `Cannot add simple transform to empty Pipe` which happened while reading from `Buffer` table which has different structure than destination table. It was possible if destination table returned empty result for query. Fixes [#15529](https://github.com/ClickHouse/ClickHouse/issues/15529). [#15662](https://github.com/ClickHouse/ClickHouse/pull/15662) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed bug with globs in S3 table function, region from URL was not applied to S3 client configuration. [#15646](https://github.com/ClickHouse/ClickHouse/pull/15646) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
* Decrement the `ReadonlyReplica` metric when detaching read-only tables. This fixes https://github.com/ClickHouse/ClickHouse/issues/15598. [#15592](https://github.com/ClickHouse/ClickHouse/pull/15592) ([sundyli](https://github.com/sundy-li)).
|
||||
* Throw an error when a single parameter is passed to ReplicatedMergeTree instead of ignoring it. [#15516](https://github.com/ClickHouse/ClickHouse/pull/15516) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
|
||||
#### Improvement
|
||||
|
||||
* Now it's allowed to execute `ALTER ... ON CLUSTER` queries regardless of the `<internal_replication>` setting in cluster config. [#16075](https://github.com/ClickHouse/ClickHouse/pull/16075) ([alesapin](https://github.com/alesapin)).
|
||||
* Unfold `{database}`, `{table}` and `{uuid}` macros in `ReplicatedMergeTree` arguments on table creation. [#16160](https://github.com/ClickHouse/ClickHouse/pull/16160) ([tavplubix](https://github.com/tavplubix)).
|
||||
|
||||
|
||||
### ClickHouse release v20.9.3.45-stable (2020-10-09)
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fix error `Cannot find column` which may happen at insertion into `MATERIALIZED VIEW` in case the query for the `MV` contains `ARRAY JOIN`. [#15717](https://github.com/ClickHouse/ClickHouse/pull/15717) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix race condition in AMQP-CPP. [#15667](https://github.com/ClickHouse/ClickHouse/pull/15667) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix the order of destruction for resources in `ReadFromStorage` step of query plan. It might cause crashes in rare cases. Possibly connected with [#15610](https://github.com/ClickHouse/ClickHouse/issues/15610). [#15645](https://github.com/ClickHouse/ClickHouse/pull/15645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed `Element ... is not a constant expression` error when using `JSON*` function result in `VALUES`, `LIMIT` or right side of `IN` operator. [#15589](https://github.com/ClickHouse/ClickHouse/pull/15589) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Prevent the possibility of error message `Could not calculate available disk space (statvfs), errno: 4, strerror: Interrupted system call`. This fixes [#15541](https://github.com/ClickHouse/ClickHouse/issues/15541). [#15557](https://github.com/ClickHouse/ClickHouse/pull/15557) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Significantly reduce memory usage in AggregatingInOrderTransform/optimize_aggregation_in_order. [#15543](https://github.com/ClickHouse/ClickHouse/pull/15543) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix bug when `ILIKE` operator stops being case insensitive if `LIKE` with the same pattern was executed. [#15536](https://github.com/ClickHouse/ClickHouse/pull/15536) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix `Missing columns` errors when selecting columns which are absent in data, but depend on other columns which are also absent in data. Fixes [#15530](https://github.com/ClickHouse/ClickHouse/issues/15530). [#15532](https://github.com/ClickHouse/ClickHouse/pull/15532) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix bug with event subscription in DDLWorker which rarely may lead to query hangs in `ON CLUSTER`. Introduced in [#13450](https://github.com/ClickHouse/ClickHouse/issues/13450). [#15477](https://github.com/ClickHouse/ClickHouse/pull/15477) ([alesapin](https://github.com/alesapin)).
|
||||
* Report proper error when the second argument of `boundingRatio` aggregate function has a wrong type. [#15407](https://github.com/ClickHouse/ClickHouse/pull/15407) ([detailyang](https://github.com/detailyang)).
|
||||
* Fix bug where queries like SELECT toStartOfDay(today()) fail complaining about empty time_zone argument. [#15319](https://github.com/ClickHouse/ClickHouse/pull/15319) ([Bharat Nallan](https://github.com/bharatnc)).
|
||||
* Fix race condition during MergeTree table rename and background cleanup. [#15304](https://github.com/ClickHouse/ClickHouse/pull/15304) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix rare race condition on server startup when system.logs are enabled. [#15300](https://github.com/ClickHouse/ClickHouse/pull/15300) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix instance crash when using joinGet with LowCardinality types. This fixes https://github.com/ClickHouse/ClickHouse/issues/15214. [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix bug in table engine `Buffer` which doesn't allow to insert data of new structure into `Buffer` after `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)).
|
||||
* Adjust decimals field size in mysql column definition packet. [#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)).
|
||||
* Fixed `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in Atomic database when running clickhouse-server in docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix to make predicate push down work when subquery contains finalizeAggregation function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)).
|
||||
* Fix a problem where the server may get stuck on startup while talking to ZooKeeper, if the configuration files have to be fetched from ZK (using the `from_zk` include option). This fixes [#14814](https://github.com/ClickHouse/ClickHouse/issues/14814). [#14843](https://github.com/ClickHouse/ClickHouse/pull/14843) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
|
||||
#### Improvement
|
||||
|
||||
* Now it's possible to change the type of version column for `VersionedCollapsingMergeTree` with `ALTER` query. [#15442](https://github.com/ClickHouse/ClickHouse/pull/15442) ([alesapin](https://github.com/alesapin)).
|
||||
|
||||
|
||||
### ClickHouse release v20.9.2.20, 2020-09-22
|
||||
|
||||
#### New Feature
|
||||
@ -405,6 +510,110 @@
|
||||
|
||||
## ClickHouse release 20.8
|
||||
|
||||
### ClickHouse release v20.8.6.6-lts, 2020-11-13
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fix rare silent crashes when query profiler is on and ClickHouse is installed on OS with glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Now when parsing AVRO from input the LowCardinality is removed from type. Fixes [#16188](https://github.com/ClickHouse/ClickHouse/issues/16188). [#16521](https://github.com/ClickHouse/ClickHouse/pull/16521) ([Mike](https://github.com/myrrc)).
|
||||
* Fix rapid growth of metadata when using MySQL Master -> MySQL Slave -> ClickHouse MaterializeMySQL Engine, and `slave_parallel_worker` enabled on MySQL Slave, by properly shrinking GTID sets. This fixes [#15951](https://github.com/ClickHouse/ClickHouse/issues/15951). [#16504](https://github.com/ClickHouse/ClickHouse/pull/16504) ([TCeason](https://github.com/TCeason)).
|
||||
* Fix DROP TABLE for Distributed (racy with INSERT). [#16409](https://github.com/ClickHouse/ClickHouse/pull/16409) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix processing of very large entries in replication queue. Very large entries may appear in ALTER queries if table structure is extremely large (near 1 MB). This fixes [#16307](https://github.com/ClickHouse/ClickHouse/issues/16307). [#16332](https://github.com/ClickHouse/ClickHouse/pull/16332) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed the inconsistent behaviour when a part of return data could be dropped because the set for its filtration wasn't created. [#16308](https://github.com/ClickHouse/ClickHouse/pull/16308) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Fix bug with MySQL database. When MySQL server used as database engine is down some queries raise Exception, because they try to get tables from disabled server, while it's unnecessary. For example, query `SELECT ... FROM system.parts` should work only with MergeTree tables and don't touch MySQL database at all. [#16032](https://github.com/ClickHouse/ClickHouse/pull/16032) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
|
||||
|
||||
### ClickHouse release v20.8.5.45-lts, 2020-10-29
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fix double free in case of exception in function `dictGet`. It could have happened if dictionary was loaded with error. [#16429](https://github.com/ClickHouse/ClickHouse/pull/16429) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix group by with totals/rollup/cube modifiers and min/max functions over group by keys. Fixes [#16393](https://github.com/ClickHouse/ClickHouse/issues/16393). [#16397](https://github.com/ClickHouse/ClickHouse/pull/16397) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix async Distributed INSERT w/ prefer_localhost_replica=0 and internal_replication. [#16358](https://github.com/ClickHouse/ClickHouse/pull/16358) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix a possible memory leak during `GROUP BY` with string keys, caused by an error in `TwoLevelStringHashTable` implementation. [#16264](https://github.com/ClickHouse/ClickHouse/pull/16264) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix the case when memory can be overallocated regardless of the limit. This closes [#14560](https://github.com/ClickHouse/ClickHouse/issues/14560). [#16206](https://github.com/ClickHouse/ClickHouse/pull/16206) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix `ALTER MODIFY ... ORDER BY` query hang for `ReplicatedVersionedCollapsingMergeTree`. This fixes [#15980](https://github.com/ClickHouse/ClickHouse/issues/15980). [#16011](https://github.com/ClickHouse/ClickHouse/pull/16011) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix collate name & charset name parser and support `length = 0` for string type. [#16008](https://github.com/ClickHouse/ClickHouse/pull/16008) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Allow to use direct layout for dictionaries with complex keys. [#16007](https://github.com/ClickHouse/ClickHouse/pull/16007) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Prevent replica hang for 5-10 mins when replication error happens after a period of inactivity. [#15987](https://github.com/ClickHouse/ClickHouse/pull/15987) ([filimonov](https://github.com/filimonov)).
|
||||
* Fix rare segfaults when inserting into or selecting from MaterializedView and concurrently dropping target table (for Atomic database engine). [#15984](https://github.com/ClickHouse/ClickHouse/pull/15984) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now considered as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes [#15628](https://github.com/ClickHouse/ClickHouse/issues/15628). [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix a crash when database creation fails. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fixed `DROP TABLE IF EXISTS` failure with `Table ... doesn't exist` error when table is concurrently renamed (for Atomic database engine). Fixed rare deadlock when concurrently executing some DDL queries with multiple tables (like `DROP DATABASE` and `RENAME TABLE`) Fixed `DROP/DETACH DATABASE` failure with `Table ... doesn't exist` when concurrently executing `DROP/DETACH TABLE`. [#15934](https://github.com/ClickHouse/ClickHouse/pull/15934) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix incorrect empty result for query from `Distributed` table if query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix possible deadlocks in RBAC. [#15875](https://github.com/ClickHouse/ClickHouse/pull/15875) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix exception `Block structure mismatch` in `SELECT ... ORDER BY DESC` queries which were executed after `ALTER MODIFY COLUMN` query. Fixes [#15800](https://github.com/ClickHouse/ClickHouse/issues/15800). [#15852](https://github.com/ClickHouse/ClickHouse/pull/15852) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix some cases of queries, in which only virtual columns are selected. Previously `Not found column _nothing in block` exception may be thrown. Fixes [#12298](https://github.com/ClickHouse/ClickHouse/issues/12298). [#15756](https://github.com/ClickHouse/ClickHouse/pull/15756) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix error `Cannot find column` which may happen at insertion into `MATERIALIZED VIEW` in case the query for the `MV` contains `ARRAY JOIN`. [#15717](https://github.com/ClickHouse/ClickHouse/pull/15717) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed too low default value of `max_replicated_logs_to_keep` setting, which might cause replicas to become lost too often. Improve lost replica recovery process by choosing the most up-to-date replica to clone. Also do not remove old parts from lost replica, detach them instead. [#15701](https://github.com/ClickHouse/ClickHouse/pull/15701) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix error `Cannot add simple transform to empty Pipe` which happened while reading from `Buffer` table which has different structure than destination table. It was possible if destination table returned empty result for query. Fixes [#15529](https://github.com/ClickHouse/ClickHouse/issues/15529). [#15662](https://github.com/ClickHouse/ClickHouse/pull/15662) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed bug with globs in S3 table function, region from URL was not applied to S3 client configuration. [#15646](https://github.com/ClickHouse/ClickHouse/pull/15646) ([Vladimir Chebotarev](https://github.com/excitoon)).
|
||||
* Decrement the `ReadonlyReplica` metric when detaching read-only tables. This fixes [#15598](https://github.com/ClickHouse/ClickHouse/issues/15598). [#15592](https://github.com/ClickHouse/ClickHouse/pull/15592) ([sundyli](https://github.com/sundy-li)).
|
||||
* Throw an error when a single parameter is passed to ReplicatedMergeTree instead of ignoring it. [#15516](https://github.com/ClickHouse/ClickHouse/pull/15516) ([nvartolomei](https://github.com/nvartolomei)).
|
||||
|
||||
#### Improvement
|
||||
|
||||
* Now it's allowed to execute `ALTER ... ON CLUSTER` queries regardless of the `<internal_replication>` setting in cluster config. [#16075](https://github.com/ClickHouse/ClickHouse/pull/16075) ([alesapin](https://github.com/alesapin)).
|
||||
* Unfold `{database}`, `{table}` and `{uuid}` macros in `ReplicatedMergeTree` arguments on table creation. [#16159](https://github.com/ClickHouse/ClickHouse/pull/16159) ([tavplubix](https://github.com/tavplubix)).
|
||||
|
||||
|
||||
### ClickHouse release v20.8.4.11-lts, 2020-10-09
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fix the order of destruction for resources in `ReadFromStorage` step of query plan. It might cause crashes in rare cases. Possibly connected with [#15610](https://github.com/ClickHouse/ClickHouse/issues/15610). [#15645](https://github.com/ClickHouse/ClickHouse/pull/15645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed `Element ... is not a constant expression` error when using `JSON*` function result in `VALUES`, `LIMIT` or right side of `IN` operator. [#15589](https://github.com/ClickHouse/ClickHouse/pull/15589) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Prevent the possibility of error message `Could not calculate available disk space (statvfs), errno: 4, strerror: Interrupted system call`. This fixes [#15541](https://github.com/ClickHouse/ClickHouse/issues/15541). [#15557](https://github.com/ClickHouse/ClickHouse/pull/15557) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Significantly reduce memory usage in AggregatingInOrderTransform/optimize_aggregation_in_order. [#15543](https://github.com/ClickHouse/ClickHouse/pull/15543) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix bug when `ILIKE` operator stops being case insensitive if `LIKE` with the same pattern was executed. [#15536](https://github.com/ClickHouse/ClickHouse/pull/15536) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix `Missing columns` errors when selecting columns which are absent in data, but depend on other columns which are also absent in data. Fixes [#15530](https://github.com/ClickHouse/ClickHouse/issues/15530). [#15532](https://github.com/ClickHouse/ClickHouse/pull/15532) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix bug with event subscription in DDLWorker which rarely may lead to query hangs in `ON CLUSTER`. Introduced in [#13450](https://github.com/ClickHouse/ClickHouse/issues/13450). [#15477](https://github.com/ClickHouse/ClickHouse/pull/15477) ([alesapin](https://github.com/alesapin)).
|
||||
* Report proper error when the second argument of `boundingRatio` aggregate function has a wrong type. [#15407](https://github.com/ClickHouse/ClickHouse/pull/15407) ([detailyang](https://github.com/detailyang)).
|
||||
* Fix race condition during MergeTree table rename and background cleanup. [#15304](https://github.com/ClickHouse/ClickHouse/pull/15304) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix rare race condition on server startup when system.logs are enabled. [#15300](https://github.com/ClickHouse/ClickHouse/pull/15300) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix instance crash when using joinGet with LowCardinality types. This fixes https://github.com/ClickHouse/ClickHouse/issues/15214. [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix bug in table engine `Buffer` which doesn't allow to insert data of new structure into `Buffer` after `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)).
|
||||
* Adjust decimals field size in mysql column definition packet. [#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)).
|
||||
* We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison which corrects the usage of FixedString as primary keys. This fixes https://github.com/ClickHouse/ClickHouse/issues/14908. [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)).
|
||||
* If function `bar` was called with specifically crafted arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in Atomic database when running clickhouse-server in docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Now settings `number_of_free_entries_in_pool_to_execute_mutation` and `number_of_free_entries_in_pool_to_lower_max_size_of_merge` can be equal to `background_pool_size`. [#14975](https://github.com/ClickHouse/ClickHouse/pull/14975) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix to make predicate push down work when subquery contains finalizeAggregation function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)).
|
||||
* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes https://github.com/ClickHouse/ClickHouse/issues/14923. [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Fixed `.metadata.tmp File exists` error when using `MaterializeMySQL` database engine. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)).
|
||||
* Fix a problem where the server may get stuck on startup while talking to ZooKeeper, if the configuration files have to be fetched from ZK (using the `from_zk` include option). This fixes [#14814](https://github.com/ClickHouse/ClickHouse/issues/14814). [#14843](https://github.com/ClickHouse/ClickHouse/pull/14843) ([Alexander Kuzmenkov](https://github.com/akuzm)).
|
||||
* Fix wrong monotonicity detection for shrunk `Int -> Int` cast of signed types. It might lead to incorrect query result. This bug is unveiled in [#14513](https://github.com/ClickHouse/ClickHouse/issues/14513). [#14783](https://github.com/ClickHouse/ClickHouse/pull/14783) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fixed the incorrect sorting order of `Nullable` column. This fixes [#14344](https://github.com/ClickHouse/ClickHouse/issues/14344). [#14495](https://github.com/ClickHouse/ClickHouse/pull/14495) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
|
||||
#### Improvement
|
||||
|
||||
* Now it's possible to change the type of version column for `VersionedCollapsingMergeTree` with `ALTER` query. [#15442](https://github.com/ClickHouse/ClickHouse/pull/15442) ([alesapin](https://github.com/alesapin)).
|
||||
|
||||
|
||||
### ClickHouse release v20.8.3.18-stable, 2020-09-18
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fix the issue when some invocations of `extractAllGroups` function may trigger "Memory limit exceeded" error. This fixes [#13383](https://github.com/ClickHouse/ClickHouse/issues/13383). [#14889](https://github.com/ClickHouse/ClickHouse/pull/14889) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix SIGSEGV for an attempt to INSERT into StorageFile(fd). [#14887](https://github.com/ClickHouse/ClickHouse/pull/14887) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix rare error in `SELECT` queries when the queried column has a `DEFAULT` expression which depends on another column that also has a `DEFAULT` and is not present in the select query and does not exist on disk. Partially fixes [#14531](https://github.com/ClickHouse/ClickHouse/issues/14531). [#14845](https://github.com/ClickHouse/ClickHouse/pull/14845) ([alesapin](https://github.com/alesapin)).
|
||||
* Fixed missed default database name in metadata of materialized view when executing `ALTER ... MODIFY QUERY`. [#14664](https://github.com/ClickHouse/ClickHouse/pull/14664) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix bug when `ALTER UPDATE` mutation with Nullable column in assignment expression and constant value (like `UPDATE x = 42`) leads to incorrect value in column or segfault. Fixes [#13634](https://github.com/ClickHouse/ClickHouse/issues/13634), [#14045](https://github.com/ClickHouse/ClickHouse/issues/14045). [#14646](https://github.com/ClickHouse/ClickHouse/pull/14646) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix wrong Decimal multiplication result that caused a wrong decimal scale of the result column. [#14603](https://github.com/ClickHouse/ClickHouse/pull/14603) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Added the checker as neither calling `lc->isNullable()` nor calling `ls->getDictionaryPtr()->isNullable()` would return the correct result. [#14591](https://github.com/ClickHouse/ClickHouse/pull/14591) ([myrrc](https://github.com/myrrc)).
|
||||
* Cleanup data directory after Zookeeper exceptions during CreateQuery for StorageReplicatedMergeTree Engine. [#14563](https://github.com/ClickHouse/ClickHouse/pull/14563) ([Bharat Nallan](https://github.com/bharatnc)).
|
||||
* Fix rare segfaults in functions with combinator -Resample, which could appear in result of overflow with very large parameters. [#14562](https://github.com/ClickHouse/ClickHouse/pull/14562) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
|
||||
#### Improvement
|
||||
|
||||
* Speed up server shutdown process if there are ongoing S3 requests. [#14858](https://github.com/ClickHouse/ClickHouse/pull/14858) ([Pavel Kovalenko](https://github.com/Jokser)).
|
||||
* Allow using multi-volume storage configuration in storage Distributed. [#14839](https://github.com/ClickHouse/ClickHouse/pull/14839) ([Pavel Kovalenko](https://github.com/Jokser)).
|
||||
* Speed up server shutdown process if there are ongoing S3 requests. [#14496](https://github.com/ClickHouse/ClickHouse/pull/14496) ([Pavel Kovalenko](https://github.com/Jokser)).
|
||||
* Support custom codecs in compact parts. [#12183](https://github.com/ClickHouse/ClickHouse/pull/12183) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
|
||||
|
||||
### ClickHouse release v20.8.2.3-stable, 2020-09-08
|
||||
|
||||
#### Backward Incompatible Change
|
||||
@ -1755,6 +1964,74 @@ No changes compared to v20.4.3.16-stable.
|
||||
|
||||
## ClickHouse release v20.3
|
||||
|
||||
|
||||
### ClickHouse release v20.3.21.2-lts, 2020-11-02
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fix dictGet in sharding_key (and similar places, i.e. when the function context is stored permanently). [#16205](https://github.com/ClickHouse/ClickHouse/pull/16205) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix incorrect empty result for query from `Distributed` table if query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix missing or excessive headers in `TSV/CSVWithNames` formats. This fixes [#12504](https://github.com/ClickHouse/ClickHouse/issues/12504). [#13343](https://github.com/ClickHouse/ClickHouse/pull/13343) ([Azat Khuzhin](https://github.com/azat)).
|
||||
|
||||
|
||||
### ClickHouse release v20.3.20.6-lts, 2020-10-09
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15724](https://github.com/ClickHouse/ClickHouse/pull/15724), [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)).
|
||||
* Fix hang of queries with a lot of subqueries to the same table of `MySQL` engine. Previously, if there were more than 16 subqueries to the same `MySQL` table in a query, it hung forever. [#15299](https://github.com/ClickHouse/ClickHouse/pull/15299) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix 'Unknown identifier' in GROUP BY when query has JOIN over Merge table. [#15242](https://github.com/ClickHouse/ClickHouse/pull/15242) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Fix to make predicate push down work when subquery contains finalizeAggregation function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)).
|
||||
* Concurrent `ALTER ... REPLACE/MOVE PARTITION ...` queries might cause deadlock. It's fixed. [#13626](https://github.com/ClickHouse/ClickHouse/pull/13626) ([tavplubix](https://github.com/tavplubix)).
|
||||
|
||||
|
||||
### ClickHouse release v20.3.19.4-lts, 2020-09-18
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fix rare error in `SELECT` queries when the queried column has a `DEFAULT` expression which depends on another column that also has a `DEFAULT` and is not present in the select query and does not exist on disk. Partially fixes [#14531](https://github.com/ClickHouse/ClickHouse/issues/14531). [#14845](https://github.com/ClickHouse/ClickHouse/pull/14845) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix bug when `ALTER UPDATE` mutation with Nullable column in assignment expression and constant value (like `UPDATE x = 42`) leads to incorrect value in column or segfault. Fixes [#13634](https://github.com/ClickHouse/ClickHouse/issues/13634), [#14045](https://github.com/ClickHouse/ClickHouse/issues/14045). [#14646](https://github.com/ClickHouse/ClickHouse/pull/14646) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix wrong Decimal multiplication result that caused a wrong decimal scale of the result column. [#14603](https://github.com/ClickHouse/ClickHouse/pull/14603) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
|
||||
#### Improvement
|
||||
|
||||
* Support custom codecs in compact parts. [#12183](https://github.com/ClickHouse/ClickHouse/pull/12183) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
|
||||
|
||||
### ClickHouse release v20.3.18.10-lts, 2020-09-08
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Stop query execution if exception happened in `PipelineExecutor` itself. This could prevent rare possible query hung. Continuation of [#14334](https://github.com/ClickHouse/ClickHouse/issues/14334). [#14402](https://github.com/ClickHouse/ClickHouse/pull/14402) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed the behaviour when sometimes cache-dictionary returned default value instead of present value from source. [#13624](https://github.com/ClickHouse/ClickHouse/pull/13624) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Fix parsing row policies from users.xml when names of databases or tables contain dots. This fixes [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779), [#12527](https://github.com/ClickHouse/ClickHouse/issues/12527). [#13199](https://github.com/ClickHouse/ClickHouse/pull/13199) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix CAST(Nullable(String), Enum()). [#12745](https://github.com/ClickHouse/ClickHouse/pull/12745) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed data race in `text_log`. It does not correspond to any real bug. [#9726](https://github.com/ClickHouse/ClickHouse/pull/9726) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### Improvement
|
||||
|
||||
* Fix wrong error for long queries. It was possible to get syntax error other than `Max query size exceeded` for correct query. [#13928](https://github.com/ClickHouse/ClickHouse/pull/13928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Return NULL/zero when value is not parsed completely in parseDateTimeBestEffortOrNull/Zero functions. This fixes [#7876](https://github.com/ClickHouse/ClickHouse/issues/7876). [#11653](https://github.com/ClickHouse/ClickHouse/pull/11653) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### Performance Improvement
|
||||
|
||||
* Slightly optimize very short queries with LowCardinality. [#14129](https://github.com/ClickHouse/ClickHouse/pull/14129) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
|
||||
* Fix UBSan report (adding zero to nullptr) in HashTable that appeared after migration to clang-10. [#10638](https://github.com/ClickHouse/ClickHouse/pull/10638) ([alexey-milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
|
||||
### ClickHouse release v20.3.17.173-lts, 2020-08-15
|
||||
|
||||
#### Bug Fix
|
||||
|
||||
* Fix crash in JOIN with StorageMerge and `set enable_optimize_predicate_expression=1`. [#13679](https://github.com/ClickHouse/ClickHouse/pull/13679) ([Artem Zuikov](https://github.com/4ertus2)).
|
||||
* Fix invalid return type for comparison of tuples with `NULL` elements. Fixes [#12461](https://github.com/ClickHouse/ClickHouse/issues/12461). [#13420](https://github.com/ClickHouse/ClickHouse/pull/13420) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix queries with constant columns and `ORDER BY` prefix of primary key. [#13396](https://github.com/ClickHouse/ClickHouse/pull/13396) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Return passed number for numbers with MSB set in roundUpToPowerOfTwoOrZero(). [#13234](https://github.com/ClickHouse/ClickHouse/pull/13234) ([Azat Khuzhin](https://github.com/azat)).
|
||||
|
||||
|
||||
### ClickHouse release v20.3.16.165-lts 2020-08-10
|
||||
|
||||
#### Bug Fix
|
||||
|
@ -1,6 +1,7 @@
#pragma once

#include <cassert>
#include <stdexcept> // for std::logic_error
#include <string>
#include <vector>
#include <functional>
base/common/sort.h (new file, 37 lines)
@ -0,0 +1,37 @@
#pragma once

#if !defined(ARCADIA_BUILD)
#    include <miniselect/floyd_rivest_select.h> // Y_IGNORE
#else
#    include <algorithm>
#endif

template <class RandomIt>
void nth_element(RandomIt first, RandomIt nth, RandomIt last)
{
#if !defined(ARCADIA_BUILD)
    ::miniselect::floyd_rivest_select(first, nth, last);
#else
    ::std::nth_element(first, nth, last);
#endif
}

template <class RandomIt>
void partial_sort(RandomIt first, RandomIt middle, RandomIt last)
{
#if !defined(ARCADIA_BUILD)
    ::miniselect::floyd_rivest_partial_sort(first, middle, last);
#else
    ::std::partial_sort(first, middle, last);
#endif
}

template <class RandomIt, class Compare>
void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compare)
{
#if !defined(ARCADIA_BUILD)
    ::miniselect::floyd_rivest_partial_sort(first, middle, last, compare);
#else
    ::std::partial_sort(first, middle, last, compare);
#endif
}
@ -5,6 +5,9 @@
/// (See at http://www.boost.org/LICENSE_1_0.txt)

#include "throwError.h"
#include <cfloat>
#include <limits>
#include <cassert>

namespace wide
{
@ -192,7 +195,7 @@ struct integer<Bits, Signed>::_impl
}

template <typename T>
constexpr static auto to_Integral(T f) noexcept
__attribute__((no_sanitize("undefined"))) constexpr static auto to_Integral(T f) noexcept
{
if constexpr (std::is_same_v<T, __int128>)
return f;
@ -225,25 +228,54 @@ struct integer<Bits, Signed>::_impl
self.items[i] = 0;
}

constexpr static void wide_integer_from_bultin(integer<Bits, Signed> & self, double rhs) noexcept
{
if ((rhs > 0 && rhs < std::numeric_limits<uint64_t>::max()) || (rhs < 0 && rhs > std::numeric_limits<int64_t>::min()))
/**
* N.B. t is constructed from double, so max(t) = max(double) ~ 2^310
* the recursive call happens when t / 2^64 > 2^64, so there won't be more than 5 of them.
*
* t = a1 * max_int + b1, a1 > max_int, b1 < max_int
* a1 = a2 * max_int + b2, a2 > max_int, b2 < max_int
* a_(n - 1) = a_n * max_int + b2, a_n <= max_int <- base case.
*/
template <class T>
constexpr static void set_multiplier(integer<Bits, Signed> & self, T t) noexcept {
constexpr uint64_t max_int = std::numeric_limits<uint64_t>::max();
const T alpha = t / max_int;

if (alpha <= max_int)
self = static_cast<uint64_t>(alpha);
else // max(double) / 2^64 will surely contain less than 52 precision bits, so speed up computations.
set_multiplier<double>(self, alpha);

self *= max_int;
self += static_cast<uint64_t>(t - alpha * max_int); // += b_i
}

constexpr static void wide_integer_from_bultin(integer<Bits, Signed>& self, double rhs) noexcept {
constexpr int64_t max_int = std::numeric_limits<int64_t>::max();
constexpr int64_t min_int = std::numeric_limits<int64_t>::min();

/// There are values in int64 that have more than 53 significant bits (in terms of double
/// representation). Such values, being promoted to double, are rounded up or down. If they are rounded up,
/// the result may not fit in 64 bits.
/// The example of such a number is 9.22337e+18.
/// As to_Integral does a static_cast to int64_t, it may result in UB.
/// The necessary check here is that long double has enough significant (mantissa) bits to store the
/// int64_t max value precisely.
static_assert(LDBL_MANT_DIG >= 64,
"On your system long double has less than 64 precision bits,"
"which may result in UB when initializing double from int64_t");

if ((rhs > 0 && rhs < max_int) || (rhs < 0 && rhs > min_int))
{
self = to_Integral(rhs);
self = static_cast<int64_t>(rhs);
return;
}

long double r = rhs;
if (r < 0)
r = -r;
const long double rhs_long_double = (static_cast<long double>(rhs) < 0)
? -static_cast<long double>(rhs)
: rhs;

size_t count = r / std::numeric_limits<uint64_t>::max();
self = count;
self *= std::numeric_limits<uint64_t>::max();
long double to_diff = count;
to_diff *= std::numeric_limits<uint64_t>::max();

self += to_Integral(r - to_diff);
set_multiplier(self, rhs_long_double);

if (rhs < 0)
self = -self;
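As an aside (my own illustration, not part of the commit): the recurrence documented above for `set_multiplier` is essentially a base-2^64 positional decomposition of the floating-point value. The committed code multiplies by max(uint64_t) rather than 2^64, but the idea is the same. A standalone sketch using a hypothetical `decompose` helper that collects the 64-bit digits into a vector instead of accumulating them into a wide integer:

#include <cstdint>
#include <cmath>
#include <vector>
#include <iostream>

// Hypothetical helper: split a non-negative long double into base-2^64 digits,
// most significant first, following the recurrence t = alpha * 2^64 + b.
std::vector<uint64_t> decompose(long double t)
{
    const long double base = static_cast<long double>(UINT64_MAX) + 1.0L; // 2^64
    if (t < base)
        return {static_cast<uint64_t>(t)};

    const long double alpha = std::floor(t / base);            // the "a" part
    std::vector<uint64_t> digits = decompose(alpha);           // recurse on the quotient
    digits.push_back(static_cast<uint64_t>(t - alpha * base)); // the "b" part
    return digits;
}

int main()
{
    // 2^100 = 2^36 * 2^64 + 0, so the digits are {68719476736, 0}.
    for (uint64_t d : decompose(std::ldexp(1.0L, 100)))
        std::cout << d << '\n';
}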
@ -1,4 +1,6 @@
# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
OWNER(g:clickhouse)

LIBRARY()

ADDINCL(

@ -1,3 +1,5 @@
OWNER(g:clickhouse)

LIBRARY()

ADDINCL(

@ -1,3 +1,5 @@
OWNER(g:clickhouse)

LIBRARY()

NO_COMPILER_WARNINGS()

@ -1,3 +1,5 @@
OWNER(g:clickhouse)

LIBRARY()

PEERDIR(

@ -1,3 +1,5 @@
OWNER(g:clickhouse)

LIBRARY()

ADDINCL (GLOBAL clickhouse/base/pcg-random)

@ -1,3 +1,5 @@
OWNER(g:clickhouse)

LIBRARY()

CFLAGS(-g0)

@ -1,3 +1,5 @@
OWNER(g:clickhouse)

LIBRARY()

ADDINCL(GLOBAL clickhouse/base/widechar_width)

@ -1,3 +1,5 @@
OWNER(g:clickhouse)

RECURSE(
common
daemon
contrib/libunwind (vendored, 2 changes)
@ -1 +1 @@
Subproject commit 198458b35f100da32bd3e74c2a3ce8d236db299b
Subproject commit 7d78d3618910752c256b2b58c3895f4efea47fac
contrib/rocksdb (vendored, 2 changes)
@ -1 +1 @@
Subproject commit 963314ffd681596ef2738a95249fe4c1163ef87a
Subproject commit 35d8e36ef1b8e3e0759ca81215f855226a0a54bd
@ -347,8 +347,9 @@ set(SOURCES
    ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_builder.cc
    ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_garbage.cc
    ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_meta.cc
    ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_reader.cc
    ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_format.cc
    ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_reader.cc
    ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_sequential_reader.cc
    ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_writer.cc
    ${ROCKSDB_SOURCE_DIR}/db/builder.cc
    ${ROCKSDB_SOURCE_DIR}/db/c.cc

@ -394,6 +395,8 @@ set(SOURCES
    ${ROCKSDB_SOURCE_DIR}/db/memtable_list.cc
    ${ROCKSDB_SOURCE_DIR}/db/merge_helper.cc
    ${ROCKSDB_SOURCE_DIR}/db/merge_operator.cc
    ${ROCKSDB_SOURCE_DIR}/db/output_validator.cc
    ${ROCKSDB_SOURCE_DIR}/db/periodic_work_scheduler.cc
    ${ROCKSDB_SOURCE_DIR}/db/range_del_aggregator.cc
    ${ROCKSDB_SOURCE_DIR}/db/range_tombstone_fragmenter.cc
    ${ROCKSDB_SOURCE_DIR}/db/repair.cc

@ -451,12 +454,12 @@ set(SOURCES
    ${ROCKSDB_SOURCE_DIR}/monitoring/perf_level.cc
    ${ROCKSDB_SOURCE_DIR}/monitoring/persistent_stats_history.cc
    ${ROCKSDB_SOURCE_DIR}/monitoring/statistics.cc
    ${ROCKSDB_SOURCE_DIR}/monitoring/stats_dump_scheduler.cc
    ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_impl.cc
    ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_updater.cc
    ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util.cc
    ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util_debug.cc
    ${ROCKSDB_SOURCE_DIR}/options/cf_options.cc
    ${ROCKSDB_SOURCE_DIR}/options/configurable.cc
    ${ROCKSDB_SOURCE_DIR}/options/db_options.cc
    ${ROCKSDB_SOURCE_DIR}/options/options.cc
    ${ROCKSDB_SOURCE_DIR}/options/options_helper.cc

@ -507,6 +510,7 @@ set(SOURCES
    ${ROCKSDB_SOURCE_DIR}/table/sst_file_dumper.cc
    ${ROCKSDB_SOURCE_DIR}/table/sst_file_reader.cc
    ${ROCKSDB_SOURCE_DIR}/table/sst_file_writer.cc
    ${ROCKSDB_SOURCE_DIR}/table/table_factory.cc
    ${ROCKSDB_SOURCE_DIR}/table/table_properties.cc
    ${ROCKSDB_SOURCE_DIR}/table/two_level_iterator.cc
    ${ROCKSDB_SOURCE_DIR}/test_util/sync_point.cc

@ -515,6 +519,7 @@ set(SOURCES
    ${ROCKSDB_SOURCE_DIR}/test_util/transaction_test_util.cc
    ${ROCKSDB_SOURCE_DIR}/tools/block_cache_analyzer/block_cache_trace_analyzer.cc
    ${ROCKSDB_SOURCE_DIR}/tools/dump/db_dump_tool.cc
    ${ROCKSDB_SOURCE_DIR}/tools/io_tracer_parser_tool.cc
    ${ROCKSDB_SOURCE_DIR}/tools/ldb_cmd.cc
    ${ROCKSDB_SOURCE_DIR}/tools/ldb_tool.cc
    ${ROCKSDB_SOURCE_DIR}/tools/sst_dump_tool.cc
@ -7,8 +7,10 @@ ENV SOURCE_DIR=/build
ENV OUTPUT_DIR=/output
ENV IGNORE='.*contrib.*'

CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-10 CXX=clang++-10 cmake .. && cd /; \
RUN apt-get update && apt-get install cmake --yes --no-install-recommends

CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-11 CXX=clang++-11 cmake .. && cd /; \
dpkg -i /package_folder/clickhouse-common-static_*.deb; \
llvm-profdata-10 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \
llvm-cov-10 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \
llvm-profdata-11 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \
llvm-cov-11 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \
genhtml output.lcov --ignore-errors source --output-directory ${OUTPUT_DIR}
@ -15,6 +15,9 @@ stage=${stage:-}
|
||||
# empty parameter.
|
||||
read -ra FASTTEST_CMAKE_FLAGS <<< "${FASTTEST_CMAKE_FLAGS:-}"
|
||||
|
||||
# Run only matching tests.
|
||||
FASTTEST_FOCUS=${FASTTEST_FOCUS:-""}
|
||||
|
||||
FASTTEST_WORKSPACE=$(readlink -f "${FASTTEST_WORKSPACE:-.}")
|
||||
FASTTEST_SOURCE=$(readlink -f "${FASTTEST_SOURCE:-$FASTTEST_WORKSPACE/ch}")
|
||||
FASTTEST_BUILD=$(readlink -f "${FASTTEST_BUILD:-${BUILD:-$FASTTEST_WORKSPACE/build}}")
|
||||
@ -287,9 +290,11 @@ TESTS_TO_SKIP=(
|
||||
01322_ttest_scipy
|
||||
|
||||
01545_system_errors
|
||||
# Checks system.errors
|
||||
01563_distributed_query_finish
|
||||
)
|
||||
|
||||
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
|
||||
# substr is to remove semicolon after test name
|
||||
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
|
||||
|
@ -30,7 +30,7 @@ RUN apt-get update \
|
||||
tzdata \
|
||||
vim \
|
||||
wget \
|
||||
&& pip3 --no-cache-dir install clickhouse_driver scipy \
|
||||
&& pip3 --no-cache-dir install 'clickhouse-driver>=0.1.5' scipy \
|
||||
&& apt-get purge --yes python3-dev g++ \
|
||||
&& apt-get autoremove --yes \
|
||||
&& apt-get clean \
|
||||
|
@ -16,7 +16,7 @@
|
||||
<max_execution_time>300</max_execution_time>
|
||||
|
||||
<!-- One NUMA node w/o hyperthreading -->
|
||||
<max_threads>20</max_threads>
|
||||
<max_threads>12</max_threads>
|
||||
</default>
|
||||
</profiles>
|
||||
</yandex>
|
||||
|
@ -14,10 +14,12 @@ import string
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
import logging
|
||||
import xml.etree.ElementTree as et
|
||||
from threading import Thread
|
||||
from scipy import stats
|
||||
|
||||
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING')
|
||||
|
||||
total_start_seconds = time.perf_counter()
|
||||
stage_start_seconds = total_start_seconds
|
||||
@ -171,12 +173,9 @@ reportStageEnd('drop-1')
|
||||
settings = root.findall('settings/*')
|
||||
for conn_index, c in enumerate(all_connections):
|
||||
for s in settings:
|
||||
try:
|
||||
q = f"set {s.tag} = '{s.text}'"
|
||||
c.execute(q)
|
||||
print(f'set\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}')
|
||||
except:
|
||||
print(traceback.format_exc(), file=sys.stderr)
|
||||
# requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings
|
||||
# (https://github.com/mymarilyn/clickhouse-driver/pull/142)
|
||||
c.settings[s.tag] = s.text
|
||||
|
||||
reportStageEnd('settings')
|
||||
|
||||
|
@ -1,12 +1,12 @@
|
||||
# docker build -t yandex/clickhouse-stateful-test-with-coverage .
|
||||
FROM yandex/clickhouse-stateless-test
|
||||
FROM yandex/clickhouse-stateless-test-with-coverage
|
||||
|
||||
RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-9 main" >> /etc/apt/sources.list
|
||||
|
||||
RUN apt-get update -y \
|
||||
&& env DEBIAN_FRONTEND=noninteractive \
|
||||
apt-get install --yes --no-install-recommends \
|
||||
python3-requests
|
||||
python3-requests procps psmisc
|
||||
|
||||
COPY s3downloader /s3downloader
|
||||
COPY run.sh /run.sh
|
||||
|
@ -1,40 +1,44 @@
|
||||
#!/bin/bash
|
||||
|
||||
kill_clickhouse () {
|
||||
kill "$(pgrep -u clickhouse)" 2>/dev/null
|
||||
echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
|
||||
pkill -f "clickhouse-server" 2>/dev/null
|
||||
|
||||
for _ in {1..10}
|
||||
|
||||
for _ in {1..120}
|
||||
do
|
||||
if ! kill -0 "$(pgrep -u clickhouse)"; then
|
||||
echo "No clickhouse process"
|
||||
break
|
||||
else
|
||||
echo "Process $(pgrep -u clickhouse) still alive"
|
||||
sleep 10
|
||||
fi
|
||||
if ! pkill -0 -f "clickhouse-server" ; then break ; fi
|
||||
echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S'
|
||||
sleep 1
|
||||
done
|
||||
|
||||
if pkill -0 -f "clickhouse-server"
|
||||
then
|
||||
pstree -apgT
|
||||
jobs
|
||||
echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S'
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
start_clickhouse () {
|
||||
LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml &
|
||||
}
|
||||
|
||||
wait_llvm_profdata () {
|
||||
while kill -0 "$(pgrep llvm-profdata-10)"
|
||||
counter=0
|
||||
until clickhouse-client --query "SELECT 1"
|
||||
do
|
||||
echo "Waiting for profdata $(pgrep llvm-profdata-10) still alive"
|
||||
sleep 3
|
||||
if [ "$counter" -gt 120 ]
|
||||
then
|
||||
echo "Cannot start clickhouse-server"
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
tail -n1000 /var/log/clickhouse-server/stderr.log
|
||||
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
|
||||
break
|
||||
fi
|
||||
sleep 0.5
|
||||
counter=$((counter + 1))
|
||||
done
|
||||
}
|
||||
|
||||
merge_client_files_in_background () {
|
||||
client_files=$(ls /client_*profraw 2>/dev/null)
|
||||
if [ -n "$client_files" ]
|
||||
then
|
||||
llvm-profdata-10 merge -sparse "$client_files" -o "merged_client_$(date +%s).profraw"
|
||||
rm "$client_files"
|
||||
fi
|
||||
}
|
||||
|
||||
chmod 777 /
|
||||
|
||||
@ -51,26 +55,7 @@ chmod 777 -R /var/log/clickhouse-server/
|
||||
# install test configs
|
||||
/usr/share/clickhouse-test/config/install.sh
|
||||
|
||||
function start()
|
||||
{
|
||||
counter=0
|
||||
until clickhouse-client --query "SELECT 1"
|
||||
do
|
||||
if [ "$counter" -gt 120 ]
|
||||
then
|
||||
echo "Cannot start clickhouse-server"
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
tail -n1000 /var/log/clickhouse-server/stderr.log
|
||||
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
|
||||
break
|
||||
fi
|
||||
timeout 120 service clickhouse-server start
|
||||
sleep 0.5
|
||||
counter=$((counter + 1))
|
||||
done
|
||||
}
|
||||
|
||||
start
|
||||
start_clickhouse
|
||||
|
||||
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
|
||||
if ! /s3downloader --dataset-names $DATASETS; then
|
||||
@ -81,25 +66,20 @@ fi
|
||||
|
||||
chmod 777 -R /var/lib/clickhouse
|
||||
|
||||
while /bin/true; do
|
||||
merge_client_files_in_background
|
||||
sleep 2
|
||||
done &
|
||||
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW DATABASES"
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "CREATE DATABASE test"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW DATABASES"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "CREATE DATABASE test"
|
||||
|
||||
kill_clickhouse
|
||||
start_clickhouse
|
||||
|
||||
sleep 10
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM datasets"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM test"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM test"
|
||||
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM datasets"
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test"
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test"
|
||||
|
||||
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
|
||||
SKIP_LIST_OPT="--use-skip-list"
|
||||
@ -109,15 +89,10 @@ fi
|
||||
# more ideologically correct.
|
||||
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
|
||||
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
|
||||
kill_clickhouse
|
||||
|
||||
wait_llvm_profdata
|
||||
|
||||
sleep 3
|
||||
|
||||
wait_llvm_profdata # 100% merged all parts
|
||||
|
||||
|
||||
cp /*.profraw /profraw ||:
|
||||
|
@ -29,7 +29,7 @@ def dowload_with_progress(url, path):
|
||||
logging.info("Downloading from %s to temp path %s", url, path)
|
||||
for i in range(RETRIES_COUNT):
|
||||
try:
|
||||
with open(path, 'w') as f:
|
||||
with open(path, 'wb') as f:
|
||||
response = requests.get(url, stream=True)
|
||||
response.raise_for_status()
|
||||
total_length = response.headers.get('content-length')
|
||||
|
@ -1,4 +1,4 @@
|
||||
# docker build -t yandex/clickhouse-stateless-with-coverage-test .
|
||||
# docker build -t yandex/clickhouse-stateless-test-with-coverage .
|
||||
# TODO: that can be based on yandex/clickhouse-stateless-test (llvm version and CMD differs)
|
||||
FROM yandex/clickhouse-test-base
|
||||
|
||||
@ -28,7 +28,9 @@ RUN apt-get update -y \
|
||||
lsof \
|
||||
unixodbc \
|
||||
wget \
|
||||
qemu-user-static
|
||||
qemu-user-static \
|
||||
procps \
|
||||
psmisc
|
||||
|
||||
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
|
||||
&& wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \
|
||||
|
@ -2,27 +2,41 @@
|
||||
|
||||
kill_clickhouse () {
|
||||
echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
|
||||
kill "$(pgrep -u clickhouse)" 2>/dev/null
|
||||
pkill -f "clickhouse-server" 2>/dev/null
|
||||
|
||||
for _ in {1..10}
|
||||
|
||||
for _ in {1..120}
|
||||
do
|
||||
if ! kill -0 "$(pgrep -u clickhouse)"; then
|
||||
echo "No clickhouse process" | ts '%Y-%m-%d %H:%M:%S'
|
||||
break
|
||||
else
|
||||
echo "Process $(pgrep -u clickhouse) still alive" | ts '%Y-%m-%d %H:%M:%S'
|
||||
sleep 10
|
||||
fi
|
||||
if ! pkill -0 -f "clickhouse-server" ; then break ; fi
|
||||
echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S'
|
||||
sleep 1
|
||||
done
|
||||
|
||||
echo "Will try to send second kill signal for sure"
|
||||
kill "$(pgrep -u clickhouse)" 2>/dev/null
|
||||
sleep 5
|
||||
echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S'
|
||||
if pkill -0 -f "clickhouse-server"
|
||||
then
|
||||
pstree -apgT
|
||||
jobs
|
||||
echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S'
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
start_clickhouse () {
|
||||
LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml &
|
||||
counter=0
|
||||
until clickhouse-client --query "SELECT 1"
|
||||
do
|
||||
if [ "$counter" -gt 120 ]
|
||||
then
|
||||
echo "Cannot start clickhouse-server"
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
tail -n1000 /var/log/clickhouse-server/stderr.log
|
||||
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
|
||||
break
|
||||
fi
|
||||
sleep 0.5
|
||||
counter=$((counter + 1))
|
||||
done
|
||||
}
|
||||
|
||||
chmod 777 /
|
||||
@ -44,9 +58,6 @@ chmod 777 -R /var/log/clickhouse-server/
|
||||
|
||||
start_clickhouse
|
||||
|
||||
sleep 10
|
||||
|
||||
|
||||
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
|
||||
SKIP_LIST_OPT="--use-skip-list"
|
||||
fi
|
||||
|
@ -35,7 +35,7 @@ RUN apt-get update \
|
||||
ENV TZ=Europe/Moscow
|
||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
|
||||
RUN pip3 install urllib3 testflows==1.6.62 docker-compose docker dicttoxml kazoo tzlocal
|
||||
RUN pip3 install urllib3 testflows==1.6.65 docker-compose docker dicttoxml kazoo tzlocal
|
||||
|
||||
ENV DOCKER_CHANNEL stable
|
||||
ENV DOCKER_VERSION 17.09.1-ce
|
||||
|
@ -30,4 +30,4 @@ Instead of inserting data manually, you might consider to use one of [client lib
|
||||
- `input_format_import_nested_json` allows to insert nested JSON objects into columns of [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) type.
|
||||
|
||||
!!! note "Note"
|
||||
Settings are specified as `GET` parameters for the HTTP interface or as additional command-line arguments prefixed with `--` for the `CLI` interface.
|
||||
Settings are specified as `GET` parameters for the HTTP interface or as additional command-line arguments prefixed with `--` for the `CLI` interface.
|
||||
|
@ -26,6 +26,9 @@ toc_title: Client Libraries
|
||||
- [go-clickhouse](https://github.com/roistat/go-clickhouse)
|
||||
- [mailrugo-clickhouse](https://github.com/mailru/go-clickhouse)
|
||||
- [golang-clickhouse](https://github.com/leprosus/golang-clickhouse)
|
||||
- Swift
|
||||
- [ClickHouseNIO](https://github.com/patrick-zippenfenig/ClickHouseNIO)
|
||||
- [ClickHouseVapor ORM](https://github.com/patrick-zippenfenig/ClickHouseVapor)
|
||||
- NodeJs
|
||||
- [clickhouse (NodeJs)](https://github.com/TimonKK/clickhouse)
|
||||
- [node-clickhouse](https://github.com/apla/node-clickhouse)
|
||||
|
@ -1081,4 +1081,45 @@ Default value: `/var/lib/clickhouse/access/`.
|
||||
|
||||
- [Access Control and Account Management](../../operations/access-rights.md#access-control)
|
||||
|
||||
## user_directories {#user_directories}
|
||||
|
||||
Section of the configuration file that contains settings:
|
||||
- Path to configuration file with predefined users.
|
||||
- Path to folder where users created by SQL commands are stored.
|
||||
|
||||
If this section is specified, the path from [users_config](../../operations/server-configuration-parameters/settings.md#users-config) and [access_control_path](../../operations/server-configuration-parameters/settings.md#access_control_path) won't be used.
|
||||
|
||||
The `user_directories` section can contain any number of items, the order of the items means their precedence (the higher the item the higher the precedence).
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<user_directories>
|
||||
<users_xml>
|
||||
<path>/etc/clickhouse-server/users.xml</path>
|
||||
</users_xml>
|
||||
<local_directory>
|
||||
<path>/var/lib/clickhouse/access/</path>
|
||||
</local_directory>
|
||||
</user_directories>
|
||||
```
|
||||
|
||||
You can also specify the settings `memory` (information is stored only in memory, without writing to disk) and `ldap` (information is stored on an LDAP server).
|
||||
|
||||
To add an LDAP server as a remote user directory of users that are not defined locally, define a single `ldap` section with the following parameters:
|
||||
- `server` — one of LDAP server names defined in `ldap_servers` config section. This parameter is mandatory and cannot be empty.
|
||||
- `roles` — section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server. If no roles are specified, the user will not be able to perform any actions after authentication. If any of the listed roles is not defined locally at the time of authentication, the authentication attempt will fail as if the provided password was incorrect.
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<ldap>
|
||||
<server>my_ldap_server</server>
|
||||
<roles>
|
||||
<my_local_role1 />
|
||||
<my_local_role2 />
|
||||
</roles>
|
||||
</ldap>
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/server_configuration_parameters/settings/) <!--hide-->
|
||||
|
@ -307,7 +307,51 @@ Disabled by default.
|
||||
|
||||
## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number}
|
||||
|
||||
For TSV input format switches to parsing enum values as enum ids.
|
||||
Enables or disables parsing enum values as enum ids for TSV input format.
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — Enum values are parsed as values.
|
||||
- 1 — Enum values are parsed as enum IDs
|
||||
|
||||
Default value: 0.
|
||||
|
||||
**Example**
|
||||
|
||||
Consider the table:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory();
|
||||
```
|
||||
|
||||
When the `input_format_tsv_enum_as_number` setting is enabled:
|
||||
|
||||
```sql
|
||||
SET input_format_tsv_enum_as_number = 1;
|
||||
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
|
||||
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 1;
|
||||
SELECT * FROM table_with_enum_column_for_tsv_insert;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌──Id─┬─Value──┐
|
||||
│ 102 │ second │
|
||||
└─────┴────────┘
|
||||
┌──Id─┬─Value──┐
|
||||
│ 103 │ first │
|
||||
└─────┴────────┘
|
||||
```
|
||||
|
||||
When the `input_format_tsv_enum_as_number` setting is disabled, the `INSERT` query:
|
||||
|
||||
```sql
|
||||
SET input_format_tsv_enum_as_number = 0;
|
||||
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
|
||||
```
|
||||
|
||||
throws an exception.
|
||||
|
||||
## input_format_null_as_default {#settings-input-format-null-as-default}
|
||||
|
||||
@ -1182,7 +1226,47 @@ For CSV input format enables or disables parsing of unquoted `NULL` as literal (
|
||||
|
||||
## input_format_csv_enum_as_number {#settings-input_format_csv_enum_as_number}
|
||||
|
||||
For CSV input format switches to parsing enum values as enum ids.
|
||||
Enables or disables parsing enum values as enum ids for CSV input format.
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — Enum values are parsed as values.
|
||||
- 1 — Enum values are parsed as enum IDs.
|
||||
|
||||
Default value: 0.
|
||||
|
||||
**Examples**
|
||||
|
||||
Consider the table:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory();
|
||||
```
|
||||
|
||||
When the `input_format_csv_enum_as_number` setting is enabled:
|
||||
|
||||
```sql
|
||||
SET input_format_csv_enum_as_number = 1;
|
||||
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2;
|
||||
SELECT * FROM table_with_enum_column_for_csv_insert;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌──Id─┬─Value─────┐
|
||||
│ 102 │ second │
|
||||
└─────┴───────────┘
|
||||
```
|
||||
|
||||
When the `input_format_csv_enum_as_number` setting is disabled, the `INSERT` query:
|
||||
|
||||
```sql
|
||||
SET input_format_csv_enum_as_number = 0;
|
||||
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2;
|
||||
```
|
||||
|
||||
throws an exception.
|
||||
|
||||
## output_format_csv_crlf_end_of_line {#settings-output-format-csv-crlf-end-of-line}
|
||||
|
||||
@ -2233,4 +2317,10 @@ Possible values:
|
||||
|
||||
Default value: `1`.
|
||||
|
||||
## output_format_tsv_null_representation {#output_format_tsv_null_representation}
|
||||
|
||||
Allows configurable `NULL` representation for [TSV](../../interfaces/formats.md#tabseparated) output format. The setting only controls output format and `\N` is the only supported `NULL` representation for TSV input format.
|
||||
|
||||
Default value: `\N`.
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
|
||||
|
70
docs/en/operations/system-tables/replicated_fetches.md
Normal file
@ -0,0 +1,70 @@
|
||||
# system.replicated_fetches {#system_tables-replicated_fetches}
|
||||
|
||||
Contains information about currently running background fetches.
|
||||
|
||||
Columns:
|
||||
|
||||
- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database.
|
||||
|
||||
- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table.
|
||||
|
||||
- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — The time elapsed (in seconds) since the currently running background fetch started.
|
||||
|
||||
- `progress` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of completed work from 0 to 1.
|
||||
|
||||
- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — The name of the part that will be formed as the result of the currently running background fetch.
|
||||
|
||||
- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the part that will be formed as the result of the currently running background fetch.
|
||||
|
||||
- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition.
|
||||
|
||||
- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The total size (in bytes) of the compressed data in the result part.
|
||||
|
||||
- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of compressed bytes read from the result part.
|
||||
|
||||
- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the source replica.
|
||||
|
||||
- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the source replica.
|
||||
|
||||
- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Port number of the source replica.
|
||||
|
||||
- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — Name of the interserver scheme.
|
||||
|
||||
- `URI` ([String](../../sql-reference/data-types/string.md)) — Uniform resource identifier.
|
||||
|
||||
- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether the currently running background fetch is being performed using the `TO DETACHED` expression.
|
||||
|
||||
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT * FROM system.replicated_fetches LIMIT 1 FORMAT Vertical;
|
||||
```
|
||||
|
||||
``` text
|
||||
Row 1:
|
||||
──────
|
||||
database: default
|
||||
table: t
|
||||
elapsed: 7.243039876
|
||||
progress: 0.41832135995612835
|
||||
result_part_name: all_0_0_0
|
||||
result_part_path: /var/lib/clickhouse/store/700/70080a04-b2de-4adf-9fa5-9ea210e81766/all_0_0_0/
|
||||
partition_id: all
|
||||
total_size_bytes_compressed: 1052783726
|
||||
bytes_read_compressed: 440401920
|
||||
source_replica_path: /clickhouse/test/t/replicas/1
|
||||
source_replica_hostname: node1
|
||||
source_replica_port: 9009
|
||||
interserver_scheme: http
|
||||
URI: http://node1:9009/?endpoint=DataPartsExchange%3A%2Fclickhouse%2Ftest%2Ft%2Freplicas%2F1&part=all_0_0_0&client_protocol_version=4&compress=false
|
||||
to_detached: 0
|
||||
thread_id: 54
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system/#query-language-system-replicated)
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/replicated_fetches) <!--hide-->
|
@ -44,8 +44,6 @@ SELECT sum(y) FROM t_null_big
|
||||
└────────┘
|
||||
```
|
||||
|
||||
The `sum` function interprets `NULL` as `0`. In particular, this means that if the function receives input of a selection where all the values are `NULL`, then the result will be `0`, not `NULL`.
|
||||
|
||||
Now you can use the `groupArray` function to create an array from the `y` column:
|
||||
|
||||
``` sql
|
||||
|
@ -0,0 +1,37 @@
|
||||
---
|
||||
toc_priority: 150
|
||||
---
|
||||
|
||||
## initializeAggregation {#initializeaggregation}
|
||||
|
||||
Initializes aggregation for your input rows. It is intended for the functions with the suffix `State`.
|
||||
Use it for tests or to process columns of types `AggregateFunction` and `AggregatingMergeTree`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
initializeAggregation (aggregate_function, column_1, column_2);
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `aggregate_function` — Name of the aggregate function whose state should be created. [String](../../../sql-reference/data-types/string.md#string).
|
||||
- `column_n` — The column to pass to the function as its argument. [String](../../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value(s)**
|
||||
|
||||
Returns the result of the aggregation for your input rows. The return type will be the same as the return type of the function that `initializeAggregation` takes as its first argument.
|
||||
For example, for functions with the suffix `State`, the return type will be `AggregateFunction`.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
|
||||
```
|
||||
Result:
|
||||
|
||||
```text
┌─uniqMerge(state)─┐
│                3 │
└──────────────────┘
```
|
@ -337,26 +337,124 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d
|
||||
└────────────┴───────────┴───────────┴───────────┘
|
||||
```
|
||||
|
||||
## date_trunc(datepart, time_or_data\[, time_zone\]), dateTrunc(datepart, time_or_data\[, time_zone\]) {#date_trunc}
|
||||
## date_trunc {#date_trunc}
|
||||
|
||||
Truncates a date or date with time based on the specified datepart, such as
|
||||
- `second`
|
||||
- `minute`
|
||||
- `hour`
|
||||
- `day`
|
||||
- `week`
|
||||
- `month`
|
||||
- `quarter`
|
||||
- `year`
|
||||
Truncates date and time data to the specified part of date.
|
||||
|
||||
```sql
|
||||
SELECT date_trunc('hour', now())
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
date_trunc(unit, value[, timezone])
|
||||
```
|
||||
|
||||
## now {#now}
|
||||
Alias: `dateTrunc`.
|
||||
|
||||
Accepts zero or one arguments(timezone) and returns the current time at one of the moments of request execution, or current time of specific timezone at one of the moments of request execution if `timezone` argument provided.
|
||||
This function returns a constant, even if the request took a long time to complete.
|
||||
**Parameters**
|
||||
|
||||
- `unit` — Part of date. [String](../syntax.md#syntax-string-literal).
|
||||
Possible values:
|
||||
|
||||
- `second`
|
||||
- `minute`
|
||||
- `hour`
|
||||
- `day`
|
||||
- `week`
|
||||
- `month`
|
||||
- `quarter`
|
||||
- `year`
|
||||
|
||||
- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
|
||||
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Value, truncated to the specified part of date.
|
||||
|
||||
Type: [Datetime](../../sql-reference/data-types/datetime.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query without timezone:
|
||||
|
||||
``` sql
|
||||
SELECT now(), date_trunc('hour', now());
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌───────────────now()─┬─date_trunc('hour', now())─┐
|
||||
│ 2020-09-28 10:40:45 │ 2020-09-28 10:00:00 │
|
||||
└─────────────────────┴───────────────────────────┘
|
||||
```
|
||||
|
||||
Query with the specified timezone:
|
||||
|
||||
```sql
|
||||
SELECT now(), date_trunc('hour', now(), 'Europe/Moscow');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌───────────────now()─┬─date_trunc('hour', now(), 'Europe/Moscow')─┐
|
||||
│ 2020-09-28 10:46:26 │ 2020-09-28 13:00:00 │
|
||||
└─────────────────────┴────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**See also**
|
||||
|
||||
- [toStartOfInterval](#tostartofintervaltime-or-data-interval-x-unit-time-zone)
|
||||
|
||||
# now {#now}
|
||||
|
||||
Returns the current date and time.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
now([timezone])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Current date and time.
|
||||
|
||||
Type: [Datetime](../../sql-reference/data-types/datetime.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query without timezone:
|
||||
|
||||
``` sql
|
||||
SELECT now();
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌───────────────now()─┐
|
||||
│ 2020-10-17 07:42:09 │
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
Query with the specified timezone:
|
||||
|
||||
``` sql
|
||||
SELECT now('Europe/Moscow');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─now('Europe/Moscow')─┐
|
||||
│ 2020-10-17 10:42:23 │
|
||||
└──────────────────────┘
|
||||
```
|
||||
|
||||
## today {#today}
|
||||
|
||||
@ -437,18 +535,7 @@ dateDiff('unit', startdate, enddate, [timezone])
|
||||
|
||||
- `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal).
|
||||
|
||||
Supported values:
|
||||
|
||||
| unit |
|
||||
| ---- |
|
||||
|second |
|
||||
|minute |
|
||||
|hour |
|
||||
|day |
|
||||
|week |
|
||||
|month |
|
||||
|quarter |
|
||||
|year |
|
||||
Supported values: second, minute, hour, day, week, month, quarter, year.
|
||||
|
||||
- `startdate` — The first time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
|
||||
|
||||
|
381
docs/en/sql-reference/functions/encryption-functions.md
Normal file
@ -0,0 +1,381 @@
|
||||
---
|
||||
toc_priority: 67
|
||||
toc_title: Encryption
|
||||
---
|
||||
|
||||
# Encryption functions {#encryption-functions}
|
||||
|
||||
These functions implement encryption and decryption of data with AES (Advanced Encryption Standard) algorithm.
|
||||
|
||||
Key length depends on encryption mode. It is 16, 24, and 32 bytes long for `-128-`, `-192-`, and `-256-` modes respectively.
|
||||
|
||||
Initialization vector length is always 16 bytes (bytes in excess of 16 are ignored).
|
||||
|
||||
Note that these functions work slowly.
|
||||
|
||||
## encrypt {#encrypt}
|
||||
|
||||
This function encrypts data using these modes:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
|
||||
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
|
||||
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
|
||||
- aes-128-ofb, aes-192-ofb, aes-256-ofb
|
||||
- aes-128-gcm, aes-192-gcm, aes-256-gcm
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes; for other modes an exception is thrown. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Examples**
|
||||
|
||||
Create this table:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Insert this data:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Example without `iv`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐
|
||||
│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │
|
||||
│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │
|
||||
│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │
|
||||
└─────────────┴──────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Example with `iv`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐
|
||||
│ aes-256-ctr │ │
|
||||
│ aes-256-ctr │ 7FB039F7 │
|
||||
│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │
|
||||
└─────────────┴───────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Example with `-gcm`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐
|
||||
│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │
|
||||
│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │
|
||||
│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │
|
||||
└─────────────┴────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Example with `-gcm` mode and with `aad`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
|
||||
│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │
|
||||
│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │
|
||||
│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │
|
||||
└─────────────┴────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## aes_encrypt_mysql {#aes_encrypt_mysql}
|
||||
|
||||
Compatible with MySQL encryption; the result can be decrypted with the [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
|
||||
|
||||
Supported encryption modes:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
|
||||
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
|
||||
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
|
||||
- aes-128-ofb, aes-192-ofb, aes-256-ofb
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Examples**
|
||||
|
||||
Create this table:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Insert this data:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Example without `iv`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐
|
||||
│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │
|
||||
│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │
|
||||
│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │
|
||||
└─────────────┴──────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Example with `iv`:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐
|
||||
│ aes-256-cfb128 │ │
|
||||
│ aes-256-cfb128 │ 7FB039F7 │
|
||||
│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │
|
||||
└────────────────┴────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## decrypt {#decrypt}
|
||||
|
||||
This function decrypts data using these modes:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
|
||||
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
|
||||
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
|
||||
- aes-128-ofb, aes-192-ofb, aes-256-ofb
|
||||
- aes-128-gcm, aes-192-gcm, aes-256-gcm
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
decrypt('mode', 'ciphertext', 'key' [, iv, aad])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `aad` — Additional authenticated data. Decryption won't succeed if this value is incorrect. Works only in `-gcm` modes; for other modes an exception is thrown. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Decrypted String. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Examples**
|
||||
|
||||
Create this table:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Insert this data:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
|
||||
SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐
|
||||
│ aes-128-ecb │ │
|
||||
│ aes-128-ecb │ text │
|
||||
│ aes-128-ecb │ What Is ClickHouse? │
|
||||
└─────────────┴─────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## aes_decrypt_mysql {#aes_decrypt_mysql}
|
||||
|
||||
Compatible with MySQL encryption; decrypts data encrypted with the [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function.
|
||||
|
||||
Supported decryption modes:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
|
||||
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
|
||||
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
|
||||
- aes-128-ofb, aes-192-ofb, aes-256-ofb
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Decrypted String. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Examples**
|
||||
|
||||
Create this table:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Insert this data:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐
|
||||
│ aes-128-cbc │ │
|
||||
│ aes-128-cbc │ text │
|
||||
│ aes-128-cbc │ What Is ClickHouse? │
|
||||
└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/encryption_functions/) <!--hide-->
|
@ -306,3 +306,67 @@ execute_native_thread_routine
|
||||
start_thread
|
||||
clone
|
||||
```
|
||||
## tid {#tid}
|
||||
|
||||
Returns the id of the thread in which the current [Block](https://clickhouse.tech/docs/en/development/architecture/#block) is processed.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
tid()
|
||||
```
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Current thread id. [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT tid();
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─tid()─┐
|
||||
│ 3878 │
|
||||
└───────┘
|
||||
```
|
||||
## logTrace {#logtrace}
|
||||
|
||||
Emits a trace log message to the server log for each [Block](https://clickhouse.tech/docs/en/development/architecture/#block).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
logTrace('message')
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `message` — Message that is emitted to server log. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Always returns 0.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT logTrace('logTrace message');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─logTrace('logTrace message')─┐
|
||||
│ 0 │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/query_language/functions/introspection/) <!--hide-->
|
||||
|
@ -325,7 +325,59 @@ This function accepts a number or date or date with time, and returns a FixedStr
|
||||
|
||||
## reinterpretAsUUID {#reinterpretasuuid}
|
||||
|
||||
This function accepts FixedString, and returns UUID. Takes 16 bytes string. If the string isn't long enough, the functions work as if the string is padded with the necessary number of null bytes to the end. If the string longer than 16 bytes, the extra bytes at the end are ignored.
|
||||
This function accepts a 16-byte string and returns a UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes at the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
reinterpretAsUUID(fixed_string)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `fixed_string` — Big-endian byte string. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- The UUID type value. [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type).
|
||||
|
||||
**Examples**
|
||||
|
||||
String to UUID.
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))─┐
|
||||
│ 08090a0b-0c0d-0e0f-0001-020304050607 │
|
||||
└───────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Going back and forth from String to UUID.
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
WITH
|
||||
generateUUIDv4() AS uuid,
|
||||
identity(lower(hex(reverse(reinterpretAsString(uuid))))) AS str,
|
||||
reinterpretAsUUID(reverse(unhex(str))) AS uuid2
|
||||
SELECT uuid = uuid2;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─equals(uuid, uuid2)─┐
|
||||
│ 1 │
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
## CAST(x, T) {#type_conversion_function-cast}
|
||||
|
||||
|
@ -115,7 +115,21 @@ Returns the “first significant subdomain”. This is a non-standard concept sp
|
||||
|
||||
Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain” (see the explanation above).
|
||||
|
||||
For example, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
|
||||
For example:
|
||||
|
||||
- `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
|
||||
- `cutToFirstSignificantSubdomain('www.tr') = 'tr'`.
|
||||
- `cutToFirstSignificantSubdomain('tr') = ''`.
|
||||
|
||||
### cutToFirstSignificantSubdomainWithWWW {#cuttofirstsignificantsubdomainwithwww}
|
||||
|
||||
Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain”, without stripping "www".
|
||||
|
||||
For example:
|
||||
|
||||
- `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`.
|
||||
- `cutToFirstSignificantSubdomain('www.tr') = 'www.tr'`.
|
||||
- `cutToFirstSignificantSubdomain('tr') = ''`.
|
||||
|
||||
### port(URL\[, default_port = 0\]) {#port}
|
||||
|
||||
|
@ -20,7 +20,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
|
||||
PRIMARY KEY key1, key2
|
||||
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
|
||||
LAYOUT(LAYOUT_NAME([param_name param_value]))
|
||||
LIFETIME([MIN val1] MAX val2)
|
||||
LIFETIME({MIN min_val MAX max_val | max_val})
|
||||
```
|
||||
|
||||
External dictionary structure consists of attributes. Dictionary attributes are specified similarly to table columns. The only required attribute property is its type, all other properties may have default values.
|
||||
|
@ -27,9 +27,9 @@ It is applicable when selecting data from tables that use the [MergeTree](../../
|
||||
|
||||
### Drawbacks {#drawbacks}
|
||||
|
||||
Queries that use `FINAL` are executed not as fast as similar queries that don’t, because:
|
||||
Queries that use `FINAL` are executed slightly slower than similar queries that don’t, because:
|
||||
|
||||
- Query is executed in a single thread and data is merged during query execution.
|
||||
- Data is merged during query execution.
|
||||
- Queries with `FINAL` read primary key columns in addition to the columns specified in the query.
|
||||
|
||||
**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine haven't happened yet and deal with it by applying aggregation (for example, to discard duplicates). {## TODO: examples ##}
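
A sketch of that approach, assuming a `ReplacingMergeTree`-style table `events(id, value, version)` (the table and column names are illustrative, not from this page): instead of `SELECT ... FINAL`, pick the latest row per key with aggregation.

```sql
-- Deduplicate at query time without FINAL: keep the row with the highest
-- version for every id (table and column names are assumptions).
SELECT
    id,
    argMax(value, version) AS value
FROM events
GROUP BY id;
```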
|
||||
|
@ -6,7 +6,7 @@ toc_title: GROUP BY
|
||||
|
||||
`GROUP BY` clause switches the `SELECT` query into an aggregation mode, which works as follows:
|
||||
|
||||
- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expressions”.
|
||||
- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expression”.
|
||||
- All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both.
|
||||
- Result of aggregating `SELECT` query will contain as many rows as there were unique values of “grouping key” in the source table. Usually this significantly reduces the row count, often by orders of magnitude, but not necessarily: row count stays the same if all “grouping key” values were distinct (a short sketch follows this list).
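
A minimal sketch of these rules, assuming a hypothetical table `visits(user_id, url)` that is not part of this page: every selected column is either a key expression or wrapped in an aggregate function, and the result has one row per distinct `user_id`.

```sql
-- Hypothetical table: visits(user_id UInt64, url String).
-- user_id is the grouping key; url appears only inside an aggregate function.
SELECT
    user_id,
    count() AS hits,
    any(url) AS sample_url
FROM visits
GROUP BY user_id;
```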
|
||||
|
||||
@ -45,6 +45,154 @@ You can see that `GROUP BY` for `y = NULL` summed up `x`, as if `NULL` is this v
|
||||
|
||||
If you pass several keys to `GROUP BY`, the result will give you all the combinations of the selection, as if `NULL` were a specific value.
|
||||
|
||||
## WITH ROLLUP Modifier {#with-rollup-modifier}
|
||||
|
||||
`WITH ROLLUP` modifier is used to calculate subtotals for the key expressions, based on their order in the `GROUP BY` list. The subtotals rows are added after the result table.
|
||||
|
||||
The subtotals are calculated in the reverse order: at first subtotals are calculated for the last key expression in the list, then for the previous one, and so on up to the first key expression.
|
||||
|
||||
In the subtotals rows the values of already "grouped" key expressions are set to `0` or empty line.
|
||||
|
||||
!!! note "Note"
|
||||
Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results.
|
||||
|
||||
**Example**
|
||||
|
||||
Consider the table t:
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┐
|
||||
│ 2019 │ 1 │ 5 │
|
||||
│ 2019 │ 1 │ 15 │
|
||||
│ 2020 │ 1 │ 5 │
|
||||
│ 2020 │ 1 │ 15 │
|
||||
│ 2020 │ 10 │ 5 │
|
||||
│ 2020 │ 10 │ 15 │
|
||||
└──────┴───────┴─────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
|
||||
```
|
||||
As the `GROUP BY` section has three key expressions, the result contains four tables with subtotals "rolled up" from right to left:
|
||||
|
||||
- `GROUP BY year, month, day`;
|
||||
- `GROUP BY year, month` (and `day` column is filled with zeros);
|
||||
- `GROUP BY year` (now `month, day` columns are both filled with zeros);
|
||||
- and totals (and all three key expression columns are zeros).
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 10 │ 15 │ 1 │
|
||||
│ 2020 │ 1 │ 5 │ 1 │
|
||||
│ 2019 │ 1 │ 5 │ 1 │
|
||||
│ 2020 │ 1 │ 15 │ 1 │
|
||||
│ 2019 │ 1 │ 15 │ 1 │
|
||||
│ 2020 │ 10 │ 5 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 0 │ 0 │ 2 │
|
||||
│ 2020 │ 0 │ 0 │ 4 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 0 │ 6 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
```
|
||||
|
||||
## WITH CUBE Modifier {#with-cube-modifier}
|
||||
|
||||
`WITH CUBE` modifier is used to calculate subtotals for every combination of the key expressions in the `GROUP BY` list. The subtotals rows are added after the result table.
|
||||
|
||||
In the subtotals rows the values of all "grouped" key expressions are set to `0` or empty line.
|
||||
|
||||
!!! note "Note"
|
||||
Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results.
|
||||
|
||||
**Example**
|
||||
|
||||
Consider the table t:
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┐
|
||||
│ 2019 │ 1 │ 5 │
|
||||
│ 2019 │ 1 │ 15 │
|
||||
│ 2020 │ 1 │ 5 │
|
||||
│ 2020 │ 1 │ 15 │
|
||||
│ 2020 │ 10 │ 5 │
|
||||
│ 2020 │ 10 │ 15 │
|
||||
└──────┴───────┴─────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE;
|
||||
```
|
||||
|
||||
As the `GROUP BY` section has three key expressions, the result contains eight tables with subtotals for all key expression combinations:
|
||||
|
||||
- `GROUP BY year, month, day`
|
||||
- `GROUP BY year, month`
|
||||
- `GROUP BY year, day`
|
||||
- `GROUP BY year`
|
||||
- `GROUP BY month, day`
|
||||
- `GROUP BY month`
|
||||
- `GROUP BY day`
|
||||
- and totals.
|
||||
|
||||
Columns, excluded from `GROUP BY`, are filled with zeros.
|
||||
|
||||
```text
┌─year─┬─month─┬─day─┬─count()─┐
│ 2020 │    10 │  15 │       1 │
│ 2020 │     1 │   5 │       1 │
│ 2019 │     1 │   5 │       1 │
│ 2020 │     1 │  15 │       1 │
│ 2019 │     1 │  15 │       1 │
│ 2020 │    10 │   5 │       1 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 2019 │     1 │   0 │       2 │
│ 2020 │     1 │   0 │       2 │
│ 2020 │    10 │   0 │       2 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 2020 │     0 │   5 │       2 │
│ 2019 │     0 │   5 │       1 │
│ 2020 │     0 │  15 │       2 │
│ 2019 │     0 │  15 │       1 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│ 2019 │     0 │   0 │       2 │
│ 2020 │     0 │   0 │       4 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│    0 │     1 │   5 │       2 │
│    0 │    10 │  15 │       1 │
│    0 │    10 │   5 │       1 │
│    0 │     1 │  15 │       2 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│    0 │     1 │   0 │       4 │
│    0 │    10 │   0 │       2 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│    0 │     0 │   5 │       3 │
│    0 │     0 │  15 │       3 │
└──────┴───────┴─────┴─────────┘
┌─year─┬─month─┬─day─┬─count()─┐
│    0 │     0 │   0 │       6 │
└──────┴───────┴─────┴─────────┘
```

## WITH TOTALS Modifier {#with-totals-modifier}

If the `WITH TOTALS` modifier is specified, another row will be calculated. This row will have key columns containing default values (zeros or empty strings), and columns of aggregate functions with the values calculated across all the rows (the "total" values).

@ -88,8 +236,6 @@ SELECT
FROM hits
```
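
The diff view collapses most of this example. Purely as a hedged illustration of the modifier described above, a `WITH TOTALS` query over the same `hits` table could look like the sketch below (the `domain` key column is an assumption taken from the neighbouring example):

```sql
-- the extra row produced by WITH TOTALS has `domain` set to its default value
-- and count() computed across all selected rows
SELECT
    domain,
    count() AS hits_count
FROM hits
GROUP BY domain
    WITH TOTALS
```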

However, in contrast to standard SQL, if the table does not have any rows (either there are none at all, or there are none after using WHERE to filter), an empty result is returned, and not the result from one of the rows containing the initial values of aggregate functions.

As opposed to MySQL (and conforming to standard SQL), you can't get a value of a column that is not in a key or an aggregate function (except constant expressions). To work around this, you can use the `any` aggregate function (get the first encountered value) or `min`/`max`.

Example:

@ -105,10 +251,6 @@ GROUP BY domain
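
The example body is collapsed by the diff view. Purely as an illustrative sketch of the workaround described above, a query of that shape could look like this (the `referrer` column is a hypothetical non-key column):

```sql
-- `domain` is the grouping key; `referrer` is not, so it is wrapped in any()
-- to pick one arbitrary value per group instead of being selected directly
SELECT
    domain,
    any(referrer) AS some_referrer,
    count() AS hits_count
FROM hits
GROUP BY domain
```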

For every different key value encountered, `GROUP BY` calculates a set of aggregate function values.

`GROUP BY` is not supported for array columns.

A constant can't be specified as an argument for an aggregate function, for example `sum(1)`. Instead of this, you can get rid of the constant, for example `count()`.

## Implementation Details {#implementation-details}

Aggregation is one of the most important features of a column-oriented DBMS, and thus its implementation is one of the most heavily optimized parts of ClickHouse. By default, aggregation is done in memory using a hash table. It has 40+ specializations that are chosen automatically depending on the "grouping key" data types.

@ -20,7 +20,7 @@ SELECT [DISTINCT] expr_list
[GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON <expr_list>)|(USING <column_list>)
[PREWHERE expr]
[WHERE expr]
[GROUP BY expr_list] [WITH TOTALS]
[GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS]
[HAVING expr]
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
[LIMIT [offset_value, ]n BY columns]

@ -204,7 +204,7 @@ SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]

## Managing ReplicatedMergeTree Tables {#query-language-system-replicated}

ClickHouse can manage background replication related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables.
ClickHouse can manage background replication related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication/#table_engines-replication) tables.

### STOP FETCHES {#query_language-system-stop-fetches}

@ -57,7 +57,7 @@ Identifiers are:

Identifiers can be quoted or non-quoted. The latter is preferred.

Non-quoted identifiers must match the regex `^[a-zA-Z_][0-9a-zA-Z_]*$` and can not be equal to [keywords](#syntax-keywords). Examples: `x, _1, X_y__Z123_.`
Non-quoted identifiers must match the regex `^[0-9a-zA-Z_]*[a-zA-Z_]$` and can not be equal to [keywords](#syntax-keywords). Examples: `x, _1, X_y__Z123_.`

If you want to use identifiers the same as keywords or you want to use other symbols in identifiers, quote it using double quotes or backticks, for example, `"id"`, `` `id` ``.

@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
PRIMARY KEY key1, key2
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
LAYOUT(LAYOUT_NAME([param_name param_value]))
LIFETIME([MIN val1] MAX val2)
LIFETIME({MIN min_val MAX max_val | max_val})
```
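
For reference, concrete `LIFETIME` clauses under the new `{MIN min_val MAX max_val | max_val}` syntax might look like this (the values are arbitrary):

```sql
-- refresh the dictionary at a random moment between 300 and 360 seconds
LIFETIME(MIN 300 MAX 360)
-- or give a single value, used as the upper bound
LIFETIME(360)
```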

Creates an [external dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) with the given [structure](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [layout](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) and [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).

@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
PRIMARY KEY key1, key2
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
LAYOUT(LAYOUT_NAME([param_name param_value]))
LIFETIME([MIN val1] MAX val2)
LIFETIME({MIN min_val MAX max_val | max_val})
```

Creates an [external dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) with the given [structure](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [layout](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) and [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).

@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
PRIMARY KEY key1, key2
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
LAYOUT(LAYOUT_NAME([param_name param_value]))
LIFETIME([MIN val1] MAX val2)
LIFETIME({MIN min_val MAX max_val | max_val})
```

Creates an [external dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) with the given [structure](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [layout](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) and [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).

@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
PRIMARY KEY key1, key2
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
LAYOUT(LAYOUT_NAME([param_name param_value]))
LIFETIME([MIN val1] MAX val2)
LIFETIME({MIN min_val MAX max_val | max_val})
```

Creates an [external dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) with the given [structure](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [layout](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) and [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).

@ -1068,4 +1068,45 @@ ClickHouse использует ZooKeeper для хранения метадан

- [Access control](../access-rights.md#access-control)

## user_directories {#user_directories}

A section of the configuration file that contains the following settings:
- The path to the configuration file with predefined users.
- The path to the directory where users created by SQL commands are stored.

If this section is defined, the paths from [users_config](../../operations/server-configuration-parameters/settings.md#users-config) and [access_control_path](../../operations/server-configuration-parameters/settings.md#access_control_path) are not used.

The `user_directories` section can contain any number of items; the order of the items defines their priority (the higher the item, the higher the priority).

**Example**

``` xml
<user_directories>
    <users_xml>
        <path>/etc/clickhouse-server/users.xml</path>
    </users_xml>
    <local_directory>
        <path>/var/lib/clickhouse/access/</path>
    </local_directory>
</user_directories>
```

You can also specify the `memory` setting, which means storing the information only in memory, without writing it to disk, and the `ldap` setting, which means storing the information on an [LDAP server](https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol).

To add an LDAP server as a remote directory of users that are not defined locally, define a single `ldap` section with the following parameters:
- `server` — the name of one of the LDAP servers defined in the `ldap_servers` section of the configuration file. This parameter is optional and can be empty.
- `roles` — a section with a list of locally defined roles that will be assigned to every user retrieved from the LDAP server. If no roles are specified, the user will not be able to perform any actions after authentication. If any of the listed roles is not defined locally at the time of authentication, the authentication attempt fails as if the provided password were incorrect.

**Example**

``` xml
<ldap>
    <server>my_ldap_server</server>
    <roles>
        <my_local_role1 />
        <my_local_role2 />
    </roles>
</ldap>
```
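
As a hedged sketch of how these pieces fit together, the `ldap` entry would sit inside `user_directories` next to the other sources (the server name is hypothetical):

``` xml
<user_directories>
    <users_xml>
        <path>/etc/clickhouse-server/users.xml</path>
    </users_xml>
    <!-- users not found locally are looked up on the LDAP server -->
    <ldap>
        <server>my_ldap_server</server>
        <roles>
            <my_local_role1 />
        </roles>
    </ldap>
</user_directories>
```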

[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/server_configuration_parameters/settings/) <!--hide-->

@ -289,6 +289,54 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (

Disabled by default.

## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number}

Enables or disables parsing enum values as enum IDs for the TSV input format.

Possible values:

- 0 — enum values are parsed as values.
- 1 — enum values are parsed as enum IDs.

Default value: 0.

**Example**

Consider the table:

```sql
CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory();
```

When the `input_format_tsv_enum_as_number` setting is enabled:

```sql
SET input_format_tsv_enum_as_number = 1;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 1;
SELECT * FROM table_with_enum_column_for_tsv_insert;
```

Result:

```text
┌──Id─┬─Value──┐
│ 102 │ second │
└─────┴────────┘
┌──Id─┬─Value──┐
│ 103 │ first  │
└─────┴────────┘
```

When the `input_format_tsv_enum_as_number` setting is disabled, the `INSERT` query:

```sql
SET input_format_tsv_enum_as_number = 0;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
```

throws an exception.

## input_format_null_as_default {#settings-input-format-null-as-default}

Enables or disables the use of default values in cases when the input data contains `NULL`, but the type of the corresponding column is not `Nullable(T)` (for text input formats).

@ -1127,6 +1175,50 @@ SELECT area/period FROM account_orders FORMAT JSON;

For the CSV format, enables or disables parsing of an unquoted `NULL` string as a literal (a synonym for `\N`).

## input_format_csv_enum_as_number {#settings-input_format_csv_enum_as_number}

Enables or disables parsing enum values as enum IDs for the CSV input format.

Possible values:

- 0 — enum values are parsed as values.
- 1 — enum values are parsed as enum IDs.

Default value: 0.

**Example**

Consider the table:

```sql
CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory();
```

When the `input_format_csv_enum_as_number` setting is enabled:

```sql
SET input_format_csv_enum_as_number = 1;
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2;
SELECT * FROM table_with_enum_column_for_csv_insert;
```

Result:

```text
┌──Id─┬─Value──┐
│ 102 │ second │
└─────┴────────┘
```

When the `input_format_csv_enum_as_number` setting is disabled, the `INSERT` query:

```sql
SET input_format_csv_enum_as_number = 0;
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2;
```

throws an exception.

## output_format_csv_crlf_end_of_line {#settings-output-format-csv-crlf-end-of-line}

Use CRLF (DOS/Windows style) instead of LF (Unix style) as the line separator for the CSV output format.

@ -2095,4 +2187,10 @@ SELECT CAST(toNullable(toInt32(0)) AS Int32) as x, toTypeName(x);

Default value: `1`.

## output_format_tsv_null_representation {#output_format_tsv_null_representation}

Allows configuring the `NULL` representation for the [TSV](../../interfaces/formats.md#tabseparated) output format. The setting only controls the output format; `\N` is the only supported representation for the TSV input format.

Default value: `\N`.
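
A minimal usage sketch (the output is shown as a comment and is an assumption about a typical session):

```sql
SET output_format_tsv_null_representation = 'NULL';
SELECT NULL AS x FORMAT TSV;
-- prints: NULL   (with the default setting it would print \N)
```
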
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->

70
docs/ru/operations/system-tables/replicated_fetches.md
Normal file
@ -0,0 +1,70 @@

# system.replicated_fetches {#system_tables-replicated_fetches}

Contains information about currently running background fetches of data parts from other replicas.

Columns:

- `database` ([String](../../sql-reference/data-types/string.md)) — database name.

- `table` ([String](../../sql-reference/data-types/string.md)) — table name.

- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — time, in seconds, elapsed since the fetch of the part started.

- `progress` ([Float64](../../sql-reference/data-types/float.md)) — fraction of the work completed, from 0 to 1.

- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — name of the part being fetched.

- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — absolute path to the part being fetched.

- `partition_id` ([String](../../sql-reference/data-types/string.md)) — partition identifier.

- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — total size, in bytes, of the compressed data in the part being fetched.

- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — size, in bytes, of the compressed data already read from the part being fetched.

- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — absolute path to the source replica.

- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — hostname of the source replica.

- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — port number of the source replica.

- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — name of the interserver scheme.

- `URI` ([String](../../sql-reference/data-types/string.md)) — uniform resource identifier.

- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — flag indicating whether the `TO DETACHED` clause is used in the currently running background fetch.

- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — thread identifier.

**Example**

``` sql
SELECT * FROM system.replicated_fetches LIMIT 1 FORMAT Vertical;
```

``` text
Row 1:
──────
database:                    default
table:                       t
elapsed:                     7.243039876
progress:                    0.41832135995612835
result_part_name:            all_0_0_0
result_part_path:            /var/lib/clickhouse/store/700/70080a04-b2de-4adf-9fa5-9ea210e81766/all_0_0_0/
partition_id:                all
total_size_bytes_compressed: 1052783726
bytes_read_compressed:       440401920
source_replica_path:         /clickhouse/test/t/replicas/1
source_replica_hostname:     node1
source_replica_port:         9009
interserver_scheme:          http
URI:                         http://node1:9009/?endpoint=DataPartsExchange%3A%2Fclickhouse%2Ftest%2Ft%2Freplicas%2F1&part=all_0_0_0&client_protocol_version=4&compress=false
to_detached:                 0
thread_id:                   54
```
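
A small usage sketch, selecting just a few of the columns documented above to watch fetch progress:

``` sql
SELECT database, table, result_part_name, round(100 * progress) AS progress_pct, elapsed
FROM system.replicated_fetches;
```
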
**See Also**

- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system/#query-language-system-replicated)

[Оригинальная статья](https://clickhouse.tech/docs/en/operations/system_tables/replicated_fetches) <!--hide-->

@ -44,8 +44,6 @@ SELECT sum(y) FROM t_null_big
└────────┘
```

The `sum` function treats `NULL` as `0`. In particular, this means that if the function is applied to a selection where all the values are `NULL`, the result is `0`, not `NULL`.

Now let's use the `groupArray` function to build an array from the column `y`:

``` sql
@ -0,0 +1,40 @@

---
toc_priority: 150
---

## initializeAggregation {#initializeaggregation}

Initializes aggregation for the input rows. Intended for functions with the `State` suffix.
It lets you run tests or work with columns of the `AggregateFunction` type and `AggregatingMergeTree` tables.

**Syntax**

``` sql
initializeAggregation (aggregate_function, column_1, column_2);
```

**Parameters**

- `aggregate_function` — name of the aggregate function whose state should be created. [String](../../../sql-reference/data-types/string.md#string).
- `column_n` — column that is passed to the aggregate function as an argument. [String](../../../sql-reference/data-types/string.md#string).

**Returned value**

Returns the result of aggregating the input data. The return type is the same as that of the function passed as the first argument to `initializeAggregation`.

For example, the return type of functions with the `State` suffix is `AggregateFunction`.

**Example**

Query:

```sql
SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000);
```

Result:

```text
┌─uniqMerge(state)─┐
│                3 │
└──────────────────┘
```
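
A further hedged sketch of the second use case mentioned above, writing ready-made states into an `AggregatingMergeTree` table (the table and column names are invented for illustration):

```sql
-- hypothetical table storing pre-aggregated uniq states per site
CREATE TABLE agg_visits
(
    site_id  UInt32,
    visitors AggregateFunction(uniq, UInt64)
)
ENGINE = AggregatingMergeTree()
ORDER BY site_id;

-- initializeAggregation produces a value of type AggregateFunction(uniq, UInt64)
INSERT INTO agg_visits
SELECT 1 AS site_id, initializeAggregation('uniqState', toUInt64(42)) AS visitors;

-- read the state back with the matching -Merge combinator
SELECT site_id, uniqMerge(visitors) AS unique_visitors
FROM agg_visits
GROUP BY site_id;
```
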
@ -1157,6 +1157,7 @@ SELECT arrayCumSum([1, 1, 1, 1]) AS res
┌─res──────────┐
│ [1, 2, 3, 4] │
└──────────────┘
```

## arrayAUC {#arrayauc}

@ -234,10 +234,124 @@ WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(d
|
||||
|
||||
Переводит дату-с-временем в номер секунды, начиная с некоторого фиксированного момента в прошлом.
|
||||
|
||||
## date_trunc {#date_trunc}
|
||||
|
||||
Отсекает от даты и времени части, меньшие чем указанная часть.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
date_trunc(unit, value[, timezone])
|
||||
```
|
||||
|
||||
Синоним: `dateTrunc`.
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `unit` — Название части даты или времени. [String](../syntax.md#syntax-string-literal).
|
||||
Возможные значения:
|
||||
|
||||
- `second`
|
||||
- `minute`
|
||||
- `hour`
|
||||
- `day`
|
||||
- `week`
|
||||
- `month`
|
||||
- `quarter`
|
||||
- `year`
|
||||
|
||||
- `value` — Дата и время. [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md).
|
||||
- `timezone` — [Часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) для возвращаемого значения (необязательно). Если параметр не задан, используется часовой пояс параметра `value`. [String](../../sql-reference/data-types/string.md)
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Дата и время, отсеченные до указанной части.
|
||||
|
||||
Тип: [Datetime](../../sql-reference/data-types/datetime.md).
|
||||
|
||||
**Примеры**
|
||||
|
||||
Запрос без указания часового пояса:
|
||||
|
||||
``` sql
|
||||
SELECT now(), date_trunc('hour', now());
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌───────────────now()─┬─date_trunc('hour', now())─┐
|
||||
│ 2020-09-28 10:40:45 │ 2020-09-28 10:00:00 │
|
||||
└─────────────────────┴───────────────────────────┘
|
||||
```
|
||||
|
||||
Запрос с указанием часового пояса:
|
||||
|
||||
```sql
|
||||
SELECT now(), date_trunc('hour', now(), 'Europe/Moscow');
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
```text
|
||||
┌───────────────now()─┬─date_trunc('hour', now(), 'Europe/Moscow')─┐
|
||||
│ 2020-09-28 10:46:26 │ 2020-09-28 13:00:00 │
|
||||
└─────────────────────┴────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**См. также**
|
||||
|
||||
- [toStartOfInterval](#tostartofintervaltime-or-data-interval-x-unit-time-zone)
|
||||
|
||||
## now {#now}
|
||||
|
||||
Принимает ноль аргументов и возвращает текущее время на один из моментов выполнения запроса.
|
||||
Функция возвращает константу, даже если запрос выполнялся долго.
|
||||
Возвращает текущую дату и время.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
now([timezone])
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `timezone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) для возвращаемого значения (необязательно). [String](../../sql-reference/data-types/string.md)
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Текущие дата и время.
|
||||
|
||||
Тип: [Datetime](../../sql-reference/data-types/datetime.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос без указания часового пояса:
|
||||
|
||||
``` sql
|
||||
SELECT now();
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌───────────────now()─┐
|
||||
│ 2020-10-17 07:42:09 │
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
Запрос с указанием часового пояса:
|
||||
|
||||
``` sql
|
||||
SELECT now('Europe/Moscow');
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─now('Europe/Moscow')─┐
|
||||
│ 2020-10-17 10:42:23 │
|
||||
└──────────────────────┘
|
||||
```
|
||||
|
||||
## today {#today}
|
||||
|
||||
|
382
docs/ru/sql-reference/functions/encryption-functions.md
Normal file
@ -0,0 +1,382 @@
|
||||
---
|
||||
toc_priority: 67
|
||||
toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438 \u0434\u043b\u044f \u0448\u0438\u0444\u0440\u043e\u0432\u0430\u043d\u0438\u044f"
|
||||
---
|
||||
|
||||
# Функции шифрования {#encryption-functions}
|
||||
|
||||
Даннвые функции реализуют шифрование и расшифровку данных с помощью AES (Advanced Encryption Standard) алгоритма.
|
||||
|
||||
Длина ключа зависит от режима шифрования. Он может быть длинной в 16, 24 и 32 байта для режимов шифрования `-128-`, `-196-` и `-256-` соответственно.
|
||||
|
||||
Длина инициализирующего вектора всегда 16 байт (лишнии байты игнорируются).
|
||||
|
||||
Обратите внимание, что эти функции работают медленно.
|
||||
|
||||
## encrypt {#encrypt}
|
||||
|
||||
Функция поддерживает шифрование данных следующими режимами:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
|
||||
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
|
||||
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
|
||||
- aes-128-ofb, aes-192-ofb, aes-256-ofb
|
||||
- aes-128-gcm, aes-192-gcm, aes-256-gcm
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `mode` — режим шифрования. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `plaintext` — текст, который будет зашифрован. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — инициализирующий вектор. Обязателен для `-gcm` режимов, для остальных режимов необязателен. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `aad` — дополнительные аутентифицированные данные. Не шифруются, но влияют на расшифровку. Параметр работает только с `-gcm` режимами. Для остальных вызовет исключение. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Зашифрованная строка. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Примеры**
|
||||
|
||||
Создадим такую таблицу:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Вставим эти данные:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Пример без `iv`:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐
|
||||
│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │
|
||||
│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │
|
||||
│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │
|
||||
└─────────────┴──────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Пример с `iv`:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐
|
||||
│ aes-256-ctr │ │
|
||||
│ aes-256-ctr │ 7FB039F7 │
|
||||
│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │
|
||||
└─────────────┴───────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Пример в режиме `-gcm`:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐
|
||||
│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │
|
||||
│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │
|
||||
│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │
|
||||
└─────────────┴────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Пример в режиме `-gcm` и с `aad`:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
|
||||
│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │
|
||||
│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │
|
||||
│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │
|
||||
└─────────────┴────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## aes_encrypt_mysql {#aes_encrypt_mysql}
|
||||
|
||||
Совместима с шифрованием myqsl, результат может быть расшифрован функцией [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt).
|
||||
|
||||
Функция поддерживает шифрофание данных следующими режимами:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
|
||||
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
|
||||
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
|
||||
- aes-128-ofb, aes-192-ofb, aes-256-ofb
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
```sql
|
||||
aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `mode` — режим шифрования. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `plaintext` — текст, который будет зашифрован. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — инициализирующий вектор. Необязателен. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Зашифрованная строка. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Примеры**
|
||||
|
||||
Создадим такую таблицу:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Вставим эти данные:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Пример без `iv`:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐
|
||||
│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │
|
||||
│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │
|
||||
│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │
|
||||
└─────────────┴──────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Пример с `iv`:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐
|
||||
│ aes-256-cfb128 │ │
|
||||
│ aes-256-cfb128 │ 7FB039F7 │
|
||||
│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │
|
||||
└────────────────┴────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## decrypt {#decrypt}
|
||||
|
||||
Функция поддерживает расшифровку данных следующими режимами:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
|
||||
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
|
||||
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
|
||||
- aes-128-ofb, aes-192-ofb, aes-256-ofb
|
||||
- aes-128-gcm, aes-192-gcm, aes-256-gcm
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
```sql
|
||||
decrypt('mode', 'ciphertext', 'key' [, iv, aad])
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `mode` — режим шифрования. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `ciphertext` — зашифрованный текст, который будет расшифрован. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — инициализирующий вектор. Обязателен для `-gcm` режимов, для остальных режимов опциональный. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `aad` — дополнительные аутентифицированные данные. Текст не будет расшифрован, если это значение неверно. Работает только с `-gcm` режимами. Для остальных вызовет исключение. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Расшифрованная строка. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Примеры**
|
||||
|
||||
Создадим такую таблицу:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Вставим эти данные:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
|
||||
SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
```text
|
||||
┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐
|
||||
│ aes-128-ecb │ │
|
||||
│ aes-128-ecb │ text │
|
||||
│ aes-128-ecb │ What Is ClickHouse? │
|
||||
└─────────────┴─────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## aes_decrypt_mysql {#aes_decrypt_mysql}
|
||||
|
||||
Совместима с шифрованием myqsl и может расшифровать данные, зашифрованные функцией [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt).
|
||||
|
||||
Функция поддерживает расшифровку данных следующими режимами:
|
||||
|
||||
- aes-128-ecb, aes-192-ecb, aes-256-ecb
|
||||
- aes-128-cbc, aes-192-cbc, aes-256-cbc
|
||||
- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1
|
||||
- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8
|
||||
- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128
|
||||
- aes-128-ofb, aes-192-ofb, aes-256-ofb
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
```sql
|
||||
aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `mode` — режим шифрования. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `ciphertext` — зашифрованный текст, который будет расшифрован. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — инициализирующий вектор. Необязателен. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Расшифрованная строка. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Примеры**
|
||||
|
||||
Создадим такую таблицу:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE encryption_test
|
||||
(
|
||||
input String,
|
||||
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
|
||||
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
|
||||
key32 String DEFAULT substring(key, 1, 32),
|
||||
key24 String DEFAULT substring(key, 1, 24),
|
||||
key16 String DEFAULT substring(key, 1, 16)
|
||||
) Engine = Memory;
|
||||
```
|
||||
|
||||
Вставим эти данные:
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐
|
||||
│ aes-128-cbc │ │
|
||||
│ aes-128-cbc │ text │
|
||||
│ aes-128-cbc │ What Is ClickHouse? │
|
||||
└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/ru/sql-reference/functions/encryption_functions/) <!--hide-->
|
@ -306,3 +306,68 @@ execute_native_thread_routine
|
||||
start_thread
|
||||
clone
|
||||
```
|
||||
|
||||
## tid {#tid}
|
||||
|
||||
Возвращает id потока, в котором обрабатывается текущий [Block](https://clickhouse.tech/docs/ru/development/architecture/#block).
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
tid()
|
||||
```
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Id текущего потока. [Uint64](../../sql-reference/data-types/int-uint.md#uint-ranges).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT tid();
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─tid()─┐
|
||||
│ 3878 │
|
||||
└───────┘
|
||||
```
|
||||
## logTrace {#logtrace}
|
||||
|
||||
Выводит сообщение в лог сервера для каждого [Block](https://clickhouse.tech/docs/ru/development/architecture/#block).
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
logTrace('message')
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `message` — сообщение, которое отправляется в серверный лог. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Всегда возвращает 0.
|
||||
|
||||
**Example**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT logTrace('logTrace message');
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─logTrace('logTrace message')─┐
|
||||
│ 0 │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/query_language/functions/introspection/) <!--hide-->
|
@ -319,6 +319,62 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut
|
||||
|
||||
Функция принимает число или дату или дату-с-временем и возвращает строку, содержащую байты, представляющие соответствующее значение в host order (little endian). При этом, отбрасываются нулевые байты с конца. Например, значение 255 типа UInt32 будет строкой длины 1 байт.
|
||||
|
||||
## reinterpretAsUUID {#reinterpretasuuid}
|
||||
|
||||
Функция принимает шестнадцатибайтную строку и интерпретирует ее байты в network order (big-endian). Если строка имеет недостаточную длину, то функция работает так, как будто строка дополнена необходимым количетсвом нулевых байт с конца. Если строка длиннее, чем шестнадцать байт, то игнорируются лишние байты с конца.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
reinterpretAsUUID(fixed_string)
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `fixed_string` — cтрока с big-endian порядком байтов. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Значение типа [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type).
|
||||
|
||||
**Примеры**
|
||||
|
||||
Интерпретация строки как UUID.
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))─┐
|
||||
│ 08090a0b-0c0d-0e0f-0001-020304050607 │
|
||||
└───────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Переход в UUID и обратно.
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
WITH
|
||||
generateUUIDv4() AS uuid,
|
||||
identity(lower(hex(reverse(reinterpretAsString(uuid))))) AS str,
|
||||
reinterpretAsUUID(reverse(unhex(str))) AS uuid2
|
||||
SELECT uuid = uuid2;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─equals(uuid, uuid2)─┐
|
||||
│ 1 │
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
## CAST(x, T) {#type_conversion_function-cast}
|
||||
|
||||
Преобразует x в тип данных t.
|
||||
|
@ -16,7 +16,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
|
||||
PRIMARY KEY key1, key2
|
||||
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
|
||||
LAYOUT(LAYOUT_NAME([param_name param_value]))
|
||||
LIFETIME([MIN val1] MAX val2)
|
||||
LIFETIME({MIN min_val MAX max_val | max_val})
|
||||
```
|
||||
|
||||
Создаёт [внешний словарь](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) с заданной [структурой](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [источником](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [способом размещения в памяти](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) и [периодом обновления](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
|
||||
@ -27,5 +27,5 @@ LIFETIME([MIN val1] MAX val2)
|
||||
|
||||
Смотрите [Внешние словари](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/dictionary)
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/dictionary)
|
||||
<!--hide-->
|
@ -27,9 +27,9 @@ toc_title: FROM
|
||||
|
||||
### Недостатки {#drawbacks}
|
||||
|
||||
Запросы, которые используют `FINAL` выполняются не так быстро, как аналогичные запросы без него, потому что:
|
||||
Запросы, которые используют `FINAL` выполняются немного медленее, чем аналогичные запросы без него, потому что:
|
||||
|
||||
- Запрос выполняется в одном потоке, и данные мёржатся во время выполнения запроса.
|
||||
- Данные мёржатся во время выполнения запроса.
|
||||
- Запросы с модификатором `FINAL` читают столбцы первичного ключа в дополнение к столбцам, используемым в запросе.
|
||||
|
||||
**В большинстве случаев избегайте использования `FINAL`.** Общий подход заключается в использовании агрегирующих запросов, которые предполагают, что фоновые процессы движков семейства `MergeTree` ещё не случились (например, сами отбрасывают дубликаты). {## TODO: examples ##}
|
||||
|
@ -43,6 +43,153 @@ toc_title: GROUP BY
|
||||
|
||||
Если в `GROUP BY` передать несколько ключей, то в результате мы получим все комбинации выборки, как если бы `NULL` был конкретным значением.
|
||||
|
||||
## Модификатор WITH ROLLUP {#with-rollup-modifier}
|
||||
|
||||
Модификатор `WITH ROLLUP` применяется для подсчета подытогов для ключевых выражений. При этом учитывается порядок следования ключевых выражений в списке `GROUP BY`. Подытоги подсчитываются в обратном порядке: сначала для последнего ключевого выражения в списке, потом для предпоследнего и так далее вплоть до самого первого ключевого выражения.
|
||||
|
||||
Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым строки уже сгруппированы, указывается значение `0` или пустая строка.
|
||||
|
||||
!!! note "Примечание"
|
||||
Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов.
|
||||
|
||||
**Пример**
|
||||
|
||||
Рассмотрим таблицу t:
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┐
|
||||
│ 2019 │ 1 │ 5 │
|
||||
│ 2019 │ 1 │ 15 │
|
||||
│ 2020 │ 1 │ 5 │
|
||||
│ 2020 │ 1 │ 15 │
|
||||
│ 2020 │ 10 │ 5 │
|
||||
│ 2020 │ 10 │ 15 │
|
||||
└──────┴───────┴─────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
|
||||
```
|
||||
|
||||
Поскольку секция `GROUP BY` содержит три ключевых выражения, результат состоит из четырех таблиц с подытогами, которые как бы "сворачиваются" справа налево:
|
||||
|
||||
- `GROUP BY year, month, day`;
|
||||
- `GROUP BY year, month` (а колонка `day` заполнена нулями);
|
||||
- `GROUP BY year` (теперь обе колонки `month, day` заполнены нулями);
|
||||
- и общий итог (все три колонки с ключевыми выражениями заполнены нулями).
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 10 │ 15 │ 1 │
|
||||
│ 2020 │ 1 │ 5 │ 1 │
|
||||
│ 2019 │ 1 │ 5 │ 1 │
|
||||
│ 2020 │ 1 │ 15 │ 1 │
|
||||
│ 2019 │ 1 │ 15 │ 1 │
|
||||
│ 2020 │ 10 │ 5 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 0 │ 0 │ 2 │
|
||||
│ 2020 │ 0 │ 0 │ 4 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 0 │ 6 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
```
|
||||
|
||||
## Модификатор WITH CUBE {#with-cube-modifier}
|
||||
|
||||
Модификатор `WITH CUBE` применятеся для расчета подытогов по всем комбинациям группировки ключевых выражений в списке `GROUP BY`.
|
||||
|
||||
Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым выполняется группировка, указывается значение `0` или пустая строка.
|
||||
|
||||
!!! note "Примечание"
|
||||
Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов.
|
||||
|
||||
**Пример**
|
||||
|
||||
Рассмотрим таблицу t:
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┐
|
||||
│ 2019 │ 1 │ 5 │
|
||||
│ 2019 │ 1 │ 15 │
|
||||
│ 2020 │ 1 │ 5 │
|
||||
│ 2020 │ 1 │ 15 │
|
||||
│ 2020 │ 10 │ 5 │
|
||||
│ 2020 │ 10 │ 15 │
|
||||
└──────┴───────┴─────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE;
|
||||
```
|
||||
|
||||
Поскольку секция `GROUP BY` содержит три ключевых выражения, результат состоит из восьми таблиц с подытогами — по таблице для каждой комбинации ключевых выражений:
|
||||
|
||||
- `GROUP BY year, month, day`
|
||||
- `GROUP BY year, month`
|
||||
- `GROUP BY year, day`
|
||||
- `GROUP BY year`
|
||||
- `GROUP BY month, day`
|
||||
- `GROUP BY month`
|
||||
- `GROUP BY day`
|
||||
- и общий итог.
|
||||
|
||||
Колонки, которые не участвуют в `GROUP BY`, заполнены нулями.
|
||||
|
||||
```text
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 10 │ 15 │ 1 │
|
||||
│ 2020 │ 1 │ 5 │ 1 │
|
||||
│ 2019 │ 1 │ 5 │ 1 │
|
||||
│ 2020 │ 1 │ 15 │ 1 │
|
||||
│ 2019 │ 1 │ 15 │ 1 │
|
||||
│ 2020 │ 10 │ 5 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 1 │ 0 │ 2 │
|
||||
│ 2020 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2020 │ 0 │ 5 │ 2 │
|
||||
│ 2019 │ 0 │ 5 │ 1 │
|
||||
│ 2020 │ 0 │ 15 │ 2 │
|
||||
│ 2019 │ 0 │ 15 │ 1 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 2019 │ 0 │ 0 │ 2 │
|
||||
│ 2020 │ 0 │ 0 │ 4 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 1 │ 5 │ 2 │
|
||||
│ 0 │ 10 │ 15 │ 1 │
|
||||
│ 0 │ 10 │ 5 │ 1 │
|
||||
│ 0 │ 1 │ 15 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 1 │ 0 │ 4 │
|
||||
│ 0 │ 10 │ 0 │ 2 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 5 │ 3 │
|
||||
│ 0 │ 0 │ 15 │ 3 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
┌─year─┬─month─┬─day─┬─count()─┐
|
||||
│ 0 │ 0 │ 0 │ 6 │
|
||||
└──────┴───────┴─────┴─────────┘
|
||||
```
|
||||
|
||||
|
||||
## Модификатор WITH TOTALS {#with-totals-modifier}
|
||||
|
||||
Если указан модификатор `WITH TOTALS`, то будет посчитана ещё одна строчка, в которой в столбцах-ключах будут содержаться значения по умолчанию (нули, пустые строки), а в столбцах агрегатных функций - значения, посчитанные по всем строкам («тотальные» значения).
|
||||
@ -86,8 +233,6 @@ SELECT
|
||||
FROM hits
|
||||
```
|
||||
|
||||
Но, в отличие от стандартного SQL, если в таблице нет строк (вообще нет или после фильтрации с помощью WHERE), в качестве результата возвращается пустой результат, а не результат из одной строки, содержащий «начальные» значения агрегатных функций.
|
||||
|
||||
В отличие от MySQL (и в соответствии со стандартом SQL), вы не можете получить какое-нибудь значение некоторого столбца, не входящего в ключ или агрегатную функцию (за исключением константных выражений). Для обхода этого вы можете воспользоваться агрегатной функцией any (получить первое попавшееся значение) или min/max.
|
||||
|
||||
Пример:
|
||||
@ -103,10 +248,6 @@ GROUP BY domain
|
||||
|
||||
GROUP BY вычисляет для каждого встретившегося различного значения ключей, набор значений агрегатных функций.
|
||||
|
||||
Не поддерживается GROUP BY по столбцам-массивам.
|
||||
|
||||
Не поддерживается указание констант в качестве аргументов агрегатных функций. Пример: `sum(1)`. Вместо этого, вы можете избавиться от констант. Пример: `count()`.
|
||||
|
||||
## Детали реализации {#implementation-details}
|
||||
|
||||
Агрегация является одной из наиболее важных возможностей столбцовых СУБД, и поэтому её реализация является одной из наиболее сильно оптимизированных частей ClickHouse. По умолчанию агрегирование выполняется в памяти с помощью хэш-таблицы. Она имеет более 40 специализаций, которые выбираются автоматически в зависимости от типов данных ключа группировки.
|
||||
|
@ -18,7 +18,7 @@ SELECT [DISTINCT] expr_list
|
||||
[GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON <expr_list>)|(USING <column_list>)
|
||||
[PREWHERE expr]
|
||||
[WHERE expr]
|
||||
[GROUP BY expr_list] [WITH TOTALS]
|
||||
[GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS]
|
||||
[HAVING expr]
|
||||
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
|
||||
[LIMIT [offset_value, ]n BY columns]
|
||||
|
@ -21,7 +21,7 @@ mkdocs-htmlproofer-plugin==0.0.3
|
||||
mkdocs-macros-plugin==0.4.20
|
||||
nltk==3.5
|
||||
nose==1.3.7
|
||||
protobuf==3.13.0
|
||||
protobuf==3.14.0
|
||||
numpy==1.19.2
|
||||
Pygments==2.5.2
|
||||
pymdown-extensions==8.0
|
||||
|
@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
|
||||
PRIMARY KEY key1, key2
|
||||
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
|
||||
LAYOUT(LAYOUT_NAME([param_name param_value]))
|
||||
LIFETIME([MIN val1] MAX val2)
|
||||
LIFETIME({MIN min_val MAX max_val | max_val})
|
||||
```
|
||||
|
||||
Oluşturuyor [dış sözlük](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) verilen ile [yapılı](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [kaynaklı](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [düzen](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) ve [ömür](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
|
||||
|
@ -2,17 +2,17 @@
|
||||
machine_translated: true
|
||||
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
|
||||
toc_priority: 3
|
||||
toc_title: "\u788C\u83BD\u7984Support:"
|
||||
toc_title: "\u5546\u4e1a\u652f\u6301"
|
||||
---
|
||||
|
||||
# ClickHouse商业支持服务提供商 {#clickhouse-commercial-support-service-providers}
|
||||
|
||||
!!! info "信息"
|
||||
如果您已经推出ClickHouse商业支持服务,请随时 [打开拉取请求](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/commercial/support.md) 将其添加到以下列表。
|
||||
如果您已经推出ClickHouse商业支持服务,请随时 [提交一个 pull-request](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/commercial/support.md) 将其添加到以下列表。
|
||||
|
||||
## 敏锐性 {#altinity}
|
||||
## Altinity {#altinity}
|
||||
|
||||
隆隆隆隆路虏脢..陇.貌.垄拢卢虏禄and陇.貌路.隆拢脳枚脢虏 隆隆隆隆路虏脢..陇.貌.垄拢卢虏禄.陇 访问 [www.altinity.com](https://www.altinity.com/) 欲了解更多信息.
|
||||
Altinity 自从 2017 年开始已经为企业提供 ClickHouse 支持服务。Altinity 的客户范围包含百强企业到初创企业。访问 [www.altinity.com](https://www.altinity.com/) 了解更多信息。
|
||||
|
||||
## Mafiree {#mafiree}
|
||||
|
||||
|
@ -21,15 +21,15 @@ toc_title: "\u266A\u64CD\u573A\u266A"
|
||||
|
||||
ClickHouse体验还有如下:
|
||||
[ClickHouse管理服务](https://cloud.yandex.com/services/managed-clickhouse)
|
||||
实例托管 [Yandex云](https://cloud.yandex.com/).
|
||||
更多信息 [云提供商](../commercial/cloud.md).
|
||||
实例托管 [Yandex云](https://cloud.yandex.com/)。
|
||||
更多信息 [云提供商](../commercial/cloud.md)。
|
||||
|
||||
ClickHouse体验平台界面实际上是通过ClickHouse [HTTP API](../interfaces/http.md)接口实现的.
|
||||
体验平台后端只是一个ClickHouse集群,没有任何额外的服务器端应用程序。
|
||||
体验平台也同样提供了ClickHouse HTTPS服务端口。
|
||||
|
||||
您可以使用任何HTTP客户端向体验平台进行查询,例如 [curl](https://curl.haxx.se) 或 [wget](https://www.gnu.org/software/wget/),或使用以下方式建立连接 [JDBC](../interfaces/jdbc.md) 或 [ODBC](../interfaces/odbc.md) 司机
|
||||
有关支持ClickHouse的软件产品的更多信息,请访问 [这里](../interfaces/index.md).
|
||||
您可以使用任何HTTP客户端向体验平台进行查询,例如 [curl](https://curl.haxx.se) 或 [wget](https://www.gnu.org/software/wget/),或使用以下方式建立连接 [JDBC](../interfaces/jdbc.md) 或 [ODBC](../interfaces/odbc.md) 驱动。
|
||||
有关支持ClickHouse的软件产品的更多信息,请访问 [这里](../interfaces/index.md)。
|
||||
|
||||
| 参数 | 值 |
|
||||
|:---------|:--------------------------------------|
|
||||
|
@ -7,35 +7,37 @@ toc_title: "\u6570\u636E\u5907\u4EFD"
|
||||
|
||||
# 数据备份 {#data-backup}
|
||||
|
||||
碌莽禄While: [复制](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [您不能使用类似MergeTree的引擎删除包含超过50Gb数据的表](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). 但是,这些保障措施并不涵盖所有可能的情况,可以规避。
|
||||
While [replication](../engines/table-engines/mergetree-family/replication.md) protects against data loss caused by hardware failures, it does not protect against human errors: accidentally deleting data, dropping the wrong table or a table on the wrong cluster, and software bugs that lead to incorrect data processing or data corruption. Mistakes like these can affect all replicas. ClickHouse has built-in safeguards that prevent some kinds of mistakes; for example, by default [you cannot drop a table with a MergeTree-like engine containing more than 50 GB of data](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). However, these safeguards do not cover all possible cases and can be circumvented.
|
||||
|
||||
To effectively mitigate possible human errors, you should carefully prepare a strategy for backing up and restoring your data **in advance**.
|
||||
To effectively mitigate possible human errors, you should prepare a strategy for backing up and restoring your data **in advance**.
|
||||
|
||||
Each company has different available resources and business requirements, so there is no universal ClickHouse backup and restore solution that fits every situation. What works for one gigabyte of data likely will not work for tens of petabytes. There are a variety of possible approaches with their own pros and cons, which are discussed below. It is a good idea to use several approaches instead of just one in order to compensate for their various shortcomings.
|
||||
Different companies have different available resources and business requirements, so there is no universal ClickHouse backup and restore solution that fits every situation. A solution that works for 1 GB of data will likely not work for tens of petabytes. There are a variety of possible approaches with their own pros and cons, which are discussed below. It is a good idea to combine several approaches instead of using just one, so that their respective shortcomings are compensated.
|
||||
|
||||
!!! note "Note"
|
||||
Keep in mind that if you backed something up and never tried to restore it, chances are that the restore will not work properly when you actually need it (or will at least take longer than the business can tolerate). So whatever backup approach you choose, make sure to automate the restore process as well and practice it regularly on a spare ClickHouse cluster.
|
||||
|
||||
## Duplicating Source Data Somewhere Else {#duplicating-source-data-somewhere-else}
|
||||
|
||||
Often the data that is ingested into ClickHouse is delivered through some kind of persistent queue, such as [Apache Kafka](https://kafka.apache.org). In this case it is possible to configure an additional set of subscribers that will read the same data stream while it is being written to ClickHouse and store it in cold storage. Most companies already have some default recommended cold storage, which could be an object store or a distributed filesystem like [HDFS](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html).
|
||||
Often the data that flows into ClickHouse is delivered through some kind of persistent queue, such as [Apache Kafka](https://kafka.apache.org). In this case it is possible to configure an additional set of subscribers that will read the same data stream while it is being written to ClickHouse and store it in cold storage. Most companies already have some default recommended cold storage, which could be an object store or a distributed filesystem like [HDFS](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html).
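A minimal sketch of this pattern, assuming a hypothetical `events` topic, broker address, schema and HDFS path: a separate Kafka consumer group feeds a materialized view that writes the same stream into cold storage.

```sql
-- Second subscription to the same topic, under its own consumer group.
CREATE TABLE default.events_queue
(
    ts DateTime,
    message String
)
ENGINE = Kafka
SETTINGS kafka_broker_list = 'kafka:9092',
         kafka_topic_list = 'events',
         kafka_group_name = 'clickhouse_cold_storage',
         kafka_format = 'JSONEachRow';

-- Cold storage target (could also be S3 or another object store).
CREATE TABLE default.events_cold
(
    ts DateTime,
    message String
)
ENGINE = HDFS('hdfs://namenode:9000/backup/events.jsonl', 'JSONEachRow');

-- Continuously copy the stream into cold storage.
CREATE MATERIALIZED VIEW default.events_to_cold TO default.events_cold AS
SELECT ts, message FROM default.events_queue;
```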
|
||||
|
||||
## Filesystem Snapshots {#filesystem-snapshots}
|
||||
|
||||
Some local filesystems provide snapshot functionality (for example, [ZFS](https://en.wikipedia.org/wiki/ZFS)), but they might not be the best choice for serving live queries. A possible solution is to create additional replicas with this kind of filesystem and exclude them from the [Distributed](../engines/table-engines/special/distributed.md) tables that are used for `SELECT` queries. Snapshots on such replicas will be out of reach of any queries that modify data. As a bonus, these replicas might have special hardware configurations with more disks attached per server, which would be cost-effective.
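As a rough sketch, the `Distributed` table used for live `SELECT` queries can simply point at a cluster definition that omits the snapshot replicas; the cluster and table names below are assumptions.

```sql
-- 'serving_cluster' is assumed to be defined in remote_servers and to list
-- only the replicas that serve live queries, not the ZFS-backed snapshot replicas.
CREATE TABLE default.events_all AS default.events_local
ENGINE = Distributed('serving_cluster', 'default', 'events_local', rand());
```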
|
||||
|
||||
## ツ环板-ョツ嘉ッツ偲 {#clickhouse-copier}
|
||||
## clickhouse-copier {#clickhouse-copier}
|
||||
|
||||
[ツ环板-ョツ嘉ッツ偲](utilities/clickhouse-copier.md) 是一个多功能工具,最初创建用于重新分片pb大小的表。 它还可用于备份和还原目的,因为它可以在ClickHouse表和集群之间可靠地复制数据。
|
||||
[clickhouse-copier](utilities/clickhouse-copier.md) is a versatile tool that was initially created to re-shard petabyte-sized tables. Because it can reliably copy data between ClickHouse tables and clusters, it can also be used for backup and restore purposes.
|
||||
|
||||
For smaller volumes of data, a simple `INSERT INTO ... SELECT ...` into a remote table can also work.
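For example, run on the backup server, a statement along these lines pulls a table from the source host over the native protocol; the host, credentials and table names are placeholders.

```sql
INSERT INTO backup_db.events
SELECT *
FROM remote('source-host:9000', 'default', 'events', 'backup_user', 'secret');
```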
|
||||
|
||||
## Manipulations with Parts {#manipulations-with-parts}
|
||||
|
||||
ClickHouse allows using the `ALTER TABLE ... FREEZE PARTITION ...` query to create a local copy of table partitions. This is implemented using hard links into the `/var/lib/clickhouse/shadow/` folder, so it usually does not consume extra disk space for old data. The created copies of files are not handled by the ClickHouse server, so you can just leave them there: you will have a simple backup that does not require any additional external system, but it will still be prone to hardware issues. For this reason, it is better to remotely copy them to another location and then remove the local copies. Distributed filesystems and object stores are still good options for this, but normal attached file servers with a large enough capacity might work as well (in this case the transfer happens over the network filesystem, for example with [rsync](https://en.wikipedia.org/wiki/Rsync)).
|
||||
ClickHouse allows using the `ALTER TABLE ... FREEZE PARTITION ...` query to create a local copy of table partitions. This is implemented with hard links (hardlinks) into the `/var/lib/clickhouse/shadow/` folder, so it usually does not consume extra disk space for old data. The created copies of files are not handled by the ClickHouse server, so you can just leave them there: you will have a simple backup that does not require any additional external system, but it will still be prone to hardware issues. For this reason, it is better to remotely copy them to another location and then remove the local copies. Distributed filesystems and object stores are still good options for this, but normal attached file servers with a large enough capacity might work as well (in this case the transfer happens over the network filesystem, for example with [rsync](https://en.wikipedia.org/wiki/Rsync)).
|
||||
|
||||
Data can be restored from a backup using `ALTER TABLE ... ATTACH PARTITION ...`.
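A minimal sketch of the full cycle for a table partitioned by month (the table name and partition value are assumptions):

```sql
-- Create hard-linked copies of the partition under /var/lib/clickhouse/shadow/.
ALTER TABLE default.events FREEZE PARTITION 202011;

-- Copy the frozen parts off the host; to restore, place them into the
-- table's detached/ directory on the target server, then:
ALTER TABLE default.events ATTACH PARTITION 202011;
```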
|
||||
|
||||
For more information about queries related to partition manipulation, see the [ALTER documentation](../sql-reference/statements/alter.md#alter_manipulations-with-partitions).
|
||||
|
||||
第三方工具可用于自动化此方法: [ツ环板backupョツ嘉ッツ偲](https://github.com/AlexAkulov/clickhouse-backup).
|
||||
A third-party tool is available to automate this approach: [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup).
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/backup/) <!--hide-->
|
||||
|
@ -33,10 +33,10 @@ ClickHouse 收集的指标项:
|
||||
- Various metrics on how the server uses computational resources.
|
||||
- Common statistics on query processing.
|
||||
|
||||
All of these metrics can be found in the [system.metrics](system-tables/metrics.md#system_tables-metrics), [system.events](system-tables/events.md#system_tables-events) and [system.asynchronous_metrics](system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) system tables.
|
||||
All of these metrics can be found in the [system.metrics](system-tables/metrics.md#system_tables-metrics), [system.events](system-tables/events.md#system_tables-events) and [system.asynchronous_metrics](system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) system tables.
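For example, these tables can be inspected with ordinary queries; the filters below are arbitrary.

```sql
SELECT metric, value FROM system.metrics WHERE metric LIKE '%Connection%';
SELECT event, value FROM system.events WHERE event = 'Query';
SELECT metric, value FROM system.asynchronous_metrics LIMIT 5;
```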
|
||||
|
||||
ClickHouse can be configured to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](server-configuration-parameters/settings.md#server_configuration_parameters-graphite) of the configuration file. Before configuring metrics export, you need to set up Graphite by following its [official guide](https://graphite.readthedocs.io/en/latest/install.html).
|
||||
ClickHouse can be configured to push monitoring information and metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](server-configuration-parameters/settings.md#server_configuration_parameters-graphite) of the server configuration file. Before configuring metrics export, you need to set up the Graphite service by following its [official guide](https://graphite.readthedocs.io/en/latest/install.html).
|
||||
|
||||
In addition, you can monitor server availability through the HTTP API. Send an HTTP GET request to `/ping`. If the server is available, it responds with `200 OK`.
|
||||
In addition, you can monitor server availability through the HTTP API. Send an HTTP GET request to `/ping`. If the server is available, it responds with `200 OK`.
|
||||
|
||||
To monitor servers in a cluster configuration, set the [max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns 200 OK if the replica is available and not lagging behind the other replicas. If the replica is lagging, the request returns `503 HTTP_SERVICE_UNAVAILABLE` together with information about the size of the backlog.
|
||||
To monitor servers in a cluster configuration, set the [max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and not lagging behind the other replicas. If the replica is lagging, the request returns `503 HTTP_SERVICE_UNAVAILABLE` together with information about the size of the backlog.
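The same delay threshold can also be applied per query, so that reads avoid replicas lagging by more than the given number of seconds; the table name below is hypothetical.

```sql
SELECT count()
FROM default.events_all
SETTINGS max_replica_delay_for_distributed_queries = 300;
```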
|
||||
|
@ -259,5 +259,5 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
|
||||
PRIMARY KEY key1, key2
|
||||
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
|
||||
LAYOUT(LAYOUT_NAME([param_name param_value]))
|
||||
LIFETIME([MIN val1] MAX val2)
|
||||
LIFETIME({MIN min_val MAX max_val | max_val})
|
||||
```
|
||||
|
@ -680,7 +680,7 @@ void updateSnapshot(Snapshot & snapshot, const Commit & commit, CommitDiff & fil
|
||||
for (auto & elem : file_changes)
|
||||
{
|
||||
auto & file = elem.second.file_change;
|
||||
if (file.path != file.old_path)
|
||||
if (!file.old_path.empty() && file.path != file.old_path)
|
||||
snapshot[file.path] = snapshot[file.old_path];
|
||||
}
|
||||
|
||||
|
@ -329,14 +329,20 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
|
||||
bool has_password_for_default_user = false;
|
||||
|
||||
if (!fs::exists(main_config_file))
|
||||
if (!fs::exists(config_d))
|
||||
{
|
||||
fmt::print("Creating config directory {} that is used for tweaks of main server configuration.\n", config_d.string());
|
||||
fs::create_directory(config_d);
|
||||
}
|
||||
|
||||
if (!fs::exists(users_d))
|
||||
{
|
||||
fmt::print("Creating config directory {} that is used for tweaks of users configuration.\n", users_d.string());
|
||||
fs::create_directory(users_d);
|
||||
}
|
||||
|
||||
if (!fs::exists(main_config_file))
|
||||
{
|
||||
std::string_view main_config_content = getResource("config.xml");
|
||||
if (main_config_content.empty())
|
||||
{
|
||||
@ -349,7 +355,30 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
out.sync();
|
||||
out.finalize();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fmt::print("Config file {} already exists, will keep it and extract path info from it.\n", main_config_file.string());
|
||||
|
||||
ConfigProcessor processor(main_config_file.string(), /* throw_on_bad_incl = */ false, /* log_to_console = */ false);
|
||||
ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(processor.processConfig()));
|
||||
|
||||
if (configuration->has("path"))
|
||||
{
|
||||
data_path = configuration->getString("path");
|
||||
fmt::print("{} has {} as data path.\n", main_config_file.string(), data_path);
|
||||
}
|
||||
|
||||
if (configuration->has("logger.log"))
|
||||
{
|
||||
log_path = fs::path(configuration->getString("logger.log")).remove_filename();
|
||||
fmt::print("{} has {} as log path.\n", main_config_file.string(), log_path);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (!fs::exists(users_config_file))
|
||||
{
|
||||
std::string_view users_config_content = getResource("users.xml");
|
||||
if (users_config_content.empty())
|
||||
{
|
||||
@ -365,38 +394,17 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
}
|
||||
else
|
||||
{
|
||||
{
|
||||
fmt::print("Config file {} already exists, will keep it and extract path info from it.\n", main_config_file.string());
|
||||
|
||||
ConfigProcessor processor(main_config_file.string(), /* throw_on_bad_incl = */ false, /* log_to_console = */ false);
|
||||
ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(processor.processConfig()));
|
||||
|
||||
if (configuration->has("path"))
|
||||
{
|
||||
data_path = configuration->getString("path");
|
||||
fmt::print("{} has {} as data path.\n", main_config_file.string(), data_path);
|
||||
}
|
||||
|
||||
if (configuration->has("logger.log"))
|
||||
{
|
||||
log_path = fs::path(configuration->getString("logger.log")).remove_filename();
|
||||
fmt::print("{} has {} as log path.\n", main_config_file.string(), log_path);
|
||||
}
|
||||
}
|
||||
fmt::print("Users config file {} already exists, will keep it and extract users info from it.\n", users_config_file.string());
|
||||
|
||||
/// Check if password for default user already specified.
|
||||
ConfigProcessor processor(users_config_file.string(), /* throw_on_bad_incl = */ false, /* log_to_console = */ false);
|
||||
ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(processor.processConfig()));
|
||||
|
||||
if (fs::exists(users_config_file))
|
||||
if (!configuration->getString("users.default.password", "").empty()
|
||||
|| configuration->getString("users.default.password_sha256_hex", "").empty()
|
||||
|| configuration->getString("users.default.password_double_sha1_hex", "").empty())
|
||||
{
|
||||
ConfigProcessor processor(users_config_file.string(), /* throw_on_bad_incl = */ false, /* log_to_console = */ false);
|
||||
ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(processor.processConfig()));
|
||||
|
||||
if (!configuration->getString("users.default.password", "").empty()
|
||||
|| configuration->getString("users.default.password_sha256_hex", "").empty()
|
||||
|| configuration->getString("users.default.password_double_sha1_hex", "").empty())
|
||||
{
|
||||
has_password_for_default_user = true;
|
||||
}
|
||||
has_password_for_default_user = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
PROGRAM(clickhouse-server)
|
||||
|
||||
PEERDIR(
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
PROGRAM(clickhouse)
|
||||
|
||||
CFLAGS(
|
||||
|
@ -1,4 +1,6 @@
|
||||
# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
PEERDIR(
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
PEERDIR(
|
||||
|
@ -8,7 +8,7 @@ namespace DB
|
||||
{
|
||||
|
||||
AggregateFunctionPtr AggregateFunctionCount::getOwnNullAdapter(
|
||||
const AggregateFunctionPtr &, const DataTypes & types, const Array & params) const
|
||||
const AggregateFunctionPtr &, const DataTypes & types, const Array & params, const AggregateFunctionProperties & /*properties*/) const
|
||||
{
|
||||
return std::make_shared<AggregateFunctionCountNotNullUnary>(types[0], params);
|
||||
}
|
||||
|
@ -69,7 +69,7 @@ public:
|
||||
}
|
||||
|
||||
AggregateFunctionPtr getOwnNullAdapter(
|
||||
const AggregateFunctionPtr &, const DataTypes & types, const Array & params) const override;
|
||||
const AggregateFunctionPtr &, const DataTypes & types, const Array & params, const AggregateFunctionProperties & /*properties*/) const override;
|
||||
};
|
||||
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <AggregateFunctions/AggregateFunctionIf.h>
|
||||
#include <AggregateFunctions/AggregateFunctionCombinatorFactory.h>
|
||||
#include "registerAggregateFunctions.h"
|
||||
#include "AggregateFunctionNull.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -8,6 +9,7 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
@ -40,6 +42,164 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
/** There are two cases: for single argument and variadic.
|
||||
* Code for single argument is much more efficient.
|
||||
*/
|
||||
template <bool result_is_nullable, bool serialize_flag>
|
||||
class AggregateFunctionIfNullUnary final
|
||||
: public AggregateFunctionNullBase<result_is_nullable, serialize_flag,
|
||||
AggregateFunctionIfNullUnary<result_is_nullable, serialize_flag>>
|
||||
{
|
||||
private:
|
||||
size_t num_arguments;
|
||||
|
||||
using Base = AggregateFunctionNullBase<result_is_nullable, serialize_flag,
|
||||
AggregateFunctionIfNullUnary<result_is_nullable, serialize_flag>>;
|
||||
public:
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return Base::getName();
|
||||
}
|
||||
|
||||
AggregateFunctionIfNullUnary(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params)
|
||||
: Base(std::move(nested_function_), arguments, params), num_arguments(arguments.size())
|
||||
{
|
||||
if (num_arguments == 0)
|
||||
throw Exception("Aggregate function " + getName() + " require at least one argument",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
}
|
||||
|
||||
static inline bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments)
|
||||
{
|
||||
const IColumn * filter_column = columns[num_arguments - 1];
|
||||
if (const ColumnNullable * nullable_column = typeid_cast<const ColumnNullable *>(filter_column))
|
||||
filter_column = nullable_column->getNestedColumnPtr().get();
|
||||
|
||||
return assert_cast<const ColumnUInt8 &>(*filter_column).getData()[row_num];
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
|
||||
const IColumn * nested_column = &column->getNestedColumn();
|
||||
if (!column->isNullAt(row_num) && singleFilter(columns, row_num, num_arguments))
|
||||
{
|
||||
this->setFlag(place);
|
||||
this->nested_function->add(this->nestedPlace(place), &nested_column, row_num, arena);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <bool result_is_nullable, bool serialize_flag, bool null_is_skipped>
|
||||
class AggregateFunctionIfNullVariadic final
|
||||
: public AggregateFunctionNullBase<result_is_nullable, serialize_flag,
|
||||
AggregateFunctionIfNullVariadic<result_is_nullable, serialize_flag, null_is_skipped>>
|
||||
{
|
||||
public:
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return Base::getName();
|
||||
}
|
||||
|
||||
AggregateFunctionIfNullVariadic(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params)
|
||||
: Base(std::move(nested_function_), arguments, params), number_of_arguments(arguments.size())
|
||||
{
|
||||
if (number_of_arguments == 1)
|
||||
throw Exception("Logical error: single argument is passed to AggregateFunctionIfNullVariadic", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
if (number_of_arguments > MAX_ARGS)
|
||||
throw Exception("Maximum number of arguments for aggregate function with Nullable types is " + toString(size_t(MAX_ARGS)),
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
for (size_t i = 0; i < number_of_arguments; ++i)
|
||||
is_nullable[i] = arguments[i]->isNullable();
|
||||
}
|
||||
|
||||
static inline bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments)
|
||||
{
|
||||
return assert_cast<const ColumnUInt8 &>(*columns[num_arguments - 1]).getData()[row_num];
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
/// This container stores the columns we really pass to the nested function.
|
||||
const IColumn * nested_columns[number_of_arguments];
|
||||
|
||||
for (size_t i = 0; i < number_of_arguments; ++i)
|
||||
{
|
||||
if (is_nullable[i])
|
||||
{
|
||||
const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(*columns[i]);
|
||||
if (null_is_skipped && nullable_col.isNullAt(row_num))
|
||||
{
|
||||
/// If at least one column has a null value in the current row,
|
||||
/// we don't process this row.
|
||||
return;
|
||||
}
|
||||
nested_columns[i] = &nullable_col.getNestedColumn();
|
||||
}
|
||||
else
|
||||
nested_columns[i] = columns[i];
|
||||
}
|
||||
|
||||
if (singleFilter(nested_columns, row_num, number_of_arguments))
|
||||
{
|
||||
this->setFlag(place);
|
||||
this->nested_function->add(this->nestedPlace(place), nested_columns, row_num, arena);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
using Base = AggregateFunctionNullBase<result_is_nullable, serialize_flag,
|
||||
AggregateFunctionIfNullVariadic<result_is_nullable, serialize_flag, null_is_skipped>>;
|
||||
|
||||
enum { MAX_ARGS = 8 };
|
||||
size_t number_of_arguments = 0;
|
||||
std::array<char, MAX_ARGS> is_nullable; /// Plain array is better than std::vector due to one indirection less.
|
||||
};
|
||||
|
||||
|
||||
AggregateFunctionPtr AggregateFunctionIf::getOwnNullAdapter(
|
||||
const AggregateFunctionPtr & nested_function, const DataTypes & arguments,
|
||||
const Array & params, const AggregateFunctionProperties & properties) const
|
||||
{
|
||||
bool return_type_is_nullable = !properties.returns_default_when_only_null && getReturnType()->canBeInsideNullable();
|
||||
size_t nullable_size = std::count_if(arguments.begin(), arguments.end(), [](const auto & element) { return element->isNullable(); });
|
||||
return_type_is_nullable &= nullable_size != 1 || !arguments.back()->isNullable(); /// If only condition is nullable. we should non-nullable type.
|
||||
bool serialize_flag = return_type_is_nullable || properties.returns_default_when_only_null;
|
||||
|
||||
if (arguments.size() <= 2 && arguments.front()->isNullable())
|
||||
{
|
||||
if (return_type_is_nullable)
|
||||
{
|
||||
return std::make_shared<AggregateFunctionIfNullUnary<true, true>>(nested_func, arguments, params);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (serialize_flag)
|
||||
return std::make_shared<AggregateFunctionIfNullUnary<false, true>>(nested_func, arguments, params);
|
||||
else
|
||||
return std::make_shared<AggregateFunctionIfNullUnary<false, false>>(nested_func, arguments, params);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (return_type_is_nullable)
|
||||
{
|
||||
return std::make_shared<AggregateFunctionIfNullVariadic<true, true, true>>(nested_function, arguments, params);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (serialize_flag)
|
||||
return std::make_shared<AggregateFunctionIfNullVariadic<false, true, true>>(nested_function, arguments, params);
|
||||
else
|
||||
return std::make_shared<AggregateFunctionIfNullVariadic<false, false, true>>(nested_function, arguments, params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory & factory)
|
||||
{
|
||||
factory.registerCombinator(std::make_shared<AggregateFunctionCombinatorIf>());
|
||||
|
@ -109,6 +109,10 @@ public:
|
||||
{
|
||||
return nested_func->isState();
|
||||
}
|
||||
|
||||
AggregateFunctionPtr getOwnNullAdapter(
|
||||
const AggregateFunctionPtr & nested_function, const DataTypes & arguments,
|
||||
const Array & params, const AggregateFunctionProperties & properties) const override;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -143,7 +143,7 @@ void LinearModelData::updateState()
|
||||
|
||||
void LinearModelData::predict(
|
||||
ColumnVector<Float64>::Container & container,
|
||||
ColumnsWithTypeAndName & arguments,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
const Context & context) const
|
||||
@ -264,8 +264,8 @@ void Adam::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac)
|
||||
average_gradient[i] = average_gradient[i] * frac + adam_rhs.average_gradient[i] * rhs_frac;
|
||||
average_squared_gradient[i] = average_squared_gradient[i] * frac + adam_rhs.average_squared_gradient[i] * rhs_frac;
|
||||
}
|
||||
beta1_powered_ *= adam_rhs.beta1_powered_;
|
||||
beta2_powered_ *= adam_rhs.beta2_powered_;
|
||||
beta1_powered *= adam_rhs.beta1_powered;
|
||||
beta2_powered *= adam_rhs.beta2_powered;
|
||||
}
|
||||
|
||||
void Adam::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient)
|
||||
@ -282,21 +282,21 @@ void Adam::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & b
|
||||
for (size_t i = 0; i != average_gradient.size(); ++i)
|
||||
{
|
||||
Float64 normed_gradient = batch_gradient[i] / batch_size;
|
||||
average_gradient[i] = beta1_ * average_gradient[i] + (1 - beta1_) * normed_gradient;
|
||||
average_squared_gradient[i] = beta2_ * average_squared_gradient[i] +
|
||||
(1 - beta2_) * normed_gradient * normed_gradient;
|
||||
average_gradient[i] = beta1 * average_gradient[i] + (1 - beta1) * normed_gradient;
|
||||
average_squared_gradient[i] = beta2 * average_squared_gradient[i] +
|
||||
(1 - beta2) * normed_gradient * normed_gradient;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < weights.size(); ++i)
|
||||
{
|
||||
weights[i] += (learning_rate * average_gradient[i]) /
|
||||
((1 - beta1_powered_) * (sqrt(average_squared_gradient[i] / (1 - beta2_powered_)) + eps_));
|
||||
((1 - beta1_powered) * (sqrt(average_squared_gradient[i] / (1 - beta2_powered)) + eps));
|
||||
}
|
||||
bias += (learning_rate * average_gradient[weights.size()]) /
|
||||
((1 - beta1_powered_) * (sqrt(average_squared_gradient[weights.size()] / (1 - beta2_powered_)) + eps_));
|
||||
((1 - beta1_powered) * (sqrt(average_squared_gradient[weights.size()] / (1 - beta2_powered)) + eps));
|
||||
|
||||
beta1_powered_ *= beta1_;
|
||||
beta2_powered_ *= beta2_;
|
||||
beta1_powered *= beta1;
|
||||
beta2_powered *= beta2;
|
||||
}
|
||||
|
||||
void Adam::addToBatch(
|
||||
@ -348,7 +348,7 @@ void Nesterov::update(UInt64 batch_size, std::vector<Float64> & weights, Float64
|
||||
|
||||
for (size_t i = 0; i < batch_gradient.size(); ++i)
|
||||
{
|
||||
accumulated_gradient[i] = accumulated_gradient[i] * alpha_ + (learning_rate * batch_gradient[i]) / batch_size;
|
||||
accumulated_gradient[i] = accumulated_gradient[i] * alpha + (learning_rate * batch_gradient[i]) / batch_size;
|
||||
}
|
||||
for (size_t i = 0; i < weights.size(); ++i)
|
||||
{
|
||||
@ -375,9 +375,9 @@ void Nesterov::addToBatch(
|
||||
std::vector<Float64> shifted_weights(weights.size());
|
||||
for (size_t i = 0; i != shifted_weights.size(); ++i)
|
||||
{
|
||||
shifted_weights[i] = weights[i] + accumulated_gradient[i] * alpha_;
|
||||
shifted_weights[i] = weights[i] + accumulated_gradient[i] * alpha;
|
||||
}
|
||||
auto shifted_bias = bias + accumulated_gradient[weights.size()] * alpha_;
|
||||
auto shifted_bias = bias + accumulated_gradient[weights.size()] * alpha;
|
||||
|
||||
gradient_computer.compute(batch_gradient, shifted_weights, shifted_bias, l2_reg_coef, target, columns, row_num);
|
||||
}
|
||||
@ -411,7 +411,7 @@ void Momentum::update(UInt64 batch_size, std::vector<Float64> & weights, Float64
|
||||
|
||||
for (size_t i = 0; i < batch_gradient.size(); ++i)
|
||||
{
|
||||
accumulated_gradient[i] = accumulated_gradient[i] * alpha_ + (learning_rate * batch_gradient[i]) / batch_size;
|
||||
accumulated_gradient[i] = accumulated_gradient[i] * alpha + (learning_rate * batch_gradient[i]) / batch_size;
|
||||
}
|
||||
for (size_t i = 0; i < weights.size(); ++i)
|
||||
{
|
||||
@ -448,7 +448,7 @@ void IWeightsUpdater::addToBatch(
|
||||
|
||||
void LogisticRegression::predict(
|
||||
ColumnVector<Float64>::Container & container,
|
||||
ColumnsWithTypeAndName & arguments,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
const std::vector<Float64> & weights,
|
||||
@ -516,7 +516,7 @@ void LogisticRegression::compute(
|
||||
|
||||
void LinearRegression::predict(
|
||||
ColumnVector<Float64>::Container & container,
|
||||
ColumnsWithTypeAndName & arguments,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
const std::vector<Float64> & weights,
|
||||
|
@ -23,7 +23,7 @@ GradientComputer class computes gradient according to its loss function
|
||||
class IGradientComputer
|
||||
{
|
||||
public:
|
||||
IGradientComputer() {}
|
||||
IGradientComputer() = default;
|
||||
|
||||
virtual ~IGradientComputer() = default;
|
||||
|
||||
@ -39,7 +39,7 @@ public:
|
||||
|
||||
virtual void predict(
|
||||
ColumnVector<Float64>::Container & container,
|
||||
ColumnsWithTypeAndName & arguments,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
const std::vector<Float64> & weights,
|
||||
@ -51,7 +51,7 @@ public:
|
||||
class LinearRegression : public IGradientComputer
|
||||
{
|
||||
public:
|
||||
LinearRegression() {}
|
||||
LinearRegression() = default;
|
||||
|
||||
void compute(
|
||||
std::vector<Float64> & batch_gradient,
|
||||
@ -64,7 +64,7 @@ public:
|
||||
|
||||
void predict(
|
||||
ColumnVector<Float64>::Container & container,
|
||||
ColumnsWithTypeAndName & arguments,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
const std::vector<Float64> & weights,
|
||||
@ -76,7 +76,7 @@ public:
|
||||
class LogisticRegression : public IGradientComputer
|
||||
{
|
||||
public:
|
||||
LogisticRegression() {}
|
||||
LogisticRegression() = default;
|
||||
|
||||
void compute(
|
||||
std::vector<Float64> & batch_gradient,
|
||||
@ -89,7 +89,7 @@ public:
|
||||
|
||||
void predict(
|
||||
ColumnVector<Float64>::Container & container,
|
||||
ColumnsWithTypeAndName & arguments,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
const std::vector<Float64> & weights,
|
||||
@ -147,9 +147,9 @@ public:
|
||||
class Momentum : public IWeightsUpdater
|
||||
{
|
||||
public:
|
||||
Momentum() {}
|
||||
Momentum() = default;
|
||||
|
||||
Momentum(Float64 alpha) : alpha_(alpha) {}
|
||||
explicit Momentum(Float64 alpha_) : alpha(alpha_) {}
|
||||
|
||||
void update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient) override;
|
||||
|
||||
@ -160,7 +160,7 @@ public:
|
||||
void read(ReadBuffer & buf) override;
|
||||
|
||||
private:
|
||||
Float64 alpha_{0.1};
|
||||
Float64 alpha{0.1};
|
||||
std::vector<Float64> accumulated_gradient;
|
||||
};
|
||||
|
||||
@ -168,9 +168,9 @@ private:
|
||||
class Nesterov : public IWeightsUpdater
|
||||
{
|
||||
public:
|
||||
Nesterov() {}
|
||||
Nesterov() = default;
|
||||
|
||||
Nesterov(Float64 alpha) : alpha_(alpha) {}
|
||||
explicit Nesterov(Float64 alpha_) : alpha(alpha_) {}
|
||||
|
||||
void addToBatch(
|
||||
std::vector<Float64> & batch_gradient,
|
||||
@ -191,7 +191,7 @@ public:
|
||||
void read(ReadBuffer & buf) override;
|
||||
|
||||
private:
|
||||
const Float64 alpha_ = 0.9;
|
||||
const Float64 alpha = 0.9;
|
||||
std::vector<Float64> accumulated_gradient;
|
||||
};
|
||||
|
||||
@ -201,8 +201,8 @@ class Adam : public IWeightsUpdater
|
||||
public:
|
||||
Adam()
|
||||
{
|
||||
beta1_powered_ = beta1_;
|
||||
beta2_powered_ = beta2_;
|
||||
beta1_powered = beta1;
|
||||
beta2_powered = beta2;
|
||||
}
|
||||
|
||||
void addToBatch(
|
||||
@ -225,11 +225,11 @@ public:
|
||||
|
||||
private:
|
||||
/// beta1 and beta2 hyperparameters have such recommended values
|
||||
const Float64 beta1_ = 0.9;
|
||||
const Float64 beta2_ = 0.999;
|
||||
const Float64 eps_ = 0.000001;
|
||||
Float64 beta1_powered_;
|
||||
Float64 beta2_powered_;
|
||||
const Float64 beta1 = 0.9;
|
||||
const Float64 beta2 = 0.999;
|
||||
const Float64 eps = 0.000001;
|
||||
Float64 beta1_powered;
|
||||
Float64 beta2_powered;
|
||||
|
||||
std::vector<Float64> average_gradient;
|
||||
std::vector<Float64> average_squared_gradient;
|
||||
@ -241,7 +241,7 @@ private:
|
||||
class LinearModelData
|
||||
{
|
||||
public:
|
||||
LinearModelData() {}
|
||||
LinearModelData() = default;
|
||||
|
||||
LinearModelData(
|
||||
Float64 learning_rate_,
|
||||
@ -261,7 +261,7 @@ public:
|
||||
|
||||
void predict(
|
||||
ColumnVector<Float64>::Container & container,
|
||||
ColumnsWithTypeAndName & arguments,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
const Context & context) const;
|
||||
@ -360,7 +360,7 @@ public:
|
||||
void predictValues(
|
||||
ConstAggregateDataPtr place,
|
||||
IColumn & to,
|
||||
ColumnsWithTypeAndName & arguments,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
const Context & context) const override
|
||||
|
@ -72,7 +72,7 @@ public:
|
||||
|
||||
assert(nested_function);
|
||||
|
||||
if (auto adapter = nested_function->getOwnNullAdapter(nested_function, arguments, params))
|
||||
if (auto adapter = nested_function->getOwnNullAdapter(nested_function, arguments, params, properties))
|
||||
return adapter;
|
||||
|
||||
/// If applied to aggregate function with -State combinator, we apply -Null combinator to it's nested_function instead of itself.
|
||||
|
@ -187,7 +187,10 @@ struct AggregateFunctionTimeSeriesGroupSumData
|
||||
{
|
||||
size_t size = result.size();
|
||||
writeVarUInt(size, buf);
|
||||
buf.write(reinterpret_cast<const char *>(result.data()), sizeof(result[0]));
|
||||
if (size > 0)
|
||||
{
|
||||
buf.write(reinterpret_cast<const char *>(result.data()), size * sizeof(result[0]));
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(ReadBuffer & buf)
|
||||
@ -195,7 +198,10 @@ struct AggregateFunctionTimeSeriesGroupSumData
|
||||
size_t size = 0;
|
||||
readVarUInt(size, buf);
|
||||
result.resize(size);
|
||||
buf.read(reinterpret_cast<char *>(result.data()), size * sizeof(result[0]));
|
||||
if (size > 0)
|
||||
{
|
||||
buf.read(reinterpret_cast<char *>(result.data()), size * sizeof(result[0]));
|
||||
}
|
||||
}
|
||||
};
|
||||
template <bool rate>
|
||||
|
@ -239,7 +239,8 @@ public:
|
||||
}
|
||||
|
||||
AggregateFunctionPtr getOwnNullAdapter(
|
||||
const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params) const override
|
||||
const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params,
|
||||
const AggregateFunctionProperties & /*properties*/) const override
|
||||
{
|
||||
return std::make_shared<AggregateFunctionNullVariadic<false, false, false>>(nested_function, arguments, params);
|
||||
}
|
||||
|
@ -33,6 +33,7 @@ using ConstAggregateDataPtr = const char *;
|
||||
|
||||
class IAggregateFunction;
|
||||
using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
|
||||
struct AggregateFunctionProperties;
|
||||
|
||||
/** Aggregate functions interface.
|
||||
* Instances of classes with this interface do not contain the data itself for aggregation,
|
||||
@ -60,7 +61,7 @@ public:
|
||||
throw Exception("Prediction is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
virtual ~IAggregateFunction() {}
|
||||
virtual ~IAggregateFunction() = default;
|
||||
|
||||
/** Data manipulating functions. */
|
||||
|
||||
@ -113,7 +114,7 @@ public:
|
||||
virtual void predictValues(
|
||||
ConstAggregateDataPtr /* place */,
|
||||
IColumn & /*to*/,
|
||||
ColumnsWithTypeAndName & /*arguments*/,
|
||||
const ColumnsWithTypeAndName & /*arguments*/,
|
||||
size_t /*offset*/,
|
||||
size_t /*limit*/,
|
||||
const Context & /*context*/) const
|
||||
@ -185,7 +186,8 @@ public:
|
||||
* arguments and params are for nested_function.
|
||||
*/
|
||||
virtual AggregateFunctionPtr getOwnNullAdapter(
|
||||
const AggregateFunctionPtr & /*nested_function*/, const DataTypes & /*arguments*/, const Array & /*params*/) const
|
||||
const AggregateFunctionPtr & /*nested_function*/, const DataTypes & /*arguments*/,
|
||||
const Array & /*params*/, const AggregateFunctionProperties & /*properties*/) const
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -1,19 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <common/types.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/VarInt.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <common/sort.h>
|
||||
#include <common/types.h>
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
#include <miniselect/floyd_rivest_select.h> // Y_IGNORE
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
@ -89,12 +87,7 @@ struct QuantileExact : QuantileExactBase<Value, QuantileExact<Value>>
|
||||
if (!array.empty())
|
||||
{
|
||||
size_t n = level < 1 ? level * array.size() : (array.size() - 1);
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
miniselect::floyd_rivest_select(array.begin(), array.begin() + n, array.end()); /// NOTE You can think of the radix-select algorithm.
|
||||
#else
|
||||
std::nth_element(array.begin(), array.begin() + n, array.end()); /// NOTE You can think of the radix-select algorithm.
|
||||
#endif
|
||||
nth_element(array.begin(), array.begin() + n, array.end()); /// NOTE: You can think of the radix-select algorithm.
|
||||
return array[n];
|
||||
}
|
||||
|
||||
@ -113,12 +106,7 @@ struct QuantileExact : QuantileExactBase<Value, QuantileExact<Value>>
|
||||
auto level = levels[indices[i]];
|
||||
|
||||
size_t n = level < 1 ? level * array.size() : (array.size() - 1);
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
miniselect::floyd_rivest_select(array.begin() + prev_n, array.begin() + n, array.end());
|
||||
#else
|
||||
std::nth_element(array.begin() + prev_n, array.begin() + n, array.end());
|
||||
#endif
|
||||
nth_element(array.begin() + prev_n, array.begin() + n, array.end());
|
||||
result[indices[i]] = array[n];
|
||||
prev_n = n;
|
||||
}
|
||||
@ -154,14 +142,10 @@ struct QuantileExactExclusive : public QuantileExact<Value>
|
||||
else if (n < 1)
|
||||
return static_cast<Float64>(array[0]);
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
miniselect::floyd_rivest_select(array.begin(), array.begin() + n - 1, array.end());
|
||||
#else
|
||||
std::nth_element(array.begin(), array.begin() + n - 1, array.end());
|
||||
#endif
|
||||
auto nth_element = std::min_element(array.begin() + n, array.end());
|
||||
nth_element(array.begin(), array.begin() + n - 1, array.end());
|
||||
auto nth_elem = std::min_element(array.begin() + n, array.end());
|
||||
|
||||
return static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_element - array[n - 1]);
|
||||
return static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_elem - array[n - 1]);
|
||||
}
|
||||
|
||||
return std::numeric_limits<Float64>::quiet_NaN();
|
||||
@ -187,14 +171,10 @@ struct QuantileExactExclusive : public QuantileExact<Value>
|
||||
result[indices[i]] = static_cast<Float64>(array[0]);
|
||||
else
|
||||
{
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
miniselect::floyd_rivest_select(array.begin() + prev_n, array.begin() + n - 1, array.end());
|
||||
#else
|
||||
std::nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end());
|
||||
#endif
|
||||
auto nth_element = std::min_element(array.begin() + n, array.end());
|
||||
nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end());
|
||||
auto nth_elem = std::min_element(array.begin() + n, array.end());
|
||||
|
||||
result[indices[i]] = static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_element - array[n - 1]);
|
||||
result[indices[i]] = static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_elem - array[n - 1]);
|
||||
prev_n = n - 1;
|
||||
}
|
||||
}
|
||||
@ -226,14 +206,10 @@ struct QuantileExactInclusive : public QuantileExact<Value>
|
||||
return static_cast<Float64>(array[array.size() - 1]);
|
||||
else if (n < 1)
|
||||
return static_cast<Float64>(array[0]);
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
miniselect::floyd_rivest_select(array.begin(), array.begin() + n - 1, array.end());
|
||||
#else
|
||||
std::nth_element(array.begin(), array.begin() + n - 1, array.end());
|
||||
#endif
|
||||
auto nth_element = std::min_element(array.begin() + n, array.end());
|
||||
nth_element(array.begin(), array.begin() + n - 1, array.end());
|
||||
auto nth_elem = std::min_element(array.begin() + n, array.end());
|
||||
|
||||
return static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_element - array[n - 1]);
|
||||
return static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_elem - array[n - 1]);
|
||||
}
|
||||
|
||||
return std::numeric_limits<Float64>::quiet_NaN();
|
||||
@ -257,14 +233,10 @@ struct QuantileExactInclusive : public QuantileExact<Value>
|
||||
result[indices[i]] = static_cast<Float64>(array[0]);
|
||||
else
|
||||
{
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
miniselect::floyd_rivest_select(array.begin() + prev_n, array.begin() + n - 1, array.end());
|
||||
#else
|
||||
std::nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end());
|
||||
#endif
|
||||
auto nth_element = std::min_element(array.begin() + n, array.end());
|
||||
nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end());
|
||||
auto nth_elem = std::min_element(array.begin() + n, array.end());
|
||||
|
||||
result[indices[i]] = static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_element - array[n - 1]);
|
||||
result[indices[i]] = static_cast<Float64>(array[n - 1]) + (h - n) * static_cast<Float64>(*nth_elem - array[n - 1]);
|
||||
prev_n = n - 1;
|
||||
}
|
||||
}
|
||||
|
@ -14,6 +14,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
|
||||
}
|
||||
|
||||
|
||||
@ -36,10 +37,11 @@ namespace ErrorCodes
|
||||
* uses asin, which slows down the algorithm a bit.
|
||||
*/
|
||||
template <typename T>
|
||||
class TDigest
|
||||
class QuantileTDigest
|
||||
{
|
||||
using Value = Float32;
|
||||
using Count = Float32;
|
||||
using BetterFloat = Float64; // For intermediate results and sum(Count). Must have better precision, than Count
|
||||
|
||||
/** The centroid stores the weight of points around their mean value
|
||||
*/
|
||||
@ -55,13 +57,6 @@ class TDigest
|
||||
, count(count_)
|
||||
{}
|
||||
|
||||
Centroid & operator+=(const Centroid & other)
|
||||
{
|
||||
count += other.count;
|
||||
mean += other.count * (other.mean - mean) / count;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool operator<(const Centroid & other) const
|
||||
{
|
||||
return mean < other.mean;
|
||||
@ -71,26 +66,42 @@ class TDigest
|
||||
|
||||
/** :param epsilon: value \delta from the article - error in the range
|
||||
* quantile 0.5 (default is 0.01, i.e. 1%)
|
||||
* if you change epsilon, you must also change max_centroids
|
||||
* :param max_centroids: depends on epsilon, the better accuracy, the more centroids you need
|
||||
* to describe data with this accuracy. Read article before changing.
|
||||
* :param max_unmerged: when accumulating count of new points beyond this
|
||||
* value centroid compression is triggered
|
||||
* (default is 2048, the higher the value - the
|
||||
* more memory is required, but amortization of execution time increases)
|
||||
* Change freely anytime.
|
||||
*/
|
||||
struct Params
|
||||
{
|
||||
Value epsilon = 0.01;
|
||||
size_t max_centroids = 2048;
|
||||
size_t max_unmerged = 2048;
|
||||
};
|
||||
/** max_centroids_deserialize should be >= all max_centroids ever used in production.
|
||||
* This is security parameter, preventing allocation of too much centroids in deserialize, so can be relatively large.
|
||||
*/
|
||||
static constexpr size_t max_centroids_deserialize = 65536;
|
||||
|
||||
Params params;
|
||||
static constexpr Params params{};
|
||||
|
||||
/// The memory will be allocated to several elements at once, so that the state occupies 64 bytes.
|
||||
static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray<Centroid>) - sizeof(Count) - sizeof(UInt32);
|
||||
static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray<Centroid>) - sizeof(BetterFloat) - sizeof(size_t); // If alignment is imperfect, sizeof(TDigest) will be more than naively expected
|
||||
using Centroids = PODArrayWithStackMemory<Centroid, bytes_in_arena>;
|
||||
|
||||
Centroids centroids;
|
||||
Count count = 0;
|
||||
UInt32 unmerged = 0;
|
||||
BetterFloat count = 0;
|
||||
size_t unmerged = 0;
|
||||
|
||||
/** Linear interpolation at the point x on the line (x1, y1)..(x2, y2)
|
||||
*/
|
||||
static Value interpolate(Value x, Value x1, Value y1, Value x2, Value y2)
|
||||
{
|
||||
double k = (x - x1) / (x2 - x1);
|
||||
return y1 + k * (y2 - y1);
|
||||
}
|
||||
|
||||
struct RadixSortTraits
|
||||
{
|
||||
@ -111,15 +122,56 @@ class TDigest
|
||||
};
|
||||
|
||||
/** Adds a centroid `c` to the digest
|
||||
* centroid must be valid, validity is checked in add(), deserialize() and is maintained by compress()
|
||||
*/
|
||||
void addCentroid(const Centroid & c)
|
||||
{
|
||||
centroids.push_back(c);
|
||||
count += c.count;
|
||||
++unmerged;
|
||||
if (unmerged >= params.max_unmerged)
|
||||
if (unmerged > params.max_unmerged)
|
||||
compress();
|
||||
}
|
||||
void compressBrute()
|
||||
{
|
||||
if (centroids.size() <= params.max_centroids)
|
||||
return;
|
||||
const size_t batch_size = (centroids.size() + params.max_centroids - 1) / params.max_centroids; // at least 2
|
||||
|
||||
auto l = centroids.begin();
|
||||
auto r = std::next(l);
|
||||
BetterFloat sum = 0;
|
||||
BetterFloat l_mean = l->mean; // We have high-precision temporaries for numeric stability
|
||||
BetterFloat l_count = l->count;
|
||||
size_t batch_pos = 0;
|
||||
for (;r != centroids.end(); ++r)
|
||||
{
|
||||
if (batch_pos < batch_size - 1)
|
||||
{
|
||||
/// The left column "eats" the right. Middle of the batch
|
||||
l_count += r->count;
|
||||
l_mean += r->count * (r->mean - l_mean) / l_count; // Symmetric algo (M1*C1 + M2*C2)/(C1+C2) is numerically better, but slower
|
||||
l->mean = l_mean;
|
||||
l->count = l_count;
|
||||
batch_pos += 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
// End of the batch, start the next one
|
||||
sum += l->count; // Not l_count, otherwise actual sum of elements will be different
|
||||
++l;
|
||||
|
||||
/// We skip all the values "eaten" earlier.
|
||||
*l = *r;
|
||||
l_mean = l->mean;
|
||||
l_count = l->count;
|
||||
batch_pos = 0;
|
||||
}
|
||||
}
|
||||
count = sum + l_count; // Update count, it might be different due to += inaccuracy
|
||||
centroids.resize(l - centroids.begin() + 1);
|
||||
// Here centroids.size() <= params.max_centroids
|
||||
}
|
||||
|
||||
public:
|
||||
/** Performs compression of accumulated centroids
|
||||
@ -128,74 +180,92 @@ public:
|
||||
*/
|
||||
void compress()
|
||||
{
|
||||
if (unmerged > 0)
|
||||
if (unmerged > 0 || centroids.size() > params.max_centroids)
|
||||
{
|
||||
// unmerged > 0 implies centroids.size() > 0, hence *l is valid below
|
||||
RadixSort<RadixSortTraits>::executeLSD(centroids.data(), centroids.size());
|
||||
|
||||
if (centroids.size() > 3)
|
||||
/// A pair of consecutive bars of the histogram.
|
||||
auto l = centroids.begin();
|
||||
auto r = std::next(l);
|
||||
|
||||
const BetterFloat count_epsilon_4 = count * params.epsilon * 4; // Compiler is unable to do this optimization
|
||||
BetterFloat sum = 0;
|
||||
BetterFloat l_mean = l->mean; // We have high-precision temporaries for numeric stability
|
||||
BetterFloat l_count = l->count;
|
||||
while (r != centroids.end())
|
||||
{
|
||||
/// A pair of consecutive bars of the histogram.
|
||||
auto l = centroids.begin();
|
||||
auto r = std::next(l);
|
||||
|
||||
Count sum = 0;
|
||||
while (r != centroids.end())
|
||||
if (l->mean == r->mean) // Perfect aggregation (fast). We compare l->mean, not l_mean, to avoid identical elements after compress
|
||||
{
|
||||
// we use quantile which gives us the smallest error
|
||||
|
||||
/// The ratio of the part of the histogram to l, including the half l to the entire histogram. That is, what level quantile in position l.
|
||||
Value ql = (sum + l->count * 0.5) / count;
|
||||
Value err = ql * (1 - ql);
|
||||
|
||||
/// The ratio of the portion of the histogram to l, including l and half r to the entire histogram. That is, what level is the quantile in position r.
|
||||
Value qr = (sum + l->count + r->count * 0.5) / count;
|
||||
Value err2 = qr * (1 - qr);
|
||||
|
||||
if (err > err2)
|
||||
err = err2;
|
||||
|
||||
Value k = 4 * count * err * params.epsilon;
|
||||
|
||||
/** The ratio of the weight of the glued column pair to all values is not greater,
|
||||
* than epsilon multiply by a certain quadratic coefficient, which in the median is 1 (4 * 1/2 * 1/2),
|
||||
* and at the edges decreases and is approximately equal to the distance to the edge * 4.
|
||||
*/
|
||||
|
||||
if (l->count + r->count <= k)
|
||||
{
|
||||
// it is possible to merge left and right
|
||||
/// The left column "eats" the right.
|
||||
*l += *r;
|
||||
}
|
||||
else
|
||||
{
|
||||
// not enough capacity, check the next pair
|
||||
sum += l->count;
|
||||
++l;
|
||||
|
||||
/// We skip all the values "eaten" earlier.
|
||||
if (l != r)
|
||||
*l = *r;
|
||||
}
|
||||
l_count += r->count;
|
||||
l->count = l_count;
|
||||
++r;
|
||||
continue;
|
||||
}
|
||||
// we use quantile which gives us the smallest error
|
||||
|
||||
/// At the end of the loop, all values to the right of l were "eaten".
|
||||
centroids.resize(l - centroids.begin() + 1);
|
||||
/// The ratio of the part of the histogram to l, including the half l to the entire histogram. That is, what level quantile in position l.
|
||||
BetterFloat ql = (sum + l_count * 0.5) / count;
|
||||
BetterFloat err = ql * (1 - ql);
|
||||
|
||||
/// The ratio of the portion of the histogram to l, including l and half r to the entire histogram. That is, what level is the quantile in position r.
|
||||
BetterFloat qr = (sum + l_count + r->count * 0.5) / count;
|
||||
BetterFloat err2 = qr * (1 - qr);
|
||||
|
||||
if (err > err2)
|
||||
err = err2;
|
||||
|
||||
BetterFloat k = count_epsilon_4 * err;
|
||||
|
||||
/** The ratio of the weight of the glued column pair to all values is not greater,
|
||||
* than epsilon multiply by a certain quadratic coefficient, which in the median is 1 (4 * 1/2 * 1/2),
|
||||
* and at the edges decreases and is approximately equal to the distance to the edge * 4.
|
||||
*/
|
||||
|
||||
if (l_count + r->count <= k)
|
||||
{
|
||||
// it is possible to merge left and right
|
||||
/// The left column "eats" the right.
|
||||
l_count += r->count;
|
||||
l_mean += r->count * (r->mean - l_mean) / l_count; // Symmetric algo (M1*C1 + M2*C2)/(C1+C2) is numerically better, but slower
|
||||
l->mean = l_mean;
|
||||
l->count = l_count;
|
||||
}
|
||||
else
|
||||
{
|
||||
// not enough capacity, check the next pair
|
||||
sum += l->count; // Not l_count, otherwise actual sum of elements will be different
|
||||
++l;
|
||||
|
||||
/// We skip all the values "eaten" earlier.
|
||||
if (l != r)
|
||||
*l = *r;
|
||||
l_mean = l->mean;
|
||||
l_count = l->count;
|
||||
}
|
||||
++r;
|
||||
}
|
||||
count = sum + l_count; // Update count, it might be different due to += inaccuracy
|
||||
|
||||
/// At the end of the loop, all values to the right of l were "eaten".
|
||||
centroids.resize(l - centroids.begin() + 1);
|
||||
unmerged = 0;
|
||||
}
|
||||
// Ensures centroids.size() < max_centroids, independent of unprovable floating point blackbox above
|
||||
compressBrute();
|
||||
}
|
||||
|
||||
/** Adds to the digest a change in `x` with a weight of `cnt` (default 1)
|
||||
*/
|
||||
void add(T x, UInt64 cnt = 1)
|
||||
{
|
||||
addCentroid(Centroid(Value(x), Count(cnt)));
|
||||
auto vx = static_cast<Value>(x);
|
||||
if (cnt == 0 || std::isnan(vx))
|
||||
return; // Count 0 breaks compress() assumptions, Nan breaks sort(). We treat them as no sample.
|
||||
addCentroid(Centroid{vx, static_cast<Count>(cnt)});
|
||||
}
|
||||
|
||||
void merge(const TDigest & other)
|
||||
void merge(const QuantileTDigest & other)
|
||||
{
|
||||
for (const auto & c : other.centroids)
|
||||
addCentroid(c);
|
||||
@ -213,89 +283,23 @@ public:
|
||||
size_t size = 0;
|
||||
readVarUInt(size, buf);
|
||||
|
||||
if (size > params.max_unmerged)
|
||||
if (size > max_centroids_deserialize)
|
||||
throw Exception("Too large t-digest centroids size", ErrorCodes::TOO_LARGE_ARRAY_SIZE);
|
||||
|
||||
centroids.resize(size);
|
||||
buf.read(reinterpret_cast<char *>(centroids.data()), size * sizeof(centroids[0]));
|
||||
|
||||
count = 0;
|
||||
for (const auto & c : centroids)
|
||||
count += c.count;
|
||||
}
|
||||
|
||||
Count getCount()
|
||||
{
|
||||
return count;
|
||||
}
|
||||
|
||||
const Centroids & getCentroids() const
|
||||
{
|
||||
return centroids;
|
||||
}
|
||||
|
||||
void reset()
|
||||
{
|
||||
centroids.resize(0);
|
||||
count = 0;
|
||||
unmerged = 0;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class QuantileTDigest
|
||||
{
|
||||
using Value = Float32;
|
||||
using Count = Float32;
|
||||
centroids.resize(size);
|
||||
// From now, TDigest will be in invalid state if exception is thrown.
|
||||
buf.read(reinterpret_cast<char *>(centroids.data()), size * sizeof(centroids[0]));
|
||||
|
||||
/** We store two t-digests. When an amount of elements in sub_tdigest become more than merge_threshold
|
||||
* we merge sub_tdigest in main_tdigest and reset sub_tdigest. This method is needed to decrease an amount of
|
||||
* centroids in t-digest (experiments show that after merge_threshold the size of t-digest significantly grows,
|
||||
* but merging two big t-digest decreases it).
|
||||
*/
|
||||
TDigest<T> main_tdigest;
|
||||
TDigest<T> sub_tdigest;
|
||||
size_t merge_threshold = 1e7;
|
||||
|
||||
/** Linear interpolation at the point x on the line (x1, y1)..(x2, y2)
|
||||
*/
|
||||
static Value interpolate(Value x, Value x1, Value y1, Value x2, Value y2)
|
||||
{
|
||||
double k = (x - x1) / (x2 - x1);
|
||||
return y1 + k * (y2 - y1);
|
||||
}
|
||||
|
||||
void mergeTDigests()
|
||||
{
|
||||
main_tdigest.merge(sub_tdigest);
|
||||
sub_tdigest.reset();
|
||||
}
|
||||
|
||||
public:
|
||||
void add(T x, UInt64 cnt = 1)
|
||||
{
|
||||
if (sub_tdigest.getCount() >= merge_threshold)
|
||||
mergeTDigests();
|
||||
sub_tdigest.add(x, cnt);
|
||||
}
|
||||
|
||||
void merge(const QuantileTDigest & other)
|
||||
{
|
||||
mergeTDigests();
|
||||
main_tdigest.merge(other.main_tdigest);
|
||||
main_tdigest.merge(other.sub_tdigest);
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf)
|
||||
{
|
||||
mergeTDigests();
|
||||
main_tdigest.serialize(buf);
|
||||
}
|
||||
|
||||
void deserialize(ReadBuffer & buf)
|
||||
{
|
||||
sub_tdigest.reset();
|
||||
main_tdigest.deserialize(buf);
|
||||
for (const auto & c : centroids)
|
||||
{
|
||||
if (c.count <= 0 || std::isnan(c.count) || std::isnan(c.mean)) // invalid count breaks compress(), invalid mean breaks sort()
|
||||
throw Exception("Invalid centroid " + std::to_string(c.count) + ":" + std::to_string(c.mean), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
|
||||
count += c.count;
|
||||
}
|
||||
compress(); // Allows reading/writing TDigests with different epsilon/max_centroids params
|
||||
}
|
||||
|
||||
/** Calculates the quantile q [0, 1] based on the digest.
|
||||
@ -304,18 +308,15 @@ public:
|
||||
template <typename ResultType>
|
||||
ResultType getImpl(Float64 level)
|
||||
{
|
||||
mergeTDigests();
|
||||
|
||||
auto & centroids = main_tdigest.getCentroids();
|
||||
if (centroids.empty())
|
||||
return std::is_floating_point_v<ResultType> ? NAN : 0;
|
||||
|
||||
main_tdigest.compress();
|
||||
compress();
|
||||
|
||||
if (centroids.size() == 1)
|
||||
return centroids.front().mean;
|
||||
|
||||
Float64 x = level * main_tdigest.getCount();
|
||||
Float64 x = level * count;
|
||||
Float64 prev_x = 0;
|
||||
Count sum = 0;
|
||||
Value prev_mean = centroids.front().mean;
|
||||
@ -343,9 +344,6 @@ public:
|
||||
template <typename ResultType>
|
||||
void getManyImpl(const Float64 * levels, const size_t * levels_permutation, size_t size, ResultType * result)
|
||||
{
|
||||
mergeTDigests();
|
||||
|
||||
auto & centroids = main_tdigest.getCentroids();
|
||||
if (centroids.empty())
|
||||
{
|
||||
for (size_t result_num = 0; result_num < size; ++result_num)
|
||||
@ -353,7 +351,7 @@ public:
|
||||
return;
|
||||
}
|
||||
|
||||
main_tdigest.compress();
|
||||
compress();
|
||||
|
||||
if (centroids.size() == 1)
|
||||
{
|
||||
@ -362,7 +360,7 @@ public:
|
||||
return;
|
||||
}
|
||||
|
||||
Float64 x = levels[levels_permutation[0]] * main_tdigest.getCount();
|
||||
Float64 x = levels[levels_permutation[0]] * count;
|
||||
Float64 prev_x = 0;
|
||||
Count sum = 0;
|
||||
Value prev_mean = centroids.front().mean;
|
||||
@ -380,7 +378,7 @@ public:
|
||||
if (result_num >= size)
|
||||
return;
|
||||
|
||||
x = levels[levels_permutation[result_num]] * main_tdigest.getCount();
|
||||
x = levels[levels_permutation[result_num]] * count;
|
||||
}
|
||||
|
||||
sum += c.count;
|
||||
|
@ -1,15 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <common/sort.h>
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
#include <miniselect/floyd_rivest_select.h> // Y_IGNORE
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -140,7 +138,7 @@ namespace detail
|
||||
using Array = PODArray<UInt16, 128>;
|
||||
mutable Array elems; /// mutable because array sorting is not considered a state change.
|
||||
|
||||
QuantileTimingMedium() {}
|
||||
QuantileTimingMedium() = default;
|
||||
QuantileTimingMedium(const UInt16 * begin, const UInt16 * end) : elems(begin, end) {}
|
||||
|
||||
void insert(UInt64 x)
|
||||
@ -182,11 +180,7 @@ namespace detail
|
||||
|
||||
/// Sorting an array will not be considered a violation of constancy.
|
||||
auto & array = elems;
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
miniselect::floyd_rivest_select(array.begin(), array.begin() + n, array.end());
|
||||
#else
|
||||
std::nth_element(array.begin(), array.begin() + n, array.end());
|
||||
#endif
|
||||
nth_element(array.begin(), array.begin() + n, array.end());
|
||||
quantile = array[n];
|
||||
}
|
||||
|
||||
@ -207,11 +201,7 @@ namespace detail
|
||||
? level * elems.size()
|
||||
: (elems.size() - 1);
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
miniselect::floyd_rivest_select(array.begin() + prev_n, array.begin() + n, array.end());
|
||||
#else
|
||||
std::nth_element(array.begin() + prev_n, array.begin() + n, array.end());
|
||||
#endif
|
||||
nth_element(array.begin() + prev_n, array.begin() + n, array.end());
|
||||
|
||||
result[level_index] = array[n];
|
||||
prev_n = n;
|
||||
@ -282,7 +272,7 @@ namespace detail
|
||||
}
|
||||
|
||||
public:
|
||||
Iterator(const QuantileTimingLarge & parent)
|
||||
explicit Iterator(const QuantileTimingLarge & parent)
|
||||
: begin(parent.count_small), pos(begin), end(&parent.count_big[BIG_SIZE])
|
||||
{
|
||||
adjust();
|
||||
@ -429,8 +419,8 @@ namespace detail
|
||||
template <typename ResultType>
|
||||
void getMany(const double * levels, const size_t * indices, size_t size, ResultType * result) const
|
||||
{
|
||||
const auto indices_end = indices + size;
|
||||
auto index = indices;
|
||||
const auto * indices_end = indices + size;
|
||||
const auto * index = indices;
|
||||
|
||||
UInt64 pos = std::ceil(count * levels[*index]);
|
||||
|
||||
|
@ -1,4 +1,6 @@
|
||||
# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
PEERDIR(
|
||||
|
@ -1,3 +1,5 @@
|
||||
OWNER(g:clickhouse)
|
||||
|
||||
LIBRARY()
|
||||
|
||||
PEERDIR(
|
||||
|
@ -73,6 +73,11 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
{
#if USE_SSL
socket = std::make_unique<Poco::Net::SecureStreamSocket>();

/// we resolve the ip when we open SecureStreamSocket, so to make Server Name Indication (SNI)
/// work we need to pass host name separately. It will be send into TLS Hello packet to let
/// the server know which host we want to talk with (single IP can process requests for multiple hosts using SNI).
static_cast<Poco::Net::SecureStreamSocket*>(socket.get())->setPeerHostName(host);
#else
throw Exception{"tcp_secure protocol is disabled because poco library was built without NetSSL support.", ErrorCodes::SUPPORT_IS_DISABLED};
#endif
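
Outside of ClickHouse, the point of the hunk above can be shown with a small hedged Poco sketch: the address is resolved before the TLS handshake, so the host name must be handed to the socket explicitly for SNI to work. The function name `connectWithSni` is hypothetical, and the sketch assumes Poco's default client SSL context has already been initialised elsewhere.

#include <string>

#include <Poco/Net/SecureStreamSocket.h>
#include <Poco/Net/SocketAddress.h>

/// Minimal sketch: resolve the address, then tell the TLS layer the host name
/// so it is sent in the ClientHello (SNI) during connect().
void connectWithSni(const std::string & host, unsigned short port)
{
    Poco::Net::SocketAddress address(host, port);   /// DNS resolution happens here

    Poco::Net::SecureStreamSocket socket;           /// assumes the default SSL context is set up
    socket.setPeerHostName(host);                   /// without this, only the IP is known to TLS
    socket.connect(address);                        /// performs the TLS handshake
}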
@ -1,4 +1,6 @@
# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
OWNER(g:clickhouse)

LIBRARY()

PEERDIR(

@ -1,3 +1,5 @@
OWNER(g:clickhouse)

LIBRARY()

PEERDIR(
@ -161,7 +161,7 @@ MutableColumnPtr ColumnAggregateFunction::convertToValues(MutableColumnPtr colum
return res;
}

MutableColumnPtr ColumnAggregateFunction::predictValues(ColumnsWithTypeAndName & arguments, const Context & context) const
MutableColumnPtr ColumnAggregateFunction::predictValues(const ColumnsWithTypeAndName & arguments, const Context & context) const
{
MutableColumnPtr res = func->getReturnTypeToPredict()->createColumn();
res->reserve(data.size());

@ -119,7 +119,7 @@ public:
const char * getFamilyName() const override { return "AggregateFunction"; }
TypeIndex getDataType() const override { return TypeIndex::AggregateFunction; }

MutableColumnPtr predictValues(ColumnsWithTypeAndName & arguments, const Context & context) const;
MutableColumnPtr predictValues(const ColumnsWithTypeAndName & arguments, const Context & context) const;

size_t size() const override
{
@ -9,6 +9,7 @@
#include <Columns/ColumnsCommon.h>

#include <common/unaligned.h>
#include <common/sort.h>

#include <DataStreams/ColumnGathererStream.h>

@ -20,10 +21,6 @@
#include <Common/WeakHash.h>
#include <Common/HashTable/Hash.h>

#if !defined(ARCADIA_BUILD)
#include <miniselect/floyd_rivest_select.h> // Y_IGNORE
#endif

namespace DB
{

@ -786,11 +783,7 @@ void ColumnArray::getPermutationImpl(size_t limit, Permutation & res, Comparator
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };

if (limit)
#if !defined(ARCADIA_BUILD)
miniselect::floyd_rivest_partial_sort(res.begin(), res.begin() + limit, res.end(), less);
#else
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
#endif
partial_sort(res.begin(), res.begin() + limit, res.end(), less);
else
std::sort(res.begin(), res.end(), less);
}

@ -842,11 +835,7 @@ void ColumnArray::updatePermutationImpl(size_t limit, Permutation & res, EqualRa
return;

/// Since then we are working inside the interval.
#if !defined(ARCADIA_BUILD)
miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
#else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
#endif
partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
auto new_first = first;
for (auto j = first + 1; j < limit; ++j)
{
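
Both hunks above follow the same limit-aware pattern: when a row limit is set, only the first `limit` positions of the permutation need to be fully ordered, so a partial sort suffices; otherwise the whole permutation is sorted. A small self-contained illustration of that pattern, using `std::partial_sort` directly and a hypothetical `makePermutation` helper rather than the ClickHouse wrapper:

#include <algorithm>
#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

/// Build a permutation of [0, rows) ordered by `less`; if `limit` is non-zero,
/// only the first `limit` entries are guaranteed to be in order.
std::vector<std::size_t> makePermutation(std::size_t rows, std::size_t limit,
                                         const std::function<bool(std::size_t, std::size_t)> & less)
{
    std::vector<std::size_t> res(rows);
    std::iota(res.begin(), res.end(), 0);

    if (limit && limit < rows)
        std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
    else
        std::sort(res.begin(), res.end(), less);

    return res;
}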
@ -138,4 +138,12 @@ void ColumnConst::updateWeakHash32(WeakHash32 & hash) const
value = intHashCRC32(data_hash, value);
}

void ColumnConst::compareColumn(
const IColumn & rhs, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> & compare_results, int, int nan_direction_hint)
const
{
Int8 res = compareAt(1, 1, rhs, nan_direction_hint);
std::fill(compare_results.begin(), compare_results.end(), res);
}

}

@ -199,11 +199,7 @@ public:

void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override
{
return data->compareColumn(rhs, rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
int direction, int nan_direction_hint) const override;

MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
@ -7,10 +7,8 @@
#include <Core/BigInt.h>

#include <common/unaligned.h>
#include <common/sort.h>
#include <ext/scope_guard.h>
#if !defined(ARCADIA_BUILD)
#include <miniselect/floyd_rivest_select.h> // Y_IGNORE
#endif

#include <IO/WriteHelpers.h>

@ -57,32 +55,16 @@ void ColumnDecimal<T>::compareColumn(const IColumn & rhs, size_t rhs_row_num,
template <typename T>
StringRef ColumnDecimal<T>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
if constexpr (is_POD)
{
auto * pos = arena.allocContinue(sizeof(T), begin);
memcpy(pos, &data[n], sizeof(T));
return StringRef(pos, sizeof(T));
}
else
{
char * pos = arena.allocContinue(BigInt<T>::size, begin);
return BigInt<Int256>::serialize(data[n], pos);
}
auto * pos = arena.allocContinue(sizeof(T), begin);
memcpy(pos, &data[n], sizeof(T));
return StringRef(pos, sizeof(T));
}

template <typename T>
const char * ColumnDecimal<T>::deserializeAndInsertFromArena(const char * pos)
{
if constexpr (is_POD)
{
data.push_back(unalignedLoad<T>(pos));
return pos + sizeof(T);
}
else
{
data.push_back(BigInt<Int256>::deserialize(pos));
return pos + BigInt<Int256>::size;
}
data.push_back(unalignedLoad<T>(pos));
return pos + sizeof(T);
}

template <typename T>

@ -197,21 +179,11 @@ void ColumnDecimal<T>::updatePermutation(bool reverse, size_t limit, int, IColum
/// Since then we are working inside the interval.

if (reverse)
#if !defined(ARCADIA_BUILD)
miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last,
partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last,
[this](size_t a, size_t b) { return data[a] > data[b]; });
#else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last,
[this](size_t a, size_t b) { return data[a] > data[b]; });
#endif
else
#if !defined(ARCADIA_BUILD)
miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last,
partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last,
[this](size_t a, size_t b) { return data[a] < data[b]; });
#else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last,
[this](size_t a, size_t b) { return data[a] > data[b]; });
#endif
auto new_first = first;
for (auto j = first + 1; j < limit; ++j)
{

@ -264,24 +236,13 @@ MutableColumnPtr ColumnDecimal<T>::cloneResized(size_t size) const
new_col.data.resize(size);

size_t count = std::min(this->size(), size);
if constexpr (is_POD)
{
memcpy(new_col.data.data(), data.data(), count * sizeof(data[0]));

if (size > count)
{
void * tail = &new_col.data[count];
memset(tail, 0, (size - count) * sizeof(T));
}
}
else
{
for (size_t i = 0; i < count; i++)
new_col.data[i] = data[i];
memcpy(new_col.data.data(), data.data(), count * sizeof(data[0]));

if (size > count)
for (size_t i = count; i < size; i++)
new_col.data[i] = T{};
if (size > count)
{
void * tail = &new_col.data[count];
memset(tail, 0, (size - count) * sizeof(T));
}
}

@ -291,16 +252,9 @@ MutableColumnPtr ColumnDecimal<T>::cloneResized(size_t size) const
template <typename T>
void ColumnDecimal<T>::insertData(const char * src, size_t /*length*/)
{
if constexpr (is_POD)
{
T tmp;
memcpy(&tmp, src, sizeof(T));
data.emplace_back(tmp);
}
else
{
data.push_back(BigInt<Int256>::deserialize(src));
}
T tmp;
memcpy(&tmp, src, sizeof(T));
data.emplace_back(tmp);
}

template <typename T>

@ -315,13 +269,8 @@ void ColumnDecimal<T>::insertRangeFrom(const IColumn & src, size_t start, size_t

size_t old_size = data.size();
data.resize(old_size + length);
if constexpr (is_POD)
memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0]));
else
{
for (size_t i = 0; i < length; i++)
data[old_size + i] = src_vec.data[start + i];
}

memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0]));
}

template <typename T>
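
All of the ColumnDecimal hunks above collapse the `is_POD` / big-integer branches into the plain fixed-size path, so serialization and deserialization become a straight memcpy of `sizeof(T)` bytes. A rough standalone sketch of that pattern, with a plain byte vector standing in for the real Arena and hypothetical helper names:

#include <cstddef>
#include <cstring>
#include <vector>

/// Append one fixed-size POD value to a byte buffer and return a pointer to it,
/// mirroring serializeValueIntoArena above (the buffer is a stand-in for Arena).
template <typename T>
const char * appendFixedSize(std::vector<char> & buffer, const T & value)
{
    std::size_t old_size = buffer.size();
    buffer.resize(old_size + sizeof(T));
    std::memcpy(buffer.data() + old_size, &value, sizeof(T));
    return buffer.data() + old_size;
}

/// Read one fixed-size POD value back and return the advanced position,
/// mirroring deserializeAndInsertFromArena above.
template <typename T>
const char * readFixedSize(const char * pos, T & value)
{
    std::memcpy(&value, pos, sizeof(T));
    return pos + sizeof(T);
}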
Some files were not shown because too many files have changed in this diff.