diff --git a/.gitmodules b/.gitmodules index 61b1fe27a4c..081724c54c8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -196,7 +196,7 @@ [submodule "contrib/rocksdb"] path = contrib/rocksdb url = https://github.com/facebook/rocksdb - branch = v6.11.4 + branch = v6.14.5 [submodule "contrib/xz"] path = contrib/xz url = https://github.com/xz-mirror/xz diff --git a/CHANGELOG.md b/CHANGELOG.md index 4474675e9ee..355c664664d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +## ClickHouse release 20.11 + +### ClickHouse release v20.11.3.3-stable, 2020-11-13 + +#### Bug Fix + +* Fix rare silent crashes when query profiler is on and ClickHouse is installed on an OS with a glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([alexey-milovidov](https://github.com/alexey-milovidov)). + + ### ClickHouse release v20.11.2.1, 2020-11-11 #### Backward Incompatible Change @@ -119,6 +128,24 @@ ## ClickHouse release 20.10 +### ClickHouse release v20.10.4.1-stable, 2020-11-13 + +#### Bug Fix + +* Fix rare silent crashes when query profiler is on and ClickHouse is installed on an OS with a glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix `IN` operator over several columns and tuples with enabled `transform_null_in` setting. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)). +* Fix `optimize_read_in_order`/`optimize_aggregation_in_order` with `max_threads > 0` and an expression in `ORDER BY`. [#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)). +* Now, when parsing AVRO input, `LowCardinality` is removed from the type. Fixes [#16188](https://github.com/ClickHouse/ClickHouse/issues/16188). [#16521](https://github.com/ClickHouse/ClickHouse/pull/16521) ([Mike](https://github.com/myrrc)). +* Fix rapid growth of metadata when using MySQL Master -> MySQL Slave -> ClickHouse MaterializeMySQL Engine, with `slave_parallel_workers` enabled on the MySQL Slave, by properly shrinking GTID sets. This fixes [#15951](https://github.com/ClickHouse/ClickHouse/issues/15951). [#16504](https://github.com/ClickHouse/ClickHouse/pull/16504) ([TCeason](https://github.com/TCeason)). +* Fix DROP TABLE for Distributed (racy with INSERT). [#16409](https://github.com/ClickHouse/ClickHouse/pull/16409) ([Azat Khuzhin](https://github.com/azat)). +* Fix processing of very large entries in the replication queue. Very large entries may appear in ALTER queries if the table structure is extremely large (near 1 MB). This fixes [#16307](https://github.com/ClickHouse/ClickHouse/issues/16307). [#16332](https://github.com/ClickHouse/ClickHouse/pull/16332) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix a bug with the MySQL database engine. When the MySQL server used as a database engine is down, some queries raise an Exception because they try to get tables from the unavailable server, although it's unnecessary. For example, the query `SELECT ... 
FROM system.parts` should work only with MergeTree tables and not touch the MySQL database at all. [#16032](https://github.com/ClickHouse/ClickHouse/pull/16032) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Improvement + +* Workaround for using S3 with an nginx server as a proxy. Nginx currently does not accept URLs with an empty path like http://domain.com?delete, but vanilla aws-sdk-cpp produces this kind of URL. This commit uses a patched aws-sdk-cpp version, which makes URLs with "/" as the path in such cases, like http://domain.com/?delete. [#16813](https://github.com/ClickHouse/ClickHouse/pull/16813) ([ianton-ru](https://github.com/ianton-ru)). + + ### ClickHouse release v20.10.3.30, 2020-10-28 #### Backward Incompatible Change @@ -331,6 +358,84 @@ ## ClickHouse release 20.9 +### ClickHouse release v20.9.5.5-stable, 2020-11-13 + +#### Bug Fix + +* Fix rare silent crashes when query profiler is on and ClickHouse is installed on an OS with a glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Now, when parsing AVRO input, `LowCardinality` is removed from the type. Fixes [#16188](https://github.com/ClickHouse/ClickHouse/issues/16188). [#16521](https://github.com/ClickHouse/ClickHouse/pull/16521) ([Mike](https://github.com/myrrc)). +* Fix rapid growth of metadata when using MySQL Master -> MySQL Slave -> ClickHouse MaterializeMySQL Engine, with `slave_parallel_workers` enabled on the MySQL Slave, by properly shrinking GTID sets. This fixes [#15951](https://github.com/ClickHouse/ClickHouse/issues/15951). [#16504](https://github.com/ClickHouse/ClickHouse/pull/16504) ([TCeason](https://github.com/TCeason)). +* Fix DROP TABLE for Distributed (racy with INSERT). [#16409](https://github.com/ClickHouse/ClickHouse/pull/16409) ([Azat Khuzhin](https://github.com/azat)). +* Fix processing of very large entries in the replication queue. Very large entries may appear in ALTER queries if the table structure is extremely large (near 1 MB). This fixes [#16307](https://github.com/ClickHouse/ClickHouse/issues/16307). [#16332](https://github.com/ClickHouse/ClickHouse/pull/16332) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed the inconsistent behaviour when part of the returned data could be dropped because the set for its filtration wasn't created. [#16308](https://github.com/ClickHouse/ClickHouse/pull/16308) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix a bug with the MySQL database engine. When the MySQL server used as a database engine is down, some queries raise an Exception because they try to get tables from the unavailable server, although it's unnecessary. For example, the query `SELECT ... FROM system.parts` should work only with MergeTree tables and not touch the MySQL database at all. [#16032](https://github.com/ClickHouse/ClickHouse/pull/16032) ([Kruglov Pavel](https://github.com/Avogar)). + + ### ClickHouse release v20.9.4.76-stable (2020-10-29) + +#### Bug Fix + +* Fix double free in case of an exception in function `dictGet`. It could have happened if the dictionary was loaded with an error. [#16429](https://github.com/ClickHouse/ClickHouse/pull/16429) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix group by with totals/rollup/cube modifiers and min/max functions over group by keys. 
Fixes [#16393](https://github.com/ClickHouse/ClickHouse/issues/16393). [#16397](https://github.com/ClickHouse/ClickHouse/pull/16397) ([Anton Popov](https://github.com/CurtizJ)). +* Fix async Distributed INSERT w/ prefer_localhost_replica=0 and internal_replication. [#16358](https://github.com/ClickHouse/ClickHouse/pull/16358) ([Azat Khuzhin](https://github.com/azat)). +* Fix very wrong code in the TwoLevelStringHashTable implementation, which might lead to a memory leak. I'm surprised that this bug could lurk for so long.... [#16264](https://github.com/ClickHouse/ClickHouse/pull/16264) ([Amos Bird](https://github.com/amosbird)). +* Fix the case when memory can be overallocated regardless of the limit. This closes [#14560](https://github.com/ClickHouse/ClickHouse/issues/14560). [#16206](https://github.com/ClickHouse/ClickHouse/pull/16206) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix `ALTER MODIFY ... ORDER BY` query hang for `ReplicatedVersionedCollapsingMergeTree`. This fixes [#15980](https://github.com/ClickHouse/ClickHouse/issues/15980). [#16011](https://github.com/ClickHouse/ClickHouse/pull/16011) ([alesapin](https://github.com/alesapin)). +* Fix collate name & charset name parser and support `length = 0` for string type. [#16008](https://github.com/ClickHouse/ClickHouse/pull/16008) ([Winter Zhang](https://github.com/zhang2014)). +* Allow to use direct layout for dictionaries with complex keys. [#16007](https://github.com/ClickHouse/ClickHouse/pull/16007) ([Anton Popov](https://github.com/CurtizJ)). +* Prevent replica hang for 5-10 mins when a replication error happens after a period of inactivity. [#15987](https://github.com/ClickHouse/ClickHouse/pull/15987) ([filimonov](https://github.com/filimonov)). +* Fix rare segfaults when inserting into or selecting from MaterializedView and concurrently dropping the target table (for Atomic database engine). [#15984](https://github.com/ClickHouse/ClickHouse/pull/15984) ([tavplubix](https://github.com/tavplubix)). +* Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now considered as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes [#15628](https://github.com/ClickHouse/ClickHouse/issues/15628). [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix a crash when database creation fails. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)). +* Fixed `DROP TABLE IF EXISTS` failure with `Table ... doesn't exist` error when table is concurrently renamed (for Atomic database engine). Fixed rare deadlock when concurrently executing some DDL queries with multiple tables (like `DROP DATABASE` and `RENAME TABLE`). Fixed `DROP/DETACH DATABASE` failure with `Table ... doesn't exist` when concurrently executing `DROP/DETACH TABLE`. [#15934](https://github.com/ClickHouse/ClickHouse/pull/15934) ([tavplubix](https://github.com/tavplubix)). +* Fix incorrect empty result for query from `Distributed` table if query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible deadlocks in RBAC. [#15875](https://github.com/ClickHouse/ClickHouse/pull/15875) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix exception `Block structure mismatch` in `SELECT ... 
ORDER BY DESC` queries which were executed after an `ALTER MODIFY COLUMN` query. Fixes [#15800](https://github.com/ClickHouse/ClickHouse/issues/15800). [#15852](https://github.com/ClickHouse/ClickHouse/pull/15852) ([alesapin](https://github.com/alesapin)). +* Fix `select count()` inaccuracy for MaterializeMySQL. [#15767](https://github.com/ClickHouse/ClickHouse/pull/15767) ([tavplubix](https://github.com/tavplubix)). +* Fix some cases of queries in which only virtual columns are selected. Previously a `Not found column _nothing in block` exception might be thrown. Fixes [#12298](https://github.com/ClickHouse/ClickHouse/issues/12298). [#15756](https://github.com/ClickHouse/ClickHouse/pull/15756) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed too low default value of the `max_replicated_logs_to_keep` setting, which might cause replicas to become lost too often. Improve the lost replica recovery process by choosing the most up-to-date replica to clone. Also do not remove old parts from a lost replica, detach them instead. [#15701](https://github.com/ClickHouse/ClickHouse/pull/15701) ([tavplubix](https://github.com/tavplubix)). +* Fix error `Cannot add simple transform to empty Pipe` which happened while reading from a `Buffer` table which has a different structure than its destination table. It was possible if the destination table returned an empty result for the query. Fixes [#15529](https://github.com/ClickHouse/ClickHouse/issues/15529). [#15662](https://github.com/ClickHouse/ClickHouse/pull/15662) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed a bug with globs in the S3 table function: the region from the URL was not applied to the S3 client configuration. [#15646](https://github.com/ClickHouse/ClickHouse/pull/15646) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Decrement the `ReadonlyReplica` metric when detaching read-only tables. This fixes [#15598](https://github.com/ClickHouse/ClickHouse/issues/15598). [#15592](https://github.com/ClickHouse/ClickHouse/pull/15592) ([sundyli](https://github.com/sundy-li)). +* Throw an error when a single parameter is passed to ReplicatedMergeTree instead of ignoring it. [#15516](https://github.com/ClickHouse/ClickHouse/pull/15516) ([nvartolomei](https://github.com/nvartolomei)). + +#### Improvement + +* Now it's allowed to execute `ALTER ... ON CLUSTER` queries regardless of the `` setting in cluster config. [#16075](https://github.com/ClickHouse/ClickHouse/pull/16075) ([alesapin](https://github.com/alesapin)). +* Unfold `{database}`, `{table}` and `{uuid}` macros in `ReplicatedMergeTree` arguments on table creation. [#16160](https://github.com/ClickHouse/ClickHouse/pull/16160) ([tavplubix](https://github.com/tavplubix)). + + ### ClickHouse release v20.9.3.45-stable (2020-10-09) + +#### Bug Fix + +* Fix error `Cannot find column` which may happen at insertion into a `MATERIALIZED VIEW` in case the query for the `MV` contains `ARRAY JOIN`. [#15717](https://github.com/ClickHouse/ClickHouse/pull/15717) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix race condition in AMQP-CPP. [#15667](https://github.com/ClickHouse/ClickHouse/pull/15667) ([alesapin](https://github.com/alesapin)). +* Fix the order of destruction for resources in the `ReadFromStorage` step of the query plan. It might cause crashes in rare cases. Possibly connected with [#15610](https://github.com/ClickHouse/ClickHouse/issues/15610). [#15645](https://github.com/ClickHouse/ClickHouse/pull/15645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed `Element ... 
is not a constant expression` error when using a `JSON*` function result in `VALUES`, `LIMIT` or the right side of the `IN` operator. [#15589](https://github.com/ClickHouse/ClickHouse/pull/15589) ([tavplubix](https://github.com/tavplubix)). +* Prevent the possibility of error message `Could not calculate available disk space (statvfs), errno: 4, strerror: Interrupted system call`. This fixes [#15541](https://github.com/ClickHouse/ClickHouse/issues/15541). [#15557](https://github.com/ClickHouse/ClickHouse/pull/15557) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Significantly reduce memory usage in AggregatingInOrderTransform/optimize_aggregation_in_order. [#15543](https://github.com/ClickHouse/ClickHouse/pull/15543) ([Azat Khuzhin](https://github.com/azat)). +* Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)). +* Fix bug when the `ILIKE` operator stops being case-insensitive if `LIKE` with the same pattern was executed. [#15536](https://github.com/ClickHouse/ClickHouse/pull/15536) ([alesapin](https://github.com/alesapin)). +* Fix `Missing columns` errors when selecting columns which are absent in data, but depend on other columns which are also absent in data. Fixes [#15530](https://github.com/ClickHouse/ClickHouse/issues/15530). [#15532](https://github.com/ClickHouse/ClickHouse/pull/15532) ([alesapin](https://github.com/alesapin)). +* Fix bug with event subscription in DDLWorker which rarely may lead to query hangs in `ON CLUSTER`. Introduced in [#13450](https://github.com/ClickHouse/ClickHouse/issues/13450). [#15477](https://github.com/ClickHouse/ClickHouse/pull/15477) ([alesapin](https://github.com/alesapin)). +* Report proper error when the second argument of the `boundingRatio` aggregate function has a wrong type. [#15407](https://github.com/ClickHouse/ClickHouse/pull/15407) ([detailyang](https://github.com/detailyang)). +* Fix bug where queries like `SELECT toStartOfDay(today())` fail complaining about an empty time_zone argument. [#15319](https://github.com/ClickHouse/ClickHouse/pull/15319) ([Bharat Nallan](https://github.com/bharatnc)). +* Fix race condition during MergeTree table rename and background cleanup. [#15304](https://github.com/ClickHouse/ClickHouse/pull/15304) ([alesapin](https://github.com/alesapin)). +* Fix rare race condition on server startup when system.logs are enabled. [#15300](https://github.com/ClickHouse/ClickHouse/pull/15300) ([alesapin](https://github.com/alesapin)). +* Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix instance crash when using joinGet with LowCardinality types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)). +* Fix bug in table engine `Buffer` which didn't allow inserting data with a new structure into `Buffer` after an `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). [#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)). +* Adjust decimals field size in mysql column definition packet. 
[#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)). +* Fixed `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in an Atomic database when running clickhouse-server in docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([tavplubix](https://github.com/tavplubix)). +* Fix to make predicate push down work when the subquery contains the finalizeAggregation function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)). +* Fix a problem where the server may get stuck on startup while talking to ZooKeeper, if the configuration files have to be fetched from ZK (using the `from_zk` include option). This fixes [#14814](https://github.com/ClickHouse/ClickHouse/issues/14814). [#14843](https://github.com/ClickHouse/ClickHouse/pull/14843) ([Alexander Kuzmenkov](https://github.com/akuzm)). + +#### Improvement + +* Now it's possible to change the type of the version column for `VersionedCollapsingMergeTree` with an `ALTER` query. [#15442](https://github.com/ClickHouse/ClickHouse/pull/15442) ([alesapin](https://github.com/alesapin)). + + ### ClickHouse release v20.9.2.20, 2020-09-22 #### New Feature @@ -405,6 +510,110 @@ ## ClickHouse release 20.8 +### ClickHouse release v20.8.6.6-lts, 2020-11-13 + +#### Bug Fix + +* Fix rare silent crashes when query profiler is on and ClickHouse is installed on an OS with a glibc version that has (supposedly) broken asynchronous unwind tables for some functions. This fixes [#15301](https://github.com/ClickHouse/ClickHouse/issues/15301). This fixes [#13098](https://github.com/ClickHouse/ClickHouse/issues/13098). [#16846](https://github.com/ClickHouse/ClickHouse/pull/16846) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Now, when parsing AVRO input, `LowCardinality` is removed from the type. Fixes [#16188](https://github.com/ClickHouse/ClickHouse/issues/16188). [#16521](https://github.com/ClickHouse/ClickHouse/pull/16521) ([Mike](https://github.com/myrrc)). +* Fix rapid growth of metadata when using MySQL Master -> MySQL Slave -> ClickHouse MaterializeMySQL Engine, with `slave_parallel_workers` enabled on the MySQL Slave, by properly shrinking GTID sets. This fixes [#15951](https://github.com/ClickHouse/ClickHouse/issues/15951). [#16504](https://github.com/ClickHouse/ClickHouse/pull/16504) ([TCeason](https://github.com/TCeason)). +* Fix DROP TABLE for Distributed (racy with INSERT). [#16409](https://github.com/ClickHouse/ClickHouse/pull/16409) ([Azat Khuzhin](https://github.com/azat)). +* Fix processing of very large entries in the replication queue. Very large entries may appear in ALTER queries if the table structure is extremely large (near 1 MB). This fixes [#16307](https://github.com/ClickHouse/ClickHouse/issues/16307). [#16332](https://github.com/ClickHouse/ClickHouse/pull/16332) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed the inconsistent behaviour when part of the returned data could be dropped because the set for its filtration wasn't created. [#16308](https://github.com/ClickHouse/ClickHouse/pull/16308) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix a bug with the MySQL database engine. When the MySQL server used as a database engine is down, some queries raise an Exception because they try to get tables from the unavailable server, although it's unnecessary. For example, the query `SELECT ... 
FROM system.parts` should work only with MergeTree tables and not touch the MySQL database at all. [#16032](https://github.com/ClickHouse/ClickHouse/pull/16032) ([Kruglov Pavel](https://github.com/Avogar)). + + ### ClickHouse release v20.8.5.45-lts, 2020-10-29 + +#### Bug Fix + +* Fix double free in case of an exception in function `dictGet`. It could have happened if the dictionary was loaded with an error. [#16429](https://github.com/ClickHouse/ClickHouse/pull/16429) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix group by with totals/rollup/cube modifiers and min/max functions over group by keys. Fixes [#16393](https://github.com/ClickHouse/ClickHouse/issues/16393). [#16397](https://github.com/ClickHouse/ClickHouse/pull/16397) ([Anton Popov](https://github.com/CurtizJ)). +* Fix async Distributed INSERT w/ prefer_localhost_replica=0 and internal_replication. [#16358](https://github.com/ClickHouse/ClickHouse/pull/16358) ([Azat Khuzhin](https://github.com/azat)). +* Fix a possible memory leak during `GROUP BY` with string keys, caused by an error in the `TwoLevelStringHashTable` implementation. [#16264](https://github.com/ClickHouse/ClickHouse/pull/16264) ([Amos Bird](https://github.com/amosbird)). +* Fix the case when memory can be overallocated regardless of the limit. This closes [#14560](https://github.com/ClickHouse/ClickHouse/issues/14560). [#16206](https://github.com/ClickHouse/ClickHouse/pull/16206) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix `ALTER MODIFY ... ORDER BY` query hang for `ReplicatedVersionedCollapsingMergeTree`. This fixes [#15980](https://github.com/ClickHouse/ClickHouse/issues/15980). [#16011](https://github.com/ClickHouse/ClickHouse/pull/16011) ([alesapin](https://github.com/alesapin)). +* Fix collate name & charset name parser and support `length = 0` for string type. [#16008](https://github.com/ClickHouse/ClickHouse/pull/16008) ([Winter Zhang](https://github.com/zhang2014)). +* Allow to use direct layout for dictionaries with complex keys. [#16007](https://github.com/ClickHouse/ClickHouse/pull/16007) ([Anton Popov](https://github.com/CurtizJ)). +* Prevent replica hang for 5-10 mins when a replication error happens after a period of inactivity. [#15987](https://github.com/ClickHouse/ClickHouse/pull/15987) ([filimonov](https://github.com/filimonov)). +* Fix rare segfaults when inserting into or selecting from MaterializedView and concurrently dropping the target table (for Atomic database engine). [#15984](https://github.com/ClickHouse/ClickHouse/pull/15984) ([tavplubix](https://github.com/tavplubix)). +* Fix ambiguity in parsing of settings profiles: `CREATE USER ... SETTINGS profile readonly` is now considered as using a profile named `readonly`, not a setting named `profile` with the readonly constraint. This fixes [#15628](https://github.com/ClickHouse/ClickHouse/issues/15628). [#15982](https://github.com/ClickHouse/ClickHouse/pull/15982) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix a crash when database creation fails. [#15954](https://github.com/ClickHouse/ClickHouse/pull/15954) ([Winter Zhang](https://github.com/zhang2014)). +* Fixed `DROP TABLE IF EXISTS` failure with `Table ... doesn't exist` error when table is concurrently renamed (for Atomic database engine). Fixed rare deadlock when concurrently executing some DDL queries with multiple tables (like `DROP DATABASE` and `RENAME TABLE`). Fixed `DROP/DETACH DATABASE` failure with `Table ... doesn't exist` when concurrently executing `DROP/DETACH TABLE`. 
[#15934](https://github.com/ClickHouse/ClickHouse/pull/15934) ([tavplubix](https://github.com/tavplubix)). +* Fix incorrect empty result for query from `Distributed` table if query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible deadlocks in RBAC. [#15875](https://github.com/ClickHouse/ClickHouse/pull/15875) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix exception `Block structure mismatch` in `SELECT ... ORDER BY DESC` queries which were executed after an `ALTER MODIFY COLUMN` query. Fixes [#15800](https://github.com/ClickHouse/ClickHouse/issues/15800). [#15852](https://github.com/ClickHouse/ClickHouse/pull/15852) ([alesapin](https://github.com/alesapin)). +* Fix some cases of queries in which only virtual columns are selected. Previously a `Not found column _nothing in block` exception might be thrown. Fixes [#12298](https://github.com/ClickHouse/ClickHouse/issues/12298). [#15756](https://github.com/ClickHouse/ClickHouse/pull/15756) ([Anton Popov](https://github.com/CurtizJ)). +* Fix error `Cannot find column` which may happen at insertion into a `MATERIALIZED VIEW` in case the query for the `MV` contains `ARRAY JOIN`. [#15717](https://github.com/ClickHouse/ClickHouse/pull/15717) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed too low default value of the `max_replicated_logs_to_keep` setting, which might cause replicas to become lost too often. Improve the lost replica recovery process by choosing the most up-to-date replica to clone. Also do not remove old parts from a lost replica, detach them instead. [#15701](https://github.com/ClickHouse/ClickHouse/pull/15701) ([tavplubix](https://github.com/tavplubix)). +* Fix error `Cannot add simple transform to empty Pipe` which happened while reading from a `Buffer` table which has a different structure than its destination table. It was possible if the destination table returned an empty result for the query. Fixes [#15529](https://github.com/ClickHouse/ClickHouse/issues/15529). [#15662](https://github.com/ClickHouse/ClickHouse/pull/15662) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed a bug with globs in the S3 table function: the region from the URL was not applied to the S3 client configuration. [#15646](https://github.com/ClickHouse/ClickHouse/pull/15646) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Decrement the `ReadonlyReplica` metric when detaching read-only tables. This fixes [#15598](https://github.com/ClickHouse/ClickHouse/issues/15598). [#15592](https://github.com/ClickHouse/ClickHouse/pull/15592) ([sundyli](https://github.com/sundy-li)). +* Throw an error when a single parameter is passed to ReplicatedMergeTree instead of ignoring it. [#15516](https://github.com/ClickHouse/ClickHouse/pull/15516) ([nvartolomei](https://github.com/nvartolomei)). + +#### Improvement + +* Now it's allowed to execute `ALTER ... ON CLUSTER` queries regardless of the `` setting in cluster config. [#16075](https://github.com/ClickHouse/ClickHouse/pull/16075) ([alesapin](https://github.com/alesapin)). +* Unfold `{database}`, `{table}` and `{uuid}` macros in `ReplicatedMergeTree` arguments on table creation (see the sketch below). [#16159](https://github.com/ClickHouse/ClickHouse/pull/16159) ([tavplubix](https://github.com/tavplubix)). 
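A minimal sketch of what the macro unfolding in the last improvement enables (illustrative only, not part of the diff; the database, table, and column names are hypothetical, and a `{replica}` macro is assumed to be defined in the server's macros configuration):

```sql
-- The {uuid}, {database} and {table} macros in the ReplicatedMergeTree
-- arguments are expanded once at CREATE time and stored in the table metadata.
CREATE TABLE test.events
(
    event_date Date,
    event_id UInt64
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{uuid}/{database}/{table}', '{replica}')
ORDER BY event_id;
```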
+ + ### ClickHouse release v20.8.4.11-lts, 2020-10-09 + +#### Bug Fix + +* Fix the order of destruction for resources in the `ReadFromStorage` step of the query plan. It might cause crashes in rare cases. Possibly connected with [#15610](https://github.com/ClickHouse/ClickHouse/issues/15610). [#15645](https://github.com/ClickHouse/ClickHouse/pull/15645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed `Element ... is not a constant expression` error when using a `JSON*` function result in `VALUES`, `LIMIT` or the right side of the `IN` operator. [#15589](https://github.com/ClickHouse/ClickHouse/pull/15589) ([tavplubix](https://github.com/tavplubix)). +* Prevent the possibility of error message `Could not calculate available disk space (statvfs), errno: 4, strerror: Interrupted system call`. This fixes [#15541](https://github.com/ClickHouse/ClickHouse/issues/15541). [#15557](https://github.com/ClickHouse/ClickHouse/pull/15557) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Significantly reduce memory usage in AggregatingInOrderTransform/optimize_aggregation_in_order. [#15543](https://github.com/ClickHouse/ClickHouse/pull/15543) ([Azat Khuzhin](https://github.com/azat)). +* Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)). +* Fix bug when the `ILIKE` operator stops being case-insensitive if `LIKE` with the same pattern was executed. [#15536](https://github.com/ClickHouse/ClickHouse/pull/15536) ([alesapin](https://github.com/alesapin)). +* Fix `Missing columns` errors when selecting columns which are absent in data, but depend on other columns which are also absent in data. Fixes [#15530](https://github.com/ClickHouse/ClickHouse/issues/15530). [#15532](https://github.com/ClickHouse/ClickHouse/pull/15532) ([alesapin](https://github.com/alesapin)). +* Fix bug with event subscription in DDLWorker which rarely may lead to query hangs in `ON CLUSTER`. Introduced in [#13450](https://github.com/ClickHouse/ClickHouse/issues/13450). [#15477](https://github.com/ClickHouse/ClickHouse/pull/15477) ([alesapin](https://github.com/alesapin)). +* Report proper error when the second argument of the `boundingRatio` aggregate function has a wrong type. [#15407](https://github.com/ClickHouse/ClickHouse/pull/15407) ([detailyang](https://github.com/detailyang)). +* Fix race condition during MergeTree table rename and background cleanup. [#15304](https://github.com/ClickHouse/ClickHouse/pull/15304) ([alesapin](https://github.com/alesapin)). +* Fix rare race condition on server startup when system.logs are enabled. [#15300](https://github.com/ClickHouse/ClickHouse/pull/15300) ([alesapin](https://github.com/alesapin)). +* Fix MSan report in QueryLog. Uninitialized memory can be used for the field `memory_usage`. [#15258](https://github.com/ClickHouse/ClickHouse/pull/15258) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix instance crash when using joinGet with LowCardinality types. This fixes [#15214](https://github.com/ClickHouse/ClickHouse/issues/15214). [#15220](https://github.com/ClickHouse/ClickHouse/pull/15220) ([Amos Bird](https://github.com/amosbird)). +* Fix bug in table engine `Buffer` which didn't allow inserting data with a new structure into `Buffer` after an `ALTER` query. Fixes [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117). 
[#15192](https://github.com/ClickHouse/ClickHouse/pull/15192) ([alesapin](https://github.com/alesapin)). +* Adjust decimals field size in mysql column definition packet. [#15152](https://github.com/ClickHouse/ClickHouse/pull/15152) ([maqroll](https://github.com/maqroll)). +* We already use padded comparison between String and FixedString (https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h#L333). This PR applies the same logic to field comparison, which corrects the usage of FixedString as primary keys. This fixes [#14908](https://github.com/ClickHouse/ClickHouse/issues/14908). [#15033](https://github.com/ClickHouse/ClickHouse/pull/15033) ([Amos Bird](https://github.com/amosbird)). +* If function `bar` was called with specifically crafted arguments, buffer overflow was possible. This closes [#13926](https://github.com/ClickHouse/ClickHouse/issues/13926). [#15028](https://github.com/ClickHouse/ClickHouse/pull/15028) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed `Cannot rename ... errno: 22, strerror: Invalid argument` error on DDL query execution in an Atomic database when running clickhouse-server in docker on Mac OS. [#15024](https://github.com/ClickHouse/ClickHouse/pull/15024) ([tavplubix](https://github.com/tavplubix)). +* Now settings `number_of_free_entries_in_pool_to_execute_mutation` and `number_of_free_entries_in_pool_to_lower_max_size_of_merge` can be equal to `background_pool_size`. [#14975](https://github.com/ClickHouse/ClickHouse/pull/14975) ([alesapin](https://github.com/alesapin)). +* Fix to make predicate push down work when the subquery contains the finalizeAggregation function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)). +* Publish CPU frequencies per logical core in `system.asynchronous_metrics`. This fixes [#14923](https://github.com/ClickHouse/ClickHouse/issues/14923). [#14924](https://github.com/ClickHouse/ClickHouse/pull/14924) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Fixed `.metadata.tmp File exists` error when using the `MaterializeMySQL` database engine. [#14898](https://github.com/ClickHouse/ClickHouse/pull/14898) ([Winter Zhang](https://github.com/zhang2014)). +* Fix a problem where the server may get stuck on startup while talking to ZooKeeper, if the configuration files have to be fetched from ZK (using the `from_zk` include option). This fixes [#14814](https://github.com/ClickHouse/ClickHouse/issues/14814). [#14843](https://github.com/ClickHouse/ClickHouse/pull/14843) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* Fix wrong monotonicity detection for shrunk `Int -> Int` cast of signed types. It might lead to an incorrect query result. This bug was revealed by [#14513](https://github.com/ClickHouse/ClickHouse/issues/14513). [#14783](https://github.com/ClickHouse/ClickHouse/pull/14783) ([Amos Bird](https://github.com/amosbird)). +* Fixed the incorrect sorting order of `Nullable` columns. This fixes [#14344](https://github.com/ClickHouse/ClickHouse/issues/14344). [#14495](https://github.com/ClickHouse/ClickHouse/pull/14495) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Improvement + +* Now it's possible to change the type of the version column for `VersionedCollapsingMergeTree` with an `ALTER` query (see the sketch below). [#15442](https://github.com/ClickHouse/ClickHouse/pull/15442) ([alesapin](https://github.com/alesapin)). 
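A minimal sketch of the `VersionedCollapsingMergeTree` improvement above (illustrative only; the table and column names are hypothetical, and the sketch assumes widening the version column to a larger integer type):

```sql
-- Hypothetical table: the version column starts as UInt32.
CREATE TABLE test.vcmt
(
    key UInt64,
    sign Int8,
    version UInt32
)
ENGINE = VersionedCollapsingMergeTree(sign, version)
ORDER BY key;

-- With the change from [#15442], altering the version column's type is accepted:
ALTER TABLE test.vcmt MODIFY COLUMN version UInt64;
```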
+ + ### ClickHouse release v20.8.3.18-stable, 2020-09-18 + +#### Bug Fix + +* Fix the issue when some invocations of the `extractAllGroups` function may trigger a "Memory limit exceeded" error. This fixes [#13383](https://github.com/ClickHouse/ClickHouse/issues/13383). [#14889](https://github.com/ClickHouse/ClickHouse/pull/14889) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix SIGSEGV for an attempt to INSERT into StorageFile(fd). [#14887](https://github.com/ClickHouse/ClickHouse/pull/14887) ([Azat Khuzhin](https://github.com/azat)). +* Fix rare error in `SELECT` queries when the queried column has a `DEFAULT` expression which depends on another column which also has `DEFAULT` and is not present in the select query and does not exist on disk. Partially fixes [#14531](https://github.com/ClickHouse/ClickHouse/issues/14531). [#14845](https://github.com/ClickHouse/ClickHouse/pull/14845) ([alesapin](https://github.com/alesapin)). +* Fixed missed default database name in metadata of a materialized view when executing `ALTER ... MODIFY QUERY`. [#14664](https://github.com/ClickHouse/ClickHouse/pull/14664) ([tavplubix](https://github.com/tavplubix)). +* Fix bug when an `ALTER UPDATE` mutation with a Nullable column in the assignment expression and a constant value (like `UPDATE x = 42`) leads to an incorrect value in the column or a segfault. Fixes [#13634](https://github.com/ClickHouse/ClickHouse/issues/13634), [#14045](https://github.com/ClickHouse/ClickHouse/issues/14045). [#14646](https://github.com/ClickHouse/ClickHouse/pull/14646) ([alesapin](https://github.com/alesapin)). +* Fix wrong Decimal multiplication result that caused a wrong decimal scale of the result column. [#14603](https://github.com/ClickHouse/ClickHouse/pull/14603) ([Artem Zuikov](https://github.com/4ertus2)). +* Added the checker, as neither calling `lc->isNullable()` nor calling `ls->getDictionaryPtr()->isNullable()` would return the correct result. [#14591](https://github.com/ClickHouse/ClickHouse/pull/14591) ([myrrc](https://github.com/myrrc)). +* Cleanup data directory after ZooKeeper exceptions during CreateQuery for StorageReplicatedMergeTree Engine. [#14563](https://github.com/ClickHouse/ClickHouse/pull/14563) ([Bharat Nallan](https://github.com/bharatnc)). +* Fix rare segfaults in functions with the combinator -Resample, which could appear as a result of overflow with very large parameters. [#14562](https://github.com/ClickHouse/ClickHouse/pull/14562) ([Anton Popov](https://github.com/CurtizJ)). + +#### Improvement + +* Speed up server shutdown process if there are ongoing S3 requests. [#14858](https://github.com/ClickHouse/ClickHouse/pull/14858) ([Pavel Kovalenko](https://github.com/Jokser)). +* Allow using multi-volume storage configuration in storage Distributed. [#14839](https://github.com/ClickHouse/ClickHouse/pull/14839) ([Pavel Kovalenko](https://github.com/Jokser)). +* Speed up server shutdown process if there are ongoing S3 requests. [#14496](https://github.com/ClickHouse/ClickHouse/pull/14496) ([Pavel Kovalenko](https://github.com/Jokser)). +* Support custom codecs in compact parts. [#12183](https://github.com/ClickHouse/ClickHouse/pull/12183) ([Anton Popov](https://github.com/CurtizJ)). + + ### ClickHouse release v20.8.2.3-stable, 2020-09-08 #### Backward Incompatible Change @@ -1755,6 +1964,74 @@ No changes compared to v20.4.3.16-stable. ## ClickHouse release v20.3 + +### ClickHouse release v20.3.21.2-lts, 2020-11-02 + +#### Bug Fix + +* Fix dictGet in sharding_key (and similar places, i.e. when the function context is stored permanently). 
[#16205](https://github.com/ClickHouse/ClickHouse/pull/16205) ([Azat Khuzhin](https://github.com/azat)). +* Fix incorrect empty result for query from `Distributed` table if query has `WHERE`, `PREWHERE` and `GLOBAL IN`. Fixes [#15792](https://github.com/ClickHouse/ClickHouse/issues/15792). [#15933](https://github.com/ClickHouse/ClickHouse/pull/15933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix missing or excessive headers in `TSV/CSVWithNames` formats. This fixes [#12504](https://github.com/ClickHouse/ClickHouse/issues/12504). [#13343](https://github.com/ClickHouse/ClickHouse/pull/13343) ([Azat Khuzhin](https://github.com/azat)). + + ### ClickHouse release v20.3.20.6-lts, 2020-10-09 + +#### Bug Fix + +* Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15724](https://github.com/ClickHouse/ClickHouse/pull/15724), [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)). +* Fix hang of queries with a lot of subqueries to the same table of the `MySQL` engine. Previously, if there were more than 16 subqueries to the same `MySQL` table in a query, it hung forever. [#15299](https://github.com/ClickHouse/ClickHouse/pull/15299) ([Anton Popov](https://github.com/CurtizJ)). +* Fix 'Unknown identifier' in GROUP BY when the query has a JOIN over a Merge table. [#15242](https://github.com/ClickHouse/ClickHouse/pull/15242) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix to make predicate push down work when the subquery contains the finalizeAggregation function. Fixes [#14847](https://github.com/ClickHouse/ClickHouse/issues/14847). [#14937](https://github.com/ClickHouse/ClickHouse/pull/14937) ([filimonov](https://github.com/filimonov)). +* Concurrent `ALTER ... REPLACE/MOVE PARTITION ...` queries might cause a deadlock. It's fixed. [#13626](https://github.com/ClickHouse/ClickHouse/pull/13626) ([tavplubix](https://github.com/tavplubix)). + + ### ClickHouse release v20.3.19.4-lts, 2020-09-18 + +#### Bug Fix + +* Fix rare error in `SELECT` queries when the queried column has a `DEFAULT` expression which depends on another column which also has `DEFAULT` and is not present in the select query and does not exist on disk. Partially fixes [#14531](https://github.com/ClickHouse/ClickHouse/issues/14531). [#14845](https://github.com/ClickHouse/ClickHouse/pull/14845) ([alesapin](https://github.com/alesapin)). +* Fix bug when an `ALTER UPDATE` mutation with a Nullable column in the assignment expression and a constant value (like `UPDATE x = 42`) leads to an incorrect value in the column or a segfault. Fixes [#13634](https://github.com/ClickHouse/ClickHouse/issues/13634), [#14045](https://github.com/ClickHouse/ClickHouse/issues/14045). [#14646](https://github.com/ClickHouse/ClickHouse/pull/14646) ([alesapin](https://github.com/alesapin)). +* Fix wrong Decimal multiplication result that caused a wrong decimal scale of the result column. [#14603](https://github.com/ClickHouse/ClickHouse/pull/14603) ([Artem Zuikov](https://github.com/4ertus2)). + +#### Improvement + +* Support custom codecs in compact parts. [#12183](https://github.com/ClickHouse/ClickHouse/pull/12183) ([Anton Popov](https://github.com/CurtizJ)). + + ### ClickHouse release v20.3.18.10-lts, 2020-09-08 + +#### Bug Fix + +* Stop query execution if an exception happened in `PipelineExecutor` itself. This could prevent rare possible query hangs. Continuation of [#14334](https://github.com/ClickHouse/ClickHouse/issues/14334). 
[#14402](https://github.com/ClickHouse/ClickHouse/pull/14402) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed the behaviour when the cache dictionary sometimes returned the default value instead of the value present in the source. [#13624](https://github.com/ClickHouse/ClickHouse/pull/13624) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix parsing row policies from users.xml when names of databases or tables contain dots. This fixes [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779), [#12527](https://github.com/ClickHouse/ClickHouse/issues/12527). [#13199](https://github.com/ClickHouse/ClickHouse/pull/13199) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix `CAST(Nullable(String), Enum())`. [#12745](https://github.com/ClickHouse/ClickHouse/pull/12745) ([Azat Khuzhin](https://github.com/azat)). +* Fixed data race in `text_log`. It does not correspond to any real bug. [#9726](https://github.com/ClickHouse/ClickHouse/pull/9726) ([alexey-milovidov](https://github.com/alexey-milovidov)). + +#### Improvement + +* Fix wrong error for long queries. It was possible to get a syntax error other than `Max query size exceeded` for a correct query. [#13928](https://github.com/ClickHouse/ClickHouse/pull/13928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Return NULL/zero when a value is not parsed completely in the parseDateTimeBestEffortOrNull/Zero functions. This fixes [#7876](https://github.com/ClickHouse/ClickHouse/issues/7876). [#11653](https://github.com/ClickHouse/ClickHouse/pull/11653) ([alexey-milovidov](https://github.com/alexey-milovidov)). + +#### Performance Improvement + +* Slightly optimize very short queries with LowCardinality. [#14129](https://github.com/ClickHouse/ClickHouse/pull/14129) ([Anton Popov](https://github.com/CurtizJ)). + +#### Build/Testing/Packaging Improvement + +* Fix UBSan report (adding zero to nullptr) in HashTable that appeared after migration to clang-10. [#10638](https://github.com/ClickHouse/ClickHouse/pull/10638) ([alexey-milovidov](https://github.com/alexey-milovidov)). + + ### ClickHouse release v20.3.17.173-lts, 2020-08-15 + +#### Bug Fix + +* Fix crash in JOIN with StorageMerge and `set enable_optimize_predicate_expression=1`. [#13679](https://github.com/ClickHouse/ClickHouse/pull/13679) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix invalid return type for comparison of tuples with `NULL` elements. Fixes [#12461](https://github.com/ClickHouse/ClickHouse/issues/12461). [#13420](https://github.com/ClickHouse/ClickHouse/pull/13420) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix queries with constant columns and `ORDER BY` prefix of primary key (see the sketch below). [#13396](https://github.com/ClickHouse/ClickHouse/pull/13396) ([Anton Popov](https://github.com/CurtizJ)). +* Return passed number for numbers with MSB set in roundUpToPowerOfTwoOrZero(). [#13234](https://github.com/ClickHouse/ClickHouse/pull/13234) ([Azat Khuzhin](https://github.com/azat)). 
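A minimal sketch of the constant-columns `ORDER BY` fix referenced above (illustrative only; the table and column names are hypothetical — the point is that `a` becomes a constant under `WHERE a = 1` while `ORDER BY` uses a prefix of the primary key):

```sql
-- Hypothetical table with a compound primary key.
CREATE TABLE test.ord
(
    a UInt32,
    b UInt32
)
ENGINE = MergeTree
ORDER BY (a, b);

-- `a` is constant in this query; ordering by the (a, b) primary-key prefix
-- previously could return wrong results and is fixed by [#13396].
SELECT * FROM test.ord WHERE a = 1 ORDER BY a, b;
```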
+ + ### ClickHouse release v20.3.16.165-lts 2020-08-10 #### Bug Fix diff --git a/base/common/StringRef.h b/base/common/StringRef.h index b51b95456cb..ac9d7c47b72 100644 --- a/base/common/StringRef.h +++ b/base/common/StringRef.h @@ -1,6 +1,7 @@ #pragma once #include <cassert> +#include <stdexcept> // for std::logic_error #include <string> #include <vector> #include <functional> diff --git a/base/common/sort.h b/base/common/sort.h new file mode 100644 index 00000000000..2128014ca5e --- /dev/null +++ b/base/common/sort.h @@ -0,0 +1,37 @@ +#pragma once + +#if !defined(ARCADIA_BUILD) +# include <miniselect/floyd_rivest_select.h> // Y_IGNORE +#else +# include <algorithm> +#endif + +template <class RandomIt> +void nth_element(RandomIt first, RandomIt nth, RandomIt last) +{ +#if !defined(ARCADIA_BUILD) + ::miniselect::floyd_rivest_select(first, nth, last); +#else + ::std::nth_element(first, nth, last); +#endif +} + +template <class RandomIt> +void partial_sort(RandomIt first, RandomIt middle, RandomIt last) +{ +#if !defined(ARCADIA_BUILD) + ::miniselect::floyd_rivest_partial_sort(first, middle, last); +#else + ::std::partial_sort(first, middle, last); +#endif +} + +template <class RandomIt, class Compare> +void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compare) +{ +#if !defined(ARCADIA_BUILD) + ::miniselect::floyd_rivest_partial_sort(first, middle, last, compare); +#else + ::std::partial_sort(first, middle, last, compare); +#endif +} diff --git a/base/common/wide_integer_impl.h b/base/common/wide_integer_impl.h index d90bde30a43..2a889819c11 100644 --- a/base/common/wide_integer_impl.h +++ b/base/common/wide_integer_impl.h @@ -5,6 +5,9 @@ /// (See at http://www.boost.org/LICENSE_1_0.txt) #include "throwError.h" +#include <cmath> +#include <cfloat> +#include <cassert> namespace wide { @@ -192,7 +195,7 @@ struct integer<Bits, Signed>::_impl } template <typename T> - constexpr static auto to_Integral(T f) noexcept + __attribute__((no_sanitize("undefined"))) constexpr static auto to_Integral(T f) noexcept { if constexpr (std::is_same_v<T, __int128>) return f; @@ -225,25 +228,54 @@ struct integer<Bits, Signed>::_impl self.items[i] = 0; } - constexpr static void wide_integer_from_bultin(integer<Bits, Signed> & self, double rhs) noexcept - { - if ((rhs > 0 && rhs < std::numeric_limits<int64_t>::max()) || (rhs < 0 && rhs > std::numeric_limits<int64_t>::min())) + /** + * N.B. t is constructed from double, so max(t) = max(double) ~ 2^310 + * the recursive call happens when t / 2^64 > 2^64, so there won't be more than 5 of them. + * + * t = a1 * max_int + b1, a1 > max_int, b1 < max_int + * a1 = a2 * max_int + b2, a2 > max_int, b2 < max_int + * a_(n - 1) = a_n * max_int + b_n, a_n <= max_int <- base case. + */ + template <typename T> + constexpr static void set_multiplier(integer<Bits, Signed> & self, T t) noexcept { + constexpr uint64_t max_int = std::numeric_limits<uint64_t>::max(); + const T alpha = t / max_int; + + if (alpha <= max_int) + self = static_cast<uint64_t>(alpha); + else // max(double) / 2^64 will surely contain less than 52 precision bits, so speed up computations. + set_multiplier<double>(self, alpha); + + self *= max_int; + self += static_cast<uint64_t>(t - alpha * max_int); // += b_i + } + + constexpr static void wide_integer_from_bultin(integer<Bits, Signed> & self, double rhs) noexcept { + constexpr int64_t max_int = std::numeric_limits<int64_t>::max(); + constexpr int64_t min_int = std::numeric_limits<int64_t>::min(); + + /// There are values in int64 that have more than 53 significant bits (in terms of double + /// representation). Such values, being promoted to double, are rounded up or down. If they are rounded up, + /// the result may not fit in 64 bits. + /// An example of such a number is 9.22337e+18. + /// As to_Integral does a static_cast to int64_t, it may result in UB. 
+ /// The necessary check here is that the long double has enough significant (mantissa) bits to store the + /// int64_t max value precisely. + static_assert(LDBL_MANT_DIG >= 64, + "On your system long double has less than 64 precision bits, " + "which may result in UB when initializing double from int64_t"); + + if ((rhs > 0 && rhs < max_int) || (rhs < 0 && rhs > min_int)) { - self = to_Integral(rhs); + self = static_cast<int64_t>(rhs); return; } - long double r = rhs; - if (r < 0) - r = -r; + const long double rhs_long_double = (static_cast<long double>(rhs) < 0) + ? -static_cast<long double>(rhs) + : rhs; - size_t count = r / std::numeric_limits<uint64_t>::max(); - self = count; - self *= std::numeric_limits<uint64_t>::max(); - long double to_diff = count; - to_diff *= std::numeric_limits<uint64_t>::max(); - - self += to_Integral(r - to_diff); + set_multiplier(self, rhs_long_double); if (rhs < 0) self = -self; diff --git a/base/common/ya.make b/base/common/ya.make index 02e0e90fe58..adbbe17b486 100644 --- a/base/common/ya.make +++ b/base/common/ya.make @@ -1,4 +1,6 @@ # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. +OWNER(g:clickhouse) + LIBRARY() ADDINCL( diff --git a/base/common/ya.make.in b/base/common/ya.make.in index 89c075da309..bcac67c7923 100644 --- a/base/common/ya.make.in +++ b/base/common/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() ADDINCL( diff --git a/base/daemon/ya.make b/base/daemon/ya.make index 75ea54b6021..f3b4059f002 100644 --- a/base/daemon/ya.make +++ b/base/daemon/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() NO_COMPILER_WARNINGS() diff --git a/base/loggers/ya.make b/base/loggers/ya.make index 6cb95633c72..943b6f12b73 100644 --- a/base/loggers/ya.make +++ b/base/loggers/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/base/pcg-random/ya.make b/base/pcg-random/ya.make index c6a50887178..705cdc05341 100644 --- a/base/pcg-random/ya.make +++ b/base/pcg-random/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() ADDINCL (GLOBAL clickhouse/base/pcg-random) diff --git a/base/readpassphrase/ya.make b/base/readpassphrase/ya.make index 46f7f5983e3..d1ace8925ae 100644 --- a/base/readpassphrase/ya.make +++ b/base/readpassphrase/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() CFLAGS(-g0) diff --git a/base/widechar_width/ya.make b/base/widechar_width/ya.make index 180aea001c1..0d61e0dbf70 100644 --- a/base/widechar_width/ya.make +++ b/base/widechar_width/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() ADDINCL(GLOBAL clickhouse/base/widechar_width) diff --git a/base/ya.make b/base/ya.make index bbd961d02c3..9f4cf0fd4a7 100644 --- a/base/ya.make +++ b/base/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + RECURSE( common daemon diff --git a/contrib/libunwind b/contrib/libunwind index 198458b35f1..7d78d361891 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit 198458b35f100da32bd3e74c2a3ce8d236db299b +Subproject commit 7d78d3618910752c256b2b58c3895f4efea47fac diff --git a/contrib/rocksdb b/contrib/rocksdb index 963314ffd68..35d8e36ef1b 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 963314ffd681596ef2738a95249fe4c1163ef87a +Subproject commit 35d8e36ef1b8e3e0759ca81215f855226a0a54bd diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index 07c6861b406..9e850a6e781 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -347,8 +347,9 @@ set(SOURCES 
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_builder.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_garbage.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_meta.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_reader.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_format.cc - ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_reader.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_sequential_reader.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_writer.cc ${ROCKSDB_SOURCE_DIR}/db/builder.cc ${ROCKSDB_SOURCE_DIR}/db/c.cc @@ -394,6 +395,8 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/db/memtable_list.cc ${ROCKSDB_SOURCE_DIR}/db/merge_helper.cc ${ROCKSDB_SOURCE_DIR}/db/merge_operator.cc + ${ROCKSDB_SOURCE_DIR}/db/output_validator.cc + ${ROCKSDB_SOURCE_DIR}/db/periodic_work_scheduler.cc ${ROCKSDB_SOURCE_DIR}/db/range_del_aggregator.cc ${ROCKSDB_SOURCE_DIR}/db/range_tombstone_fragmenter.cc ${ROCKSDB_SOURCE_DIR}/db/repair.cc @@ -451,12 +454,12 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/monitoring/perf_level.cc ${ROCKSDB_SOURCE_DIR}/monitoring/persistent_stats_history.cc ${ROCKSDB_SOURCE_DIR}/monitoring/statistics.cc - ${ROCKSDB_SOURCE_DIR}/monitoring/stats_dump_scheduler.cc ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_impl.cc ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_updater.cc ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util.cc ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util_debug.cc ${ROCKSDB_SOURCE_DIR}/options/cf_options.cc + ${ROCKSDB_SOURCE_DIR}/options/configurable.cc ${ROCKSDB_SOURCE_DIR}/options/db_options.cc ${ROCKSDB_SOURCE_DIR}/options/options.cc ${ROCKSDB_SOURCE_DIR}/options/options_helper.cc @@ -507,6 +510,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/table/sst_file_dumper.cc ${ROCKSDB_SOURCE_DIR}/table/sst_file_reader.cc ${ROCKSDB_SOURCE_DIR}/table/sst_file_writer.cc + ${ROCKSDB_SOURCE_DIR}/table/table_factory.cc ${ROCKSDB_SOURCE_DIR}/table/table_properties.cc ${ROCKSDB_SOURCE_DIR}/table/two_level_iterator.cc ${ROCKSDB_SOURCE_DIR}/test_util/sync_point.cc @@ -515,6 +519,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/test_util/transaction_test_util.cc ${ROCKSDB_SOURCE_DIR}/tools/block_cache_analyzer/block_cache_trace_analyzer.cc ${ROCKSDB_SOURCE_DIR}/tools/dump/db_dump_tool.cc + ${ROCKSDB_SOURCE_DIR}/tools/io_tracer_parser_tool.cc ${ROCKSDB_SOURCE_DIR}/tools/ldb_cmd.cc ${ROCKSDB_SOURCE_DIR}/tools/ldb_tool.cc ${ROCKSDB_SOURCE_DIR}/tools/sst_dump_tool.cc diff --git a/docker/test/coverage/Dockerfile b/docker/test/coverage/Dockerfile index 32020951539..cea1a63cf6f 100644 --- a/docker/test/coverage/Dockerfile +++ b/docker/test/coverage/Dockerfile @@ -7,8 +7,10 @@ ENV SOURCE_DIR=/build ENV OUTPUT_DIR=/output ENV IGNORE='.*contrib.*' -CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-10 CXX=clang++-10 cmake .. && cd /; \ +RUN apt-get update && apt-get install cmake --yes --no-install-recommends + +CMD mkdir -p /build/obj-x86_64-linux-gnu && cd /build/obj-x86_64-linux-gnu && CC=clang-11 CXX=clang++-11 cmake .. 
&& cd /; \ dpkg -i /package_folder/clickhouse-common-static_*.deb; \ - llvm-profdata-10 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \ - llvm-cov-10 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \ + llvm-profdata-11 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata && \ + llvm-cov-11 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov && \ genhtml output.lcov --ignore-errors source --output-directory ${OUTPUT_DIR} diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index ab1da5aeb33..c95344eeca2 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -15,6 +15,9 @@ stage=${stage:-} # empty parameter. read -ra FASTTEST_CMAKE_FLAGS <<< "${FASTTEST_CMAKE_FLAGS:-}" +# Run only matching tests. +FASTTEST_FOCUS=${FASTTEST_FOCUS:-""} + FASTTEST_WORKSPACE=$(readlink -f "${FASTTEST_WORKSPACE:-.}") FASTTEST_SOURCE=$(readlink -f "${FASTTEST_SOURCE:-$FASTTEST_WORKSPACE/ch}") FASTTEST_BUILD=$(readlink -f "${FASTTEST_BUILD:-${BUILD:-$FASTTEST_WORKSPACE/build}}") @@ -287,9 +290,11 @@ TESTS_TO_SKIP=( 01322_ttest_scipy 01545_system_errors + # Checks system.errors + 01563_distributed_query_finish ) -time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" +time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" # substr is to remove semicolon after test name readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt") diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 76cadc3ce11..004bac02918 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -30,7 +30,7 @@ RUN apt-get update \ tzdata \ vim \ wget \ - && pip3 --no-cache-dir install clickhouse_driver scipy \ + && pip3 --no-cache-dir install 'clickhouse-driver>=0.1.5' scipy \ && apt-get purge --yes python3-dev g++ \ && apt-get autoremove --yes \ && apt-get clean \ diff --git a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml index ce7a6ae094a..f3609bcfcdb 100644 --- a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml +++ b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml @@ -16,7 +16,7 @@ 300 - 20 + 12 diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 337f13690b6..2568f7ac066 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -14,10 +14,12 @@ import string import sys import time import traceback +import logging import xml.etree.ElementTree as et from threading import Thread from scipy import stats +logging.basicConfig(format='%(asctime)s: %(levelname)s: %(module)s: %(message)s', level='WARNING') total_start_seconds = time.perf_counter() stage_start_seconds = total_start_seconds @@ -171,12 +173,9 @@ reportStageEnd('drop-1') 
settings = root.findall('settings/*') for conn_index, c in enumerate(all_connections): for s in settings: - try: - q = f"set {s.tag} = '{s.text}'" - c.execute(q) - print(f'set\t{conn_index}\t{c.last_query.elapsed}\t{tsv_escape(q)}') - except: - print(traceback.format_exc(), file=sys.stderr) + # requires clickhouse-driver >= 0.1.5 to accept arbitrary new settings + # (https://github.com/mymarilyn/clickhouse-driver/pull/142) + c.settings[s.tag] = s.text reportStageEnd('settings')
diff --git a/docker/test/stateful_with_coverage/Dockerfile b/docker/test/stateful_with_coverage/Dockerfile index f5d66ed5013..ac6645b9463 100644 --- a/docker/test/stateful_with_coverage/Dockerfile +++ b/docker/test/stateful_with_coverage/Dockerfile @@ -1,12 +1,12 @@ # docker build -t yandex/clickhouse-stateful-test-with-coverage . -FROM yandex/clickhouse-stateless-test +FROM yandex/clickhouse-stateless-test-with-coverage RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-9 main" >> /etc/apt/sources.list RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ apt-get install --yes --no-install-recommends \ - python3-requests + python3-requests procps psmisc COPY s3downloader /s3downloader COPY run.sh /run.sh
diff --git a/docker/test/stateful_with_coverage/run.sh b/docker/test/stateful_with_coverage/run.sh index aaf7e0a44ac..5fc6350fad8 100755 --- a/docker/test/stateful_with_coverage/run.sh +++ b/docker/test/stateful_with_coverage/run.sh @@ -1,40 +1,44 @@ #!/bin/bash kill_clickhouse () { - kill "$(pgrep -u clickhouse)" 2>/dev/null + echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S' + pkill -f "clickhouse-server" 2>/dev/null - for _ in {1..10} + + for _ in {1..120} do - if ! kill -0 "$(pgrep -u clickhouse)"; then - echo "No clickhouse process" - break - else - echo "Process $(pgrep -u clickhouse) still alive" - sleep 10 - fi + if !
pkill -0 -f "clickhouse-server" ; then break ; fi + echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S' + sleep 1 done + + if pkill -0 -f "clickhouse-server" + then + pstree -apgT + jobs + echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S' + return 1 + fi } start_clickhouse () { LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml & -} - -wait_llvm_profdata () { - while kill -0 "$(pgrep llvm-profdata-10)" + counter=0 + until clickhouse-client --query "SELECT 1" do - echo "Waiting for profdata $(pgrep llvm-profdata-10) still alive" - sleep 3 + if [ "$counter" -gt 120 ] + then + echo "Cannot start clickhouse-server" + cat /var/log/clickhouse-server/stdout.log + tail -n1000 /var/log/clickhouse-server/stderr.log + tail -n1000 /var/log/clickhouse-server/clickhouse-server.log + break + fi + sleep 0.5 + counter=$((counter + 1)) done } -merge_client_files_in_background () { - client_files=$(ls /client_*profraw 2>/dev/null) - if [ -n "$client_files" ] - then - llvm-profdata-10 merge -sparse "$client_files" -o "merged_client_$(date +%s).profraw" - rm "$client_files" - fi -} chmod 777 / @@ -51,26 +55,7 @@ chmod 777 -R /var/log/clickhouse-server/ # install test configs /usr/share/clickhouse-test/config/install.sh -function start() -{ - counter=0 - until clickhouse-client --query "SELECT 1" - do - if [ "$counter" -gt 120 ] - then - echo "Cannot start clickhouse-server" - cat /var/log/clickhouse-server/stdout.log - tail -n1000 /var/log/clickhouse-server/stderr.log - tail -n1000 /var/log/clickhouse-server/clickhouse-server.log - break - fi - timeout 120 service clickhouse-server start - sleep 0.5 - counter=$((counter + 1)) - done -} - -start +start_clickhouse # shellcheck disable=SC2086 # No quotes because I want to split it into words. if ! 
/s3downloader --dataset-names $DATASETS; then @@ -81,25 +66,20 @@ fi chmod 777 -R /var/lib/clickhouse -while /bin/true; do - merge_client_files_in_background - sleep 2 -done & -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW DATABASES" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "CREATE DATABASE test" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW DATABASES" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "CREATE DATABASE test" kill_clickhouse start_clickhouse -sleep 10 +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM datasets" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM test" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-client --query "SHOW TABLES FROM test" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM datasets" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test" if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then SKIP_LIST_OPT="--use-skip-list" @@ -109,15 +89,10 @@ fi # more ideologically correct.
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}" -LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt +LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt kill_clickhouse -wait_llvm_profdata - sleep 3 -wait_llvm_profdata # 100% merged all parts - - cp /*.profraw /profraw ||: diff --git a/docker/test/stateful_with_coverage/s3downloader b/docker/test/stateful_with_coverage/s3downloader index a27c03a70f0..363ece8dac6 100755 --- a/docker/test/stateful_with_coverage/s3downloader +++ b/docker/test/stateful_with_coverage/s3downloader @@ -29,7 +29,7 @@ def dowload_with_progress(url, path): logging.info("Downloading from %s to temp path %s", url, path) for i in range(RETRIES_COUNT): try: - with open(path, 'w') as f: + with open(path, 'wb') as f: response = requests.get(url, stream=True) response.raise_for_status() total_length = response.headers.get('content-length') diff --git a/docker/test/stateless_with_coverage/Dockerfile b/docker/test/stateless_with_coverage/Dockerfile index 1d6a85adf9e..f7379ba5568 100644 --- a/docker/test/stateless_with_coverage/Dockerfile +++ b/docker/test/stateless_with_coverage/Dockerfile @@ -1,4 +1,4 @@ -# docker build -t yandex/clickhouse-stateless-with-coverage-test . +# docker build -t yandex/clickhouse-stateless-test-with-coverage . # TODO: that can be based on yandex/clickhouse-stateless-test (llvm version and CMD differs) FROM yandex/clickhouse-test-base @@ -28,7 +28,9 @@ RUN apt-get update -y \ lsof \ unixodbc \ wget \ - qemu-user-static + qemu-user-static \ + procps \ + psmisc RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \ diff --git a/docker/test/stateless_with_coverage/run.sh b/docker/test/stateless_with_coverage/run.sh index 758591df618..4e4d9430a11 100755 --- a/docker/test/stateless_with_coverage/run.sh +++ b/docker/test/stateless_with_coverage/run.sh @@ -2,27 +2,41 @@ kill_clickhouse () { echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S' - kill "$(pgrep -u clickhouse)" 2>/dev/null + pkill -f "clickhouse-server" 2>/dev/null - for _ in {1..10} + + for _ in {1..120} do - if ! kill -0 "$(pgrep -u clickhouse)"; then - echo "No clickhouse process" | ts '%Y-%m-%d %H:%M:%S' - break - else - echo "Process $(pgrep -u clickhouse) still alive" | ts '%Y-%m-%d %H:%M:%S' - sleep 10 - fi + if ! 
pkill -0 -f "clickhouse-server" ; then break ; fi + echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S' + sleep 1 done - echo "Will try to send second kill signal for sure" - kill "$(pgrep -u clickhouse)" 2>/dev/null - sleep 5 - echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S' + if pkill -0 -f "clickhouse-server" + then + pstree -apgT + jobs + echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S' + return 1 + fi } start_clickhouse () { LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml & + counter=0 + until clickhouse-client --query "SELECT 1" + do + if [ "$counter" -gt 120 ] + then + echo "Cannot start clickhouse-server" + cat /var/log/clickhouse-server/stdout.log + tail -n1000 /var/log/clickhouse-server/stderr.log + tail -n1000 /var/log/clickhouse-server/clickhouse-server.log + break + fi + sleep 0.5 + counter=$((counter + 1)) + done } chmod 777 / @@ -44,9 +58,6 @@ chmod 777 -R /var/log/clickhouse-server/ start_clickhouse -sleep 10 - - if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then SKIP_LIST_OPT="--use-skip-list" fi
diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index e99af08afa5..70db2dc485e 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -35,7 +35,7 @@ RUN apt-get update \ ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone -RUN pip3 install urllib3 testflows==1.6.62 docker-compose docker dicttoxml kazoo tzlocal +RUN pip3 install urllib3 testflows==1.6.65 docker-compose docker dicttoxml kazoo tzlocal ENV DOCKER_CHANNEL stable ENV DOCKER_VERSION 17.09.1-ce
diff --git a/docs/en/faq/integration/json-import.md b/docs/en/faq/integration/json-import.md index fb94f226f2b..7038cc539d2 100644 --- a/docs/en/faq/integration/json-import.md +++ b/docs/en/faq/integration/json-import.md @@ -30,4 +30,4 @@ Instead of inserting data manually, you might consider using one of [client lib - `input_format_import_nested_json` allows inserting nested JSON objects into columns of [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) type. !!! note "Note" - Settings are specified as `GET` parameters for the HTTP interface or as additional command-line arguments prefixed with `--` for the `CLI` interface. \ No newline at end of file + Settings are specified as `GET` parameters for the HTTP interface or as additional command-line arguments prefixed with `--` for the `CLI` interface.
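For the `input_format_import_nested_json` setting mentioned above, a minimal sketch of its effect; the table name and data here are illustrative, and the setting can equally be passed as an HTTP `GET` parameter or a `--` CLI flag:

```sql
CREATE TABLE json_nested (n Nested(s String, i Int32)) ENGINE = Memory;

SET input_format_import_nested_json = 1;
-- With the setting enabled, a nested JSON object maps onto the Nested column:
INSERT INTO json_nested FORMAT JSONEachRow {"n": {"s": ["a", "b"], "i": [1, 2]}}

SELECT * FROM json_nested;
```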
diff --git a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md index 4e8347c9a6e..c737fad152f 100644 --- a/docs/en/interfaces/third-party/client-libraries.md +++ b/docs/en/interfaces/third-party/client-libraries.md @@ -26,6 +26,9 @@ toc_title: Client Libraries - [go-clickhouse](https://github.com/roistat/go-clickhouse) - [mailrugo-clickhouse](https://github.com/mailru/go-clickhouse) - [golang-clickhouse](https://github.com/leprosus/golang-clickhouse) +- Swift + - [ClickHouseNIO](https://github.com/patrick-zippenfenig/ClickHouseNIO) + - [ClickHouseVapor ORM](https://github.com/patrick-zippenfenig/ClickHouseVapor) - NodeJs + - [clickhouse (NodeJs)](https://github.com/TimonKK/clickhouse) - [node-clickhouse](https://github.com/apla/node-clickhouse)
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index a37ae685368..e111cf3ab75 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1081,4 +1081,45 @@ Default value: `/var/lib/clickhouse/access/`. - [Access Control and Account Management](../../operations/access-rights.md#access-control) +## user_directories {#user_directories} + +Section of the configuration file that contains the following settings: +- Path to the configuration file with predefined users. +- Path to the folder where users created by SQL commands are stored. + +If this section is specified, the path from [users_config](../../operations/server-configuration-parameters/settings.md#users-config) and [access_control_path](../../operations/server-configuration-parameters/settings.md#access_control_path) won't be used. + +The `user_directories` section can contain any number of items, the order of the items means their precedence (the higher the item, the higher the precedence). + +**Example** + +``` xml +<user_directories> + <users_xml> + <path>/etc/clickhouse-server/users.xml</path> + </users_xml> + <local_directory> + <path>/var/lib/clickhouse/access/</path> + </local_directory> +</user_directories> +``` + +You can also specify the `memory` setting, which means storing information only in memory, without writing to disk, and the `ldap` setting, which means storing information on an LDAP server. + +To add an LDAP server as a remote user directory of users that are not defined locally, define a single `ldap` section with the following parameters: +- `server` — one of the LDAP server names defined in the `ldap_servers` config section. This parameter is mandatory and cannot be empty. +- `roles` — section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server. If no roles are specified, the user will not be able to perform any actions after authentication. If any of the listed roles is not defined locally at the time of authentication, the authentication attempt will fail as if the provided password was incorrect. + +**Example** + +``` xml +<ldap> + <server>my_ldap_server</server> + <roles> + <my_local_role1 /> + <my_local_role2 /> + </roles> +</ldap> +``` + [Original article](https://clickhouse.tech/docs/en/operations/server_configuration_parameters/settings/)
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index f9c3c8a5d75..ba899754b18 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -307,7 +307,51 @@ Disabled by default. ## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number} -For TSV input format switches to parsing enum values as enum ids. +Enables or disables parsing enum values as enum ids for TSV input format.
+ +Possible values: + +- 0 — Enum values are parsed as values. +- 1 — Enum values are parsed as enum IDs. + +Default value: 0. + +**Example** + +Consider the table: + +```sql +CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); +``` + +When the `input_format_tsv_enum_as_number` setting is enabled: + +```sql +SET input_format_tsv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 1; +SELECT * FROM table_with_enum_column_for_tsv_insert; +``` + +Result: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +┌──Id─┬─Value──┐ +│ 103 │ first │ +└─────┴────────┘ +``` + +When the `input_format_tsv_enum_as_number` setting is disabled, the `INSERT` query: + +```sql +SET input_format_tsv_enum_as_number = 0; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; +``` + +throws an exception. ## input_format_null_as_default {#settings-input-format-null-as-default} @@ -1182,7 +1226,47 @@ For CSV input format enables or disables parsing of unquoted `NULL` as literal ( ## input_format_csv_enum_as_number {#settings-input_format_csv_enum_as_number} -For CSV input format switches to parsing enum values as enum ids. +Enables or disables parsing enum values as enum ids for CSV input format. + +Possible values: + +- 0 — Enum values are parsed as values. +- 1 — Enum values are parsed as enum IDs. + +Default value: 0. + +**Examples** + +Consider the table: + +```sql +CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); +``` + +When the `input_format_csv_enum_as_number` setting is enabled: + +```sql +SET input_format_csv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2; +SELECT * FROM table_with_enum_column_for_csv_insert; +``` + +Result: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +``` + +When the `input_format_csv_enum_as_number` setting is disabled, the `INSERT` query: + +```sql +SET input_format_csv_enum_as_number = 0; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2; +``` + +throws an exception. ## output_format_csv_crlf_end_of_line {#settings-output-format-csv-crlf-end-of-line} @@ -2233,4 +2317,10 @@ Possible values: Default value: `1`. +## output_format_tsv_null_representation {#output_format_tsv_null_representation} + +Allows configurable `NULL` representation for [TSV](../../interfaces/formats.md#tabseparated) output format. The setting only controls output format and `\N` is the only supported `NULL` representation for TSV input format. + +Default value: `\N`. + [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/)
diff --git a/docs/en/operations/system-tables/replicated_fetches.md b/docs/en/operations/system-tables/replicated_fetches.md new file mode 100644 index 00000000000..bc7e6335c0d --- /dev/null +++ b/docs/en/operations/system-tables/replicated_fetches.md @@ -0,0 +1,70 @@ +# system.replicated_fetches {#system_tables-replicated_fetches} + +Contains information about currently running background fetches. + +Columns: + +- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database. + +- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table.
+ +- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — The time elapsed (in seconds) since the fetch started. + +- `progress` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of completed work from 0 to 1. + +- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — The name of the part that will be formed as the result of the fetch. + +- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the part that will be formed as the result of the fetch. + +- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition. + +- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The total size (in bytes) of the compressed data in the result part. + +- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of compressed bytes read from the result part. + +- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the source replica. + +- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the source replica. + +- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Port number of the source replica. + +- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — Name of the interserver scheme. + +- `URI` ([String](../../sql-reference/data-types/string.md)) — Uniform resource identifier. + +- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether the currently running background fetch is being performed using the `TO DETACHED` expression. + +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier. + +**Example** + +``` sql +SELECT * FROM system.replicated_fetches LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +database: default +table: t +elapsed: 7.243039876 +progress: 0.41832135995612835 +result_part_name: all_0_0_0 +result_part_path: /var/lib/clickhouse/store/700/70080a04-b2de-4adf-9fa5-9ea210e81766/all_0_0_0/ +partition_id: all +total_size_bytes_compressed: 1052783726 +bytes_read_compressed: 440401920 +source_replica_path: /clickhouse/test/t/replicas/1 +source_replica_hostname: node1 +source_replica_port: 9009 +interserver_scheme: http +URI: http://node1:9009/?endpoint=DataPartsExchange%3A%2Fclickhouse%2Ftest%2Ft%2Freplicas%2F1&part=all_0_0_0&client_protocol_version=4&compress=false +to_detached: 0 +thread_id: 54 +``` + +**See Also** + +- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system/#query-language-system-replicated) + +[Original article](https://clickhouse.tech/docs/en/operations/system_tables/replicated_fetches)
diff --git a/docs/en/sql-reference/aggregate-functions/index.md b/docs/en/sql-reference/aggregate-functions/index.md index 270b7d8db39..543a5d3fed8 100644 --- a/docs/en/sql-reference/aggregate-functions/index.md +++ b/docs/en/sql-reference/aggregate-functions/index.md @@ -44,8 +44,6 @@ SELECT sum(y) FROM t_null_big └────────┘ ``` -The `sum` function interprets `NULL` as `0`. In particular, this means that if the function receives input of a selection where all the values are `NULL`, then the result will be `0`, not `NULL`.
- Now you can use the `groupArray` function to create an array from the `y` column: ``` sql
diff --git a/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md b/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md new file mode 100644 index 00000000000..ea44d5f1ddd --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md @@ -0,0 +1,37 @@ +--- +toc_priority: 150 +--- + +## initializeAggregation {#initializeaggregation} + +Initializes aggregation for your input rows. It is intended for the functions with the suffix `State`. +Use it for tests or to process columns of type `AggregateFunction`, for example in tables with the `AggregatingMergeTree` engine. + +**Syntax** + +``` sql +initializeAggregation (aggregate_function, column_1, column_2); +``` + +**Parameters** + +- `aggregate_function` — Name of the aggregate function whose state should be created. [String](../../../sql-reference/data-types/string.md#string). +- `column_n` — The column to pass into the function as its argument. [String](../../../sql-reference/data-types/string.md#string). + +**Returned value(s)** + +Returns the result of the aggregation for your input rows. The return type will be the same as the return type of the function that `initializeAggregation` takes as its first argument. +For example, for functions with the suffix `State` the return type will be `AggregateFunction`. + +**Example** + +Query: + +```sql +SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000); +``` +Result: + +```text +┌─uniqMerge(state)─┐ +│ 3 │ +└──────────────────┘ +```
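The states produced by `initializeAggregation` can also be written straight into an `AggregateFunction` column; a minimal sketch, assuming a hypothetical `AggregatingMergeTree` table `user_uniq` (all names here are illustrative):

```sql
CREATE TABLE user_uniq
(
    hour DateTime,
    uniq_state AggregateFunction(uniq, UInt64)
)
ENGINE = AggregatingMergeTree()
ORDER BY hour;

-- initializeAggregation('uniqState', ...) builds a one-row aggregation state
-- per input row; the states are merged later with uniqMerge.
INSERT INTO user_uniq
SELECT toStartOfHour(now()) AS hour,
       initializeAggregation('uniqState', number) AS uniq_state
FROM numbers(100);

SELECT hour, uniqMerge(uniq_state) AS uniq_values
FROM user_uniq
GROUP BY hour;
```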
diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index a5a347e553a..63b356e27e6 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -337,26 +337,124 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d └────────────┴───────────┴───────────┴───────────┘ ``` -## date_trunc(datepart, time_or_data\[, time_zone\]), dateTrunc(datepart, time_or_data\[, time_zone\]) {#date_trunc} +## date_trunc {#date_trunc} -Truncates a date or date with time based on the specified datepart, such as -- `second` -- `minute` -- `hour` -- `day` -- `week` -- `month` -- `quarter` -- `year` +Truncates date and time data to the specified part of the date. -```sql -SELECT date_trunc('hour', now()) +**Syntax** + +``` sql +date_trunc(unit, value[, timezone]) ``` +Alias: `dateTrunc`. -## now {#now} -Accepts zero or one arguments(timezone) and returns the current time at one of the moments of request execution, or current time of specific timezone at one of the moments of request execution if `timezone` argument provided. -This function returns a constant, even if the request took a long time to complete. +**Parameters** + +- `unit` — Part of date. [String](../syntax.md#syntax-string-literal). Possible values: + + - `second` + - `minute` + - `hour` + - `day` + - `week` + - `month` + - `quarter` + - `year` + +- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Value, truncated to the specified part of the date. + +Type: [Datetime](../../sql-reference/data-types/datetime.md). + +**Example** + +Query without timezone: + +``` sql +SELECT now(), date_trunc('hour', now()); +``` + +Result: + +``` text +┌───────────────now()─┬─date_trunc('hour', now())─┐ +│ 2020-09-28 10:40:45 │ 2020-09-28 10:00:00 │ +└─────────────────────┴───────────────────────────┘ +``` + +Query with the specified timezone: + +```sql +SELECT now(), date_trunc('hour', now(), 'Europe/Moscow'); +``` + +Result: + +```text +┌───────────────now()─┬─date_trunc('hour', now(), 'Europe/Moscow')─┐ +│ 2020-09-28 10:46:26 │ 2020-09-28 13:00:00 │ +└─────────────────────┴────────────────────────────────────────────┘ +``` + +**See also** + +- [toStartOfInterval](#tostartofintervaltime-or-data-interval-x-unit-time-zone) + +## now {#now} + +Returns the current date and time. + +**Syntax** + +``` sql +now([timezone]) +``` + +**Parameters** + +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Current date and time. + +Type: [Datetime](../../sql-reference/data-types/datetime.md). + +**Example** + +Query without timezone: + +``` sql +SELECT now(); +``` + +Result: + +``` text +┌───────────────now()─┐ +│ 2020-10-17 07:42:09 │ +└─────────────────────┘ +``` + +Query with the specified timezone: + +``` sql +SELECT now('Europe/Moscow'); +``` + +Result: + +``` text +┌─now('Europe/Moscow')─┐ +│ 2020-10-17 10:42:23 │ +└──────────────────────┘ +``` ## today {#today} @@ -437,18 +535,7 @@ dateDiff('unit', startdate, enddate, [timezone]) - `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal). - Supported values: - - | unit | - | ---- | - |second | - |minute | - |hour | - |day | - |week | - |month | - |quarter | - |year | + Supported values: second, minute, hour, day, week, month, quarter, year. - `startdate` — The first time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md new file mode 100644 index 00000000000..bef2f8137d0 --- /dev/null +++ b/docs/en/sql-reference/functions/encryption-functions.md @@ -0,0 +1,381 @@ +--- +toc_priority: 67 +toc_title: Encryption +--- + +# Encryption functions {#encryption-functions} + +These functions implement encryption and decryption of data with AES (Advanced Encryption Standard) algorithm. + +Key length depends on encryption mode. It is 16, 24, and 32 bytes long for `-128-`, `-192-`, and `-256-` modes respectively. + +Initialization vector length is always 16 bytes (bytes in excess of 16 are ignored). + +Note that these functions work slowly. + +## encrypt {#encrypt} + +This function encrypts data using these modes: + +- aes-128-ecb, aes-192-ecb, aes-256-ecb +- aes-128-cbc, aes-192-cbc, aes-256-cbc +- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1 +- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8 +- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128 +- aes-128-ofb, aes-192-ofb, aes-256-ofb +- aes-128-gcm, aes-192-gcm, aes-256-gcm + +**Syntax** + +``` sql +encrypt('mode', 'plaintext', 'key' [, iv, aad]) +``` + +**Parameters** + +- `mode` — Encryption mode.
[String](../../sql-reference/data-types/string.md#string). +- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string). +- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string). +- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string). +- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes; for others an exception is thrown. [String](../../sql-reference/data-types/string.md#string). + +**Returned value** + +- Ciphered String. [String](../../sql-reference/data-types/string.md#string). + +**Examples** + +Create this table: + +Query: + +``` sql +CREATE TABLE encryption_test +( + input String, + key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), + iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), + key32 String DEFAULT substring(key, 1, 32), + key24 String DEFAULT substring(key, 1, 24), + key16 String DEFAULT substring(key, 1, 16) +) Engine = Memory; +``` + +Insert this data: + +Query: + +``` sql +INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +``` + +Example without `iv`: + +Query: + +``` sql +SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test; +``` + +Result: + +``` text +┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐ +│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │ +│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │ +│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │ +└─────────────┴──────────────────────────────────────────────────────────────────┘ +``` + +Example with `iv`: + +Query: + +``` sql +SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test; +``` + +Result: + +``` text +┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐ +│ aes-256-ctr │ │ +│ aes-256-ctr │ 7FB039F7 │ +│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │ +└─────────────┴───────────────────────────────────────────────┘ +``` + +Example with `-gcm`: + +Query: + +``` sql +SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test; +``` + +Result: + +``` text +┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐ +│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │ +│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │ +│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │ +└─────────────┴────────────────────────────────────────────────────────────────────────┘ +``` + +Example with `-gcm` mode and with `aad`: + +Query: + +``` sql +SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test; +``` + +Result: + +``` text +┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐ +│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │ +│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │ +│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │ +└─────────────┴────────────────────────────────────────────────────────────────────────┘ +``` + +## aes_encrypt_mysql {#aes_encrypt_mysql} + +Compatible with MySQL encryption and can be decrypted with the
[AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function. + +Supported encryption modes: + +- aes-128-ecb, aes-192-ecb, aes-256-ecb +- aes-128-cbc, aes-192-cbc, aes-256-cbc +- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1 +- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8 +- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128 +- aes-128-ofb, aes-192-ofb, aes-256-ofb + +**Syntax** + +```sql +aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv]) +``` + +**Parameters** + +- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). +- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string). +- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string). +- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string). + +**Returned value** + +- Ciphered String. [String](../../sql-reference/data-types/string.md#string). + +**Examples** + +Create this table: + +Query: + +``` sql +CREATE TABLE encryption_test +( + input String, + key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), + iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), + key32 String DEFAULT substring(key, 1, 32), + key24 String DEFAULT substring(key, 1, 24), + key16 String DEFAULT substring(key, 1, 16) +) Engine = Memory; +``` + +Insert this data: + +Query: + +``` sql +INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +``` + +Example without `iv`: + +Query: + +``` sql +SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test; +``` + +Result: + +``` text +┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐ +│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │ +│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │ +│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │ +└─────────────┴──────────────────────────────────────────────────────────────────┘ +``` + +Example with `iv`: + +Query: + +``` sql +SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test; +``` + +Result: + +``` text +┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐ +│ aes-256-cfb128 │ │ +│ aes-256-cfb128 │ 7FB039F7 │ +│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │ +└────────────────┴────────────────────────────────────────────────────────────┘ +``` + +## decrypt {#decrypt} + +This function decrypts data using these modes: + +- aes-128-ecb, aes-192-ecb, aes-256-ecb +- aes-128-cbc, aes-192-cbc, aes-256-cbc +- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1 +- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8 +- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128 +- aes-128-ofb, aes-192-ofb, aes-256-ofb +- aes-128-gcm, aes-192-gcm, aes-256-gcm + +**Syntax** + +```sql +decrypt('mode', 'ciphertext', 'key' [, iv, aad]) +``` + +**Parameters** + +- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string). +- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string). +- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string). +- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
+ +- `aad` — Additional authenticated data. Decryption fails if this value is incorrect. Works only in `-gcm` modes; for others an exception is thrown. [String](../../sql-reference/data-types/string.md#string). + +**Returned value** + +- Decrypted String. [String](../../sql-reference/data-types/string.md#string). + +**Examples** + +Create this table: + +Query: + +``` sql +CREATE TABLE encryption_test +( + input String, + key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), + iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), + key32 String DEFAULT substring(key, 1, 32), + key24 String DEFAULT substring(key, 1, 24), + key16 String DEFAULT substring(key, 1, 16) +) Engine = Memory; +``` + +Insert this data: + +Query: + +``` sql +INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +``` + +Query: + +``` sql + +SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test; +``` + +Result: + +```text +┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐ +│ aes-128-ecb │ │ +│ aes-128-ecb │ text │ +│ aes-128-ecb │ What Is ClickHouse? │ +└─────────────┴─────────────────────────────────────────────────────────────────────┘ +``` + +## aes_decrypt_mysql {#aes_decrypt_mysql} + +Compatible with MySQL encryption and decrypts data encrypted with the [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function. + +Supported decryption modes: + +- aes-128-ecb, aes-192-ecb, aes-256-ecb +- aes-128-cbc, aes-192-cbc, aes-256-cbc +- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1 +- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8 +- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128 +- aes-128-ofb, aes-192-ofb, aes-256-ofb + +**Syntax** + +```sql +aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) +``` + +**Parameters** + +- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string). +- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string). +- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string). +- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string). + +**Returned value** + +- Decrypted String. [String](../../sql-reference/data-types/string.md#string). + +**Examples** + +Create this table: + +Query: + +``` sql +CREATE TABLE encryption_test +( + input String, + key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), + iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), + key32 String DEFAULT substring(key, 1, 32), + key24 String DEFAULT substring(key, 1, 24), + key16 String DEFAULT substring(key, 1, 16) +) Engine = Memory; +``` + +Insert this data: + +Query: + +``` sql +INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +``` + +Query: + +``` sql +SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test; +``` + +Result: + +``` text +┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐ +│ aes-128-cbc │ │ +│ aes-128-cbc │ text │ +│ aes-128-cbc │ What Is ClickHouse? │ +└─────────────┴──────────────────────────────────────────────────────────────────────────────────────┘ +```
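Since `decrypt` mirrors `encrypt`, a quick round trip makes a handy sanity check; a minimal sketch reusing the `encryption_test` table defined above, with the `aad` argument exercised in a `-gcm` mode:

```sql
SELECT 'aes-256-gcm' AS mode,
       decrypt(mode, encrypt(mode, input, key32, iv, 'AAD'), key32, iv, 'AAD') AS roundtrip
FROM encryption_test;
```

Each `roundtrip` value should equal the original `input`; with a different `aad` the decryption fails instead.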
│ +└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘ +``` + +[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/encryption_functions/) diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 3e63fc9946b..bfa1998d68a 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -306,3 +306,67 @@ execute_native_thread_routine start_thread clone ``` +## tid {#tid} + +Returns id of the thread, in which current [Block](https://clickhouse.tech/docs/en/development/architecture/#block) is processed. + +**Syntax** + +``` sql +tid() +``` + +**Returned value** + +- Current thread id. [Uint64](../../sql-reference/data-types/int-uint.md#uint-ranges). + +**Example** + +Query: + +``` sql +SELECT tid(); +``` + +Result: + +``` text +┌─tid()─┐ +│ 3878 │ +└───────┘ +``` +## logTrace {#logtrace} + +Emits trace log message to server log for each [Block](https://clickhouse.tech/docs/en/development/architecture/#block). + +**Syntax** + +``` sql +logTrace('message') +``` + +**Parameters** + +- `message` — Message that is emitted to server log. [String](../../sql-reference/data-types/string.md#string). + +**Returned value** + +- Always returns 0. + +**Example** + +Query: + +``` sql +SELECT logTrace('logTrace message'); +``` + +Result: + +``` text +┌─logTrace('logTrace message')─┐ +│ 0 │ +└──────────────────────────────┘ +``` + +[Original article](https://clickhouse.tech/docs/en/query_language/functions/introspection/) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index fc066a34b0b..f8458b27a22 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -325,7 +325,59 @@ This function accepts a number or date or date with time, and returns a FixedStr ## reinterpretAsUUID {#reinterpretasuuid} -This function accepts FixedString, and returns UUID. Takes 16 bytes string. If the string isn't long enough, the functions work as if the string is padded with the necessary number of null bytes to the end. If the string longer than 16 bytes, the extra bytes at the end are ignored. +This function accepts 16 bytes string, and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the functions work as if the string is padded with the necessary number of null bytes to the end. If the string longer than 16 bytes, the extra bytes at the end are ignored. + +**Syntax** + +``` sql +reinterpretAsUUID(fixed_string) +``` + +**Parameters** + +- `fixed_string` — Big-endian byte string. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring). + +**Returned value** + +- The UUID type value. [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type). + +**Examples** + +String to UUID. + +Query: + +``` sql +SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f'))) +``` + +Result: + +``` text +┌─reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))─┐ +│ 08090a0b-0c0d-0e0f-0001-020304050607 │ +└───────────────────────────────────────────────────────────────────────┘ +``` + +Going back and forth from String to UUID. 
+ +Query: + +``` sql +WITH + generateUUIDv4() AS uuid, + identity(lower(hex(reverse(reinterpretAsString(uuid))))) AS str, + reinterpretAsUUID(reverse(unhex(str))) AS uuid2 +SELECT uuid = uuid2; +``` + +Result: + +``` text +┌─equals(uuid, uuid2)─┐ +│ 1 │ +└─────────────────────┘ +``` ## CAST(x, T) {#type_conversion_function-cast}
diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index ad63a5b72ac..0da74ce1b0e 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -115,7 +115,21 @@ Returns the “first significant subdomain”. This is a non-standard concept sp Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain” (see the explanation above). -For example, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`. +For example: + +- `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`. +- `cutToFirstSignificantSubdomain('www.tr') = 'tr'`. +- `cutToFirstSignificantSubdomain('tr') = ''`. + +### cutToFirstSignificantSubdomainWithWWW {#cuttofirstsignificantsubdomainwithwww} + +Returns the part of the domain that includes top-level subdomains up to the “first significant subdomain”, without stripping "www". + +For example: + +- `cutToFirstSignificantSubdomainWithWWW('https://news.yandex.com.tr/') = 'yandex.com.tr'`. +- `cutToFirstSignificantSubdomainWithWWW('www.tr') = 'www.tr'`. +- `cutToFirstSignificantSubdomainWithWWW('tr') = ''`. ### port(URL\[, default_port = 0\]) {#port}
diff --git a/docs/en/sql-reference/statements/create/dictionary.md b/docs/en/sql-reference/statements/create/dictionary.md index b1098c54703..3fe94e267e4 100644 --- a/docs/en/sql-reference/statements/create/dictionary.md +++ b/docs/en/sql-reference/statements/create/dictionary.md @@ -20,7 +20,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` External dictionary structure consists of attributes. Dictionary attributes are specified similarly to table columns. The only required attribute property is its type, all other properties may have default values.
diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md index dbfd5431861..71586e15a31 100644 --- a/docs/en/sql-reference/statements/select/from.md +++ b/docs/en/sql-reference/statements/select/from.md @@ -27,9 +27,9 @@ It is applicable when selecting data from tables that use the [MergeTree](../../ ### Drawbacks {#drawbacks} -Queries that use `FINAL` are executed not as fast as similar queries that don’t, because: +Queries that use `FINAL` are executed slightly slower than similar queries that don’t, because: -- Query is executed in a single thread and data is merged during query execution. +- Data is merged during query execution. - Queries with `FINAL` read primary key columns in addition to the columns specified in the query. **In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine haven’t happened yet and deal with it by applying aggregation (for example, to discard duplicates).
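One sketch of that approach, assuming a hypothetical `ReplacingMergeTree` table `events(id, value, version)` where `FINAL` would otherwise be used to deduplicate by `id`:

```sql
-- Instead of: SELECT * FROM events FINAL
-- keep only the latest row per key, regardless of whether background
-- merges have already collapsed the duplicates.
SELECT
    id,
    argMax(value, version) AS value
FROM events
GROUP BY id;
```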
diff --git a/docs/en/sql-reference/statements/select/group-by.md b/docs/en/sql-reference/statements/select/group-by.md index 6cb99f285f2..500a09dcbef 100644 --- a/docs/en/sql-reference/statements/select/group-by.md +++ b/docs/en/sql-reference/statements/select/group-by.md @@ -6,7 +6,7 @@ toc_title: GROUP BY `GROUP BY` clause switches the `SELECT` query into an aggregation mode, which works as follows: -- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expressions”. +- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expression”. - All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both. - Result of aggregating `SELECT` query will contain as many rows as there were unique values of “grouping key” in source table. Usually this significantly reduces the row count, often by orders of magnitude, but not necessarily: row count stays the same if all “grouping key” values were distinct. @@ -45,6 +45,154 @@ You can see that `GROUP BY` for `y = NULL` summed up `x`, as if `NULL` is this v If you pass several keys to `GROUP BY`, the result will give you all the combinations of the selection, as if `NULL` were a specific value. +## WITH ROLLUP Modifier {#with-rollup-modifier} + +`WITH ROLLUP` modifier is used to calculate subtotals for the key expressions, based on their order in the `GROUP BY` list. The subtotal rows are added after the result table. + +The subtotals are calculated in the reverse order: first, subtotals are calculated for the last key expression in the list, then for the previous one, and so on up to the first key expression. + +In the subtotal rows the values of already "grouped" key expressions are set to `0` or empty line. + +!!! note "Note" + Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results. + +**Example** + +Consider the table t: + +```text +┌─year─┬─month─┬─day─┐ +│ 2019 │ 1 │ 5 │ +│ 2019 │ 1 │ 15 │ +│ 2020 │ 1 │ 5 │ +│ 2020 │ 1 │ 15 │ +│ 2020 │ 10 │ 5 │ +│ 2020 │ 10 │ 15 │ +└──────┴───────┴─────┘ +``` + +Query: + +```sql +SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP; +``` +As the `GROUP BY` section has three key expressions, the result contains four tables with subtotals "rolled up" from right to left: + +- `GROUP BY year, month, day`; +- `GROUP BY year, month` (and `day` column is filled with zeros); +- `GROUP BY year` (now `month, day` columns are both filled with zeros); +- and totals (and all three key expression columns are zeros).
+ +```text +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2020 │ 10 │ 15 │ 1 │ +│ 2020 │ 1 │ 5 │ 1 │ +│ 2019 │ 1 │ 5 │ 1 │ +│ 2020 │ 1 │ 15 │ 1 │ +│ 2019 │ 1 │ 15 │ 1 │ +│ 2020 │ 10 │ 5 │ 1 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 1 │ 0 │ 2 │ +│ 2020 │ 1 │ 0 │ 2 │ +│ 2020 │ 10 │ 0 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 0 │ 0 │ 2 │ +│ 2020 │ 0 │ 0 │ 4 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 0 │ 0 │ 6 │ +└──────┴───────┴─────┴─────────┘ +``` + +## WITH CUBE Modifier {#with-cube-modifier} + +`WITH CUBE` modifier is used to calculate subtotals for every combination of the key expressions in the `GROUP BY` list. The subtotals rows are added after the result table. + +In the subtotals rows the values of all "grouped" key expressions are set to `0` or empty line. + +!!! note "Note" + Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results. + +**Example** + +Consider the table t: + +```text +┌─year─┬─month─┬─day─┐ +│ 2019 │ 1 │ 5 │ +│ 2019 │ 1 │ 15 │ +│ 2020 │ 1 │ 5 │ +│ 2020 │ 1 │ 15 │ +│ 2020 │ 10 │ 5 │ +│ 2020 │ 10 │ 15 │ +└──────┴───────┴─────┘ +``` + +Query: + +```sql +SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE; +``` + +As `GROUP BY` section has three key expressions, the result contains eight tables with subtotals for all key expression combinations: + +- `GROUP BY year, month, day` +- `GROUP BY year, month` +- `GROUP BY year, day` +- `GROUP BY year` +- `GROUP BY month, day` +- `GROUP BY month` +- `GROUP BY day` +- and totals. + +Columns, excluded from `GROUP BY`, are filled with zeros. + +```text +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2020 │ 10 │ 15 │ 1 │ +│ 2020 │ 1 │ 5 │ 1 │ +│ 2019 │ 1 │ 5 │ 1 │ +│ 2020 │ 1 │ 15 │ 1 │ +│ 2019 │ 1 │ 15 │ 1 │ +│ 2020 │ 10 │ 5 │ 1 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 1 │ 0 │ 2 │ +│ 2020 │ 1 │ 0 │ 2 │ +│ 2020 │ 10 │ 0 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2020 │ 0 │ 5 │ 2 │ +│ 2019 │ 0 │ 5 │ 1 │ +│ 2020 │ 0 │ 15 │ 2 │ +│ 2019 │ 0 │ 15 │ 1 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 0 │ 0 │ 2 │ +│ 2020 │ 0 │ 0 │ 4 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 1 │ 5 │ 2 │ +│ 0 │ 10 │ 15 │ 1 │ +│ 0 │ 10 │ 5 │ 1 │ +│ 0 │ 1 │ 15 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 1 │ 0 │ 4 │ +│ 0 │ 10 │ 0 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 0 │ 5 │ 3 │ +│ 0 │ 0 │ 15 │ 3 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 0 │ 0 │ 6 │ +└──────┴───────┴─────┴─────────┘ +``` + + ## WITH TOTALS Modifier {#with-totals-modifier} If the `WITH TOTALS` modifier is specified, another row will be calculated. This row will have key columns containing default values (zeros or empty lines), and columns of aggregate functions with the values calculated across all the rows (the “total” values). @@ -88,8 +236,6 @@ SELECT FROM hits ``` -However, in contrast to standard SQL, if the table doesn’t have any rows (either there aren’t any at all, or there aren’t any after using WHERE to filter), an empty result is returned, and not the result from one of the rows containing the initial values of aggregate functions. 
+ -As opposed to MySQL (and conforming to standard SQL), you can’t get some value of some column that is not in a key or aggregate function (except constant expressions). To work around this, you can use the ‘any’ aggregate function (get the first encountered value) or ‘min/max’. Example: @@ -105,10 +251,6 @@ GROUP BY domain For every different key value encountered, `GROUP BY` calculates a set of aggregate function values. -`GROUP BY` is not supported for array columns. - -A constant can’t be specified as arguments for aggregate functions. Example: `sum(1)`. Instead of this, you can get rid of the constant. Example: `count()`. - ## Implementation Details {#implementation-details} Aggregation is one of the most important features of a column-oriented DBMS, and thus its implementation is one of the most heavily optimized parts of ClickHouse. By default, aggregation is done in memory using a hash-table. It has 40+ specializations that are chosen automatically depending on “grouping key” data types.
diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index 3107f791eb9..901b850fc46 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -20,7 +20,7 @@ SELECT [DISTINCT] expr_list [GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON )|(USING ) [PREWHERE expr] [WHERE expr] -[GROUP BY expr_list] [WITH TOTALS] +[GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS] [HAVING expr] [ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr] [LIMIT [offset_value, ]n BY columns]
diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 75303fde19e..509b7553536 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -204,7 +204,7 @@ SYSTEM STOP MOVES [[db.]merge_tree_family_table_name] ## Managing ReplicatedMergeTree Tables {#query-language-system-replicated} -ClickHouse can manage background replication related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables. +ClickHouse can manage background replication related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication/#table_engines-replication) tables. ### STOP FETCHES {#query_language-system-stop-fetches}
diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index 70994f3d882..296f5c7c5f3 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -57,7 +57,7 @@ Identifiers are: Identifiers can be quoted or non-quoted. The latter is preferred. -Non-quoted identifiers must match the regex `^[a-zA-Z_][0-9a-zA-Z_]*$` and can not be equal to [keywords](#syntax-keywords). Examples: `x, _1, X_y__Z123_.` +Non-quoted identifiers must match the regex `^[0-9a-zA-Z_]*[a-zA-Z_][0-9a-zA-Z_]*$` (they must contain at least one non-digit character) and cannot be equal to [keywords](#syntax-keywords). Examples: `x, _1, X_y__Z123_.` If you want to use identifiers the same as keywords or you want to use other symbols in identifiers, quote it using double quotes or backticks, for example, `"id"`, `` `id` ``.
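A small illustration of the quoting rule, using throwaway names: `from` collides with a keyword and `123` consists only of digits, so neither can appear unquoted, while backticks (or double quotes) make both valid:

```sql
CREATE TABLE t (`from` String, `123` UInt8) ENGINE = Memory;
SELECT `from`, `123` FROM t;
```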
diff --git a/docs/es/sql-reference/statements/create.md b/docs/es/sql-reference/statements/create.md index b851435286e..db3194ae114 100644 --- a/docs/es/sql-reference/statements/create.md +++ b/docs/es/sql-reference/statements/create.md @@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` Crear [diccionario externo](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) con dado [estructura](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [fuente](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [diseño](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) y [vida](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). diff --git a/docs/fa/sql-reference/statements/create.md b/docs/fa/sql-reference/statements/create.md index c4b7ede05dd..970e8ee7535 100644 --- a/docs/fa/sql-reference/statements/create.md +++ b/docs/fa/sql-reference/statements/create.md @@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` ایجاد [فرهنگ لغت خارجی](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) با توجه به [ساختار](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [متن](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [طرحبندی](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) و [طول عمر](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). diff --git a/docs/fr/sql-reference/statements/create.md b/docs/fr/sql-reference/statements/create.md index f7b3790baf2..e7c8040ee6e 100644 --- a/docs/fr/sql-reference/statements/create.md +++ b/docs/fr/sql-reference/statements/create.md @@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` Crée [externe dictionnaire](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) avec le [structure](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [disposition](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) et [vie](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). diff --git a/docs/ja/sql-reference/statements/create.md b/docs/ja/sql-reference/statements/create.md index ae518dbfac8..1d1f2c57556 100644 --- a/docs/ja/sql-reference/statements/create.md +++ b/docs/ja/sql-reference/statements/create.md @@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... 
paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` 作成 [外部辞書](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) 与えられたと [構造](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [ソース](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [レイアウト](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) と [生涯](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 2745718381b..9941e4f3ac5 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -1068,4 +1068,45 @@ ClickHouse использует ZooKeeper для хранения метадан - [Управление доступом](../access-rights.md#access-control) +## user_directories {#user_directories} + +Секция конфигурационного файла, которая содержит настройки: +- Путь к конфигурационному файлу с предустановленными пользователями. +- Путь к файлу, в котором содержатся пользователи, созданные при помощи SQL команд. + +Если эта секция определена, путь из [users_config](../../operations/server-configuration-parameters/settings.md#users-config) и [access_control_path](../../operations/server-configuration-parameters/settings.md#access_control_path) не используется. + +Секция `user_directories` может содержать любое количество элементов, порядок расположения элементов обозначает их приоритет (чем выше элемент, тем выше приоритет). + +**Пример** + +``` xml +<user_directories> + <users_xml> + <path>/etc/clickhouse-server/users.xml</path> + </users_xml> + <local_directory> + <path>/var/lib/clickhouse/access/</path> + </local_directory> +</user_directories> +``` + +Также вы можете указать настройку `memory` — означает хранение информации только в памяти, без записи на диск, и `ldap` — означает хранение информации на [LDAP-сервере](https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol). + +Чтобы добавить LDAP-сервер в качестве удаленного каталога пользователей, которые не определены локально, определите один раздел `ldap` со следующими параметрами: +- `server` — имя одного из LDAP-серверов, определенных в секции `ldap_servers` конфигурационного файла. Этот параметр является необязательным и может быть пустым. +- `roles` — раздел со списком локально определенных ролей, которые будут назначены каждому пользователю, полученному с LDAP-сервера. Если роли не заданы, пользователь не сможет выполнять никаких действий после аутентификации. Если какая-либо из перечисленных ролей не определена локально во время проверки подлинности, попытка проверки подлинности завершится неудачей, как если бы предоставленный пароль был неверным. + +**Пример** + +``` xml +<ldap> + <server>my_ldap_server</server> + <roles> + <my_local_role1 /> + <my_local_role2 /> + </roles> +</ldap> +``` + [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/server_configuration_parameters/settings/) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 39a996cb44e..af0fc3e6137 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -289,6 +289,54 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), ( Disabled by default.
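Circling back to the `LIFETIME` grammar updated in each localized copy of create.md above, a hedged sketch of both documented forms with a hypothetical dictionary (the names and source parameters are illustrative only):

``` sql
-- Range form: the dictionary reloads at a random moment between MIN and MAX seconds.
CREATE DICTIONARY products (id UInt64, name String)
PRIMARY KEY id
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 DB 'default' TABLE 'products_source'))
LAYOUT(FLAT())
LIFETIME(MIN 300 MAX 360);

-- Single-value shorthand allowed by the same grammar:
-- LIFETIME(360)
```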
+## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number} + +Включает или отключает парсинг значений перечислений как идентификаторов перечислений для входного формата TSV. + +Возможные значения: + +- 0 — парсинг значений перечисления как значений. +- 1 — парсинг значений перечисления как идентификаторов перечисления. + +Значение по умолчанию: 0. + +**Пример** + +Рассмотрим таблицу: + +```sql +CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); +``` + +При включенной настройке `input_format_tsv_enum_as_number`: + +```sql +SET input_format_tsv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 1; +SELECT * FROM table_with_enum_column_for_tsv_insert; +``` + +Результат: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +┌──Id─┬─Value──┐ +│ 103 │ first │ +└─────┴────────┘ +``` + +При отключенной настройке `input_format_tsv_enum_as_number` запрос `INSERT`: + +```sql +SET input_format_tsv_enum_as_number = 0; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; +``` + +сгенерирует исключение. + ## input_format_null_as_default {#settings-input-format-null-as-default} Включает или отключает использование значений по умолчанию в случаях, когда во входных данных содержится `NULL`, но тип соответствующего столбца не `Nullable(T)` (для текстовых форматов). @@ -1127,6 +1175,50 @@ SELECT area/period FROM account_orders FORMAT JSON; Для формата CSV включает или выключает парсинг неэкранированной строки `NULL` как литерала (синоним для `\N`) +## input_format_csv_enum_as_number {#settings-input_format_csv_enum_as_number} + +Включает или отключает парсинг значений перечислений как идентификаторов перечислений для входного формата CSV. + +Возможные значения: + +- 0 — парсинг значений перечисления как значений. +- 1 — парсинг значений перечисления как идентификаторов перечисления. + +Значение по умолчанию: 0. + +**Пример** + +Рассмотрим таблицу: + +```sql +CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); +``` + +При включенной настройке `input_format_csv_enum_as_number`: + +```sql +SET input_format_csv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2; +SELECT * FROM table_with_enum_column_for_csv_insert; +``` + +Результат: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +``` + +При отключенной настройке `input_format_csv_enum_as_number` запрос `INSERT`: + +```sql +SET input_format_csv_enum_as_number = 0; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2; +``` + +сгенерирует исключение. + ## output_format_csv_crlf_end_of_line {#settings-output-format-csv-crlf-end-of-line} Использовать в качестве разделителя строк для CSV формата CRLF (DOS/Windows стиль) вместо LF (Unix стиль). @@ -2095,4 +2187,10 @@ SELECT CAST(toNullable(toInt32(0)) AS Int32) as x, toTypeName(x); Значение по умолчанию: `1`. +## output_format_tsv_null_representation {#output_format_tsv_null_representation} + +Позволяет настраивать представление `NULL` для формата выходных данных [TSV](../../interfaces/formats.md#tabseparated). Настройка управляет форматом выходных данных, `\N` является единственным поддерживаемым представлением для формата входных данных TSV. + +Значение по умолчанию: `\N`. 
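A hedged sketch of the `output_format_tsv_null_representation` setting documented just above (the output shown is the expected rendering, not captured from a live server):

``` sql
SET output_format_tsv_null_representation = 'NULL';

SELECT NULL AS x, 1 AS y FORMAT TSV;
```

``` text
NULL	1
```

As the section notes, this affects output only; on input, TSV still accepts only `\N` as the `NULL` representation.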
+ [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) diff --git a/docs/ru/operations/system-tables/replicated_fetches.md b/docs/ru/operations/system-tables/replicated_fetches.md new file mode 100644 index 00000000000..94584f390ee --- /dev/null +++ b/docs/ru/operations/system-tables/replicated_fetches.md @@ -0,0 +1,70 @@ +# system.replicated_fetches {#system_tables-replicated_fetches} + +Содержит информацию о выполняемых в данный момент фоновых операциях скачивания кусков данных с других реплик. + +Столбцы: + +- `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных. + +- `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы. + +- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — время, прошедшее от момента начала скачивания куска, в секундах. + +- `progress` ([Float64](../../sql-reference/data-types/float.md)) — доля выполненной работы от 0 до 1. + +- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — имя скачиваемого куска. + +- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — абсолютный путь к скачиваемому куску. + +- `partition_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор партиции. + +- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — общий размер сжатой информации в скачиваемом куске в байтах. + +- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер сжатой информации, считанной из скачиваемого куска, в байтах. + +- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — абсолютный путь к исходной реплике. + +- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — имя хоста исходной реплики. + +- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — номер порта исходной реплики. + +- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — имя межсерверной схемы. + +- `URI` ([String](../../sql-reference/data-types/string.md)) — универсальный идентификатор ресурса. + +- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, указывающий на использование выражения `TO DETACHED` в текущих фоновых операциях. + +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор потока. 
+ +**Пример** + +``` sql +SELECT * FROM system.replicated_fetches LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +database: default +table: t +elapsed: 7.243039876 +progress: 0.41832135995612835 +result_part_name: all_0_0_0 +result_part_path: /var/lib/clickhouse/store/700/70080a04-b2de-4adf-9fa5-9ea210e81766/all_0_0_0/ +partition_id: all +total_size_bytes_compressed: 1052783726 +bytes_read_compressed: 440401920 +source_replica_path: /clickhouse/test/t/replicas/1 +source_replica_hostname: node1 +source_replica_port: 9009 +interserver_scheme: http +URI: http://node1:9009/?endpoint=DataPartsExchange%3A%2Fclickhouse%2Ftest%2Ft%2Freplicas%2F1&part=all_0_0_0&client_protocol_version=4&compress=false +to_detached: 0 +thread_id: 54 +``` + +**Смотрите также** + +- [Управление таблицами ReplicatedMergeTree](../../sql-reference/statements/system/#query-language-system-replicated) + +[Оригинальная статья](https://clickhouse.tech/docs/en/operations/system_tables/replicated_fetches) diff --git a/docs/ru/sql-reference/aggregate-functions/index.md b/docs/ru/sql-reference/aggregate-functions/index.md index e7f6acee738..4a7768f587f 100644 --- a/docs/ru/sql-reference/aggregate-functions/index.md +++ b/docs/ru/sql-reference/aggregate-functions/index.md @@ -44,8 +44,6 @@ SELECT sum(y) FROM t_null_big └────────┘ ``` -Функция `sum` работает с `NULL` как с `0`. В частности, это означает, что если на вход в функцию подать выборку, где все значения `NULL`, то результат будет `0`, а не `NULL`. - Теперь с помощью функции `groupArray` сформируем массив из столбца `y`: ``` sql diff --git a/docs/ru/sql-reference/aggregate-functions/reference/initializeAggregation.md b/docs/ru/sql-reference/aggregate-functions/reference/initializeAggregation.md new file mode 100644 index 00000000000..a2e3764193e --- /dev/null +++ b/docs/ru/sql-reference/aggregate-functions/reference/initializeAggregation.md @@ -0,0 +1,40 @@ +--- +toc_priority: 150 +--- + +## initializeAggregation {#initializeaggregation} + +Инициализирует агрегацию для введённых строк. Предназначена для функций с суффиксом `State`. +Поможет вам проводить тесты или работать со столбцами типов: `AggregateFunction` и `AggregatingMergeTree`. + +**Синтаксис** + +``` sql +initializeAggregation (aggregate_function, column_1, column_2); +``` + +**Параметры** + +- `aggregate_function` — название функции агрегации, состояние которой нужно создать. [String](../../../sql-reference/data-types/string.md#string). +- `column_n` — столбец, который передается в функцию агрегации как аргумент. [String](../../../sql-reference/data-types/string.md#string). + +**Возвращаемое значение** + +Возвращает результат агрегации введенной информации. Тип возвращаемого значения такой же, как и для функции, которая становится первым аргументом для `initializeAggregation`. + +Например: + +Возвращаемый тип функций с суффиксом `State` — `AggregateFunction`.
+ +**Пример** + +Запрос: + +```sql +SELECT uniqMerge(state) FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM system.numbers LIMIT 10000); +``` + +Результат: + +```text +┌─uniqMerge(state)─┐ +│ 3 │ +└──────────────────┘ +``` diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 62181eebb4b..16f21e3a80c 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -1157,6 +1157,7 @@ SELECT arrayCumSum([1, 1, 1, 1]) AS res ┌─res──────────┐ │ [1, 2, 3, 4] │ └──────────────┘ +``` ## arrayAUC {#arrayauc} diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index d24de2faae1..deffc935870 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -234,10 +234,124 @@ WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(d Переводит дату-с-временем в номер секунды, начиная с некоторого фиксированного момента в прошлом. +## date_trunc {#date_trunc} + +Отсекает от даты и времени части, меньшие чем указанная часть. + +**Синтаксис** + +``` sql +date_trunc(unit, value[, timezone]) +``` + +Синоним: `dateTrunc`. + +**Параметры** + +- `unit` — Название части даты или времени. [String](../syntax.md#syntax-string-literal). + Возможные значения: + + - `second` + - `minute` + - `hour` + - `day` + - `week` + - `month` + - `quarter` + - `year` + +- `value` — Дата и время. [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). +- `timezone` — [Часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) для возвращаемого значения (необязательно). Если параметр не задан, используется часовой пояс параметра `value`. [String](../../sql-reference/data-types/string.md) + +**Возвращаемое значение** + +- Дата и время, отсеченные до указанной части. + +Тип: [Datetime](../../sql-reference/data-types/datetime.md). + +**Примеры** + +Запрос без указания часового пояса: + +``` sql +SELECT now(), date_trunc('hour', now()); +``` + +Результат: + +``` text +┌───────────────now()─┬─date_trunc('hour', now())─┐ +│ 2020-09-28 10:40:45 │ 2020-09-28 10:00:00 │ +└─────────────────────┴───────────────────────────┘ +``` + +Запрос с указанием часового пояса: + +```sql +SELECT now(), date_trunc('hour', now(), 'Europe/Moscow'); +``` + +Результат: + +```text +┌───────────────now()─┬─date_trunc('hour', now(), 'Europe/Moscow')─┐ +│ 2020-09-28 10:46:26 │ 2020-09-28 13:00:00 │ +└─────────────────────┴────────────────────────────────────────────┘ +``` + +**См. также** + +- [toStartOfInterval](#tostartofintervaltime-or-data-interval-x-unit-time-zone) + ## now {#now} -Принимает ноль аргументов и возвращает текущее время на один из моментов выполнения запроса. -Функция возвращает константу, даже если запрос выполнялся долго. +Возвращает текущую дату и время. + +**Синтаксис** + +``` sql +now([timezone]) +``` + +**Параметры** + +- `timezone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) для возвращаемого значения (необязательно). [String](../../sql-reference/data-types/string.md) + +**Возвращаемое значение** + +- Текущие дата и время. + +Тип: [Datetime](../../sql-reference/data-types/datetime.md).
+ +**Пример** + +Запрос без указания часового пояса: + +``` sql +SELECT now(); +``` + +Результат: + +``` text +┌───────────────now()─┐ +│ 2020-10-17 07:42:09 │ +└─────────────────────┘ +``` + +Запрос с указанием часового пояса: + +``` sql +SELECT now('Europe/Moscow'); +``` + +Результат: + +``` text +┌─now('Europe/Moscow')─┐ +│ 2020-10-17 10:42:23 │ +└──────────────────────┘ +``` ## today {#today} diff --git a/docs/ru/sql-reference/functions/encryption-functions.md b/docs/ru/sql-reference/functions/encryption-functions.md new file mode 100644 index 00000000000..f1f6516d453 --- /dev/null +++ b/docs/ru/sql-reference/functions/encryption-functions.md @@ -0,0 +1,382 @@ +--- +toc_priority: 67 +toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438 \u0434\u043b\u044f \u0448\u0438\u0444\u0440\u043e\u0432\u0430\u043d\u0438\u044f" +--- + +# Функции шифрования {#encryption-functions} + +Данные функции реализуют шифрование и расшифровку данных с помощью алгоритма AES (Advanced Encryption Standard). + +Длина ключа зависит от режима шифрования. Ключ может быть длиной 16, 24 или 32 байта для режимов шифрования `-128-`, `-192-` и `-256-` соответственно. + +Длина инициализирующего вектора всегда 16 байт (лишние байты игнорируются). + +Обратите внимание, что эти функции работают медленно. + +## encrypt {#encrypt} + +Функция поддерживает шифрование данных следующими режимами: + +- aes-128-ecb, aes-192-ecb, aes-256-ecb +- aes-128-cbc, aes-192-cbc, aes-256-cbc +- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1 +- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8 +- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128 +- aes-128-ofb, aes-192-ofb, aes-256-ofb +- aes-128-gcm, aes-192-gcm, aes-256-gcm + +**Синтаксис** + +``` sql +encrypt('mode', 'plaintext', 'key' [, iv, aad]) +``` + +**Параметры** + +- `mode` — режим шифрования. [String](../../sql-reference/data-types/string.md#string). +- `plaintext` — текст, который будет зашифрован. [String](../../sql-reference/data-types/string.md#string). +- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string). +- `iv` — инициализирующий вектор. Обязателен для `-gcm` режимов, для остальных режимов необязателен. [String](../../sql-reference/data-types/string.md#string). +- `aad` — дополнительные аутентифицированные данные. Не шифруются, но влияют на расшифровку. Параметр работает только с `-gcm` режимами. Для остальных вызовет исключение. [String](../../sql-reference/data-types/string.md#string). + +**Возвращаемое значение** + +- Зашифрованная строка. [String](../../sql-reference/data-types/string.md#string).
+ +**Примеры** + +Создадим такую таблицу: + +Запрос: + +``` sql +CREATE TABLE encryption_test +( + input String, + key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), + iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), + key32 String DEFAULT substring(key, 1, 32), + key24 String DEFAULT substring(key, 1, 24), + key16 String DEFAULT substring(key, 1, 16) +) Engine = Memory; +``` + +Вставим эти данные: + +Запрос: + +``` sql +INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +``` + +Пример без `iv`: + +Запрос: + +``` sql +SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test; +``` + +Результат: + +``` text +┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐ +│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │ +│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │ +│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │ +└─────────────┴──────────────────────────────────────────────────────────────────┘ +``` + +Пример с `iv`: + +Запрос: + +``` sql +SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test; +``` + +Результат: + +``` text +┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐ +│ aes-256-ctr │ │ +│ aes-256-ctr │ 7FB039F7 │ +│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │ +└─────────────┴───────────────────────────────────────────────┘ +``` + +Пример в режиме `-gcm`: + +Запрос: + +``` sql +SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test; +``` + +Результат: + +``` text +┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐ +│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │ +│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │ +│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │ +└─────────────┴────────────────────────────────────────────────────────────────────────┘ +``` + +Пример в режиме `-gcm` и с `aad`: + +Запрос: + +``` sql +SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test; +``` + +Результат: + +``` text +┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐ +│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │ +│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │ +│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │ +└─────────────┴────────────────────────────────────────────────────────────────────────┘ +``` + +## aes_encrypt_mysql {#aes_encrypt_mysql} + +Совместима с шифрованием MySQL, результат может быть расшифрован функцией [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt). + +Функция поддерживает шифрование данных следующими режимами: + +- aes-128-ecb, aes-192-ecb, aes-256-ecb +- aes-128-cbc, aes-192-cbc, aes-256-cbc +- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1 +- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8 +- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128 +- aes-128-ofb, aes-192-ofb, aes-256-ofb + +**Синтаксис** + +```sql +aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv]) +``` + +**Параметры** + +- `mode` — режим шифрования. [String](../../sql-reference/data-types/string.md#string). +- `plaintext` — текст, который будет зашифрован. [String](../../sql-reference/data-types/string.md#string).
+- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string). +- `iv` — инициализирующий вектор. Необязателен. [String](../../sql-reference/data-types/string.md#string). + +**Возвращаемое значение** + +- Зашифрованная строка. [String](../../sql-reference/data-types/string.md#string). + +**Примеры** + +Создадим такую таблицу: + +Запрос: + +``` sql +CREATE TABLE encryption_test +( + input String, + key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), + iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), + key32 String DEFAULT substring(key, 1, 32), + key24 String DEFAULT substring(key, 1, 24), + key16 String DEFAULT substring(key, 1, 16) +) Engine = Memory; +``` + +Вставим эти данные: + +Запрос: + +``` sql +INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +``` + +Пример без `iv`: + +Запрос: + +``` sql +SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test; +``` + +Результат: + +``` text +┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐ +│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │ +│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │ +│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │ +└─────────────┴──────────────────────────────────────────────────────────────────┘ +``` + +Пример с `iv`: + +Запрос: + +``` sql +SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test; +``` + +Результат: + +``` text +┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐ +│ aes-256-cfb128 │ │ +│ aes-256-cfb128 │ 7FB039F7 │ +│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │ +└────────────────┴────────────────────────────────────────────────────────────┘ +``` + +## decrypt {#decrypt} + +Функция поддерживает расшифровку данных следующими режимами: + +- aes-128-ecb, aes-192-ecb, aes-256-ecb +- aes-128-cbc, aes-192-cbc, aes-256-cbc +- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1 +- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8 +- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128 +- aes-128-ofb, aes-192-ofb, aes-256-ofb +- aes-128-gcm, aes-192-gcm, aes-256-gcm + +**Синтаксис** + +```sql +decrypt('mode', 'ciphertext', 'key' [, iv, aad]) +``` + +**Параметры** + +- `mode` — режим шифрования. [String](../../sql-reference/data-types/string.md#string). +- `ciphertext` — зашифрованный текст, который будет расшифрован. [String](../../sql-reference/data-types/string.md#string). +- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string). +- `iv` — инициализирующий вектор. Обязателен для `-gcm` режимов, для остальных режимов опциональный. [String](../../sql-reference/data-types/string.md#string). +- `aad` — дополнительные аутентифицированные данные. Текст не будет расшифрован, если это значение неверно. Работает только с `-gcm` режимами. Для остальных вызовет исключение. [String](../../sql-reference/data-types/string.md#string). + +**Возвращаемое значение** + +- Расшифрованная строка. [String](../../sql-reference/data-types/string.md#string). 
+ +**Примеры** + +Создадим такую таблицу: + +Запрос: + +``` sql +CREATE TABLE encryption_test +( + input String, + key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), + iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), + key32 String DEFAULT substring(key, 1, 32), + key24 String DEFAULT substring(key, 1, 24), + key16 String DEFAULT substring(key, 1, 16) +) Engine = Memory; +``` + +Вставим эти данные: + +Запрос: + +``` sql +INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +``` + +Запрос: + +``` sql +SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test; +``` + +Результат: + +```text +┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐ +│ aes-128-ecb │ │ +│ aes-128-ecb │ text │ +│ aes-128-ecb │ What Is ClickHouse? │ +└─────────────┴─────────────────────────────────────────────────────────────────────┘ +``` + +## aes_decrypt_mysql {#aes_decrypt_mysql} + +Совместима с шифрованием MySQL и может расшифровать данные, зашифрованные функцией [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt). + +Функция поддерживает расшифровку данных следующими режимами: + +- aes-128-ecb, aes-192-ecb, aes-256-ecb +- aes-128-cbc, aes-192-cbc, aes-256-cbc +- aes-128-cfb1, aes-192-cfb1, aes-256-cfb1 +- aes-128-cfb8, aes-192-cfb8, aes-256-cfb8 +- aes-128-cfb128, aes-192-cfb128, aes-256-cfb128 +- aes-128-ofb, aes-192-ofb, aes-256-ofb + +**Синтаксис** + +```sql +aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) +``` + +**Параметры** + +- `mode` — режим шифрования. [String](../../sql-reference/data-types/string.md#string). +- `ciphertext` — зашифрованный текст, который будет расшифрован. [String](../../sql-reference/data-types/string.md#string). +- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string). +- `iv` — инициализирующий вектор. Необязателен. [String](../../sql-reference/data-types/string.md#string). + + +**Возвращаемое значение** + +- Расшифрованная строка. [String](../../sql-reference/data-types/string.md#string). + +**Примеры** + +Создадим такую таблицу: + +Запрос: + +``` sql +CREATE TABLE encryption_test +( + input String, + key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), + iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), + key32 String DEFAULT substring(key, 1, 32), + key24 String DEFAULT substring(key, 1, 24), + key16 String DEFAULT substring(key, 1, 16) +) Engine = Memory; +``` + +Вставим эти данные: + +Запрос: + +``` sql +INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +``` + +Запрос: + +``` sql +SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test; +``` + +Результат: + +``` text +┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐ +│ aes-128-cbc │ │ +│ aes-128-cbc │ text │ +│ aes-128-cbc │ What Is ClickHouse?
│ +└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘ +``` + +[Original article](https://clickhouse.tech/docs/ru/sql-reference/functions/encryption_functions/) diff --git a/docs/ru/sql-reference/functions/introspection.md b/docs/ru/sql-reference/functions/introspection.md index 9f4f2ebd1e9..00dd660bc16 100644 --- a/docs/ru/sql-reference/functions/introspection.md +++ b/docs/ru/sql-reference/functions/introspection.md @@ -306,3 +306,68 @@ execute_native_thread_routine start_thread clone ``` + +## tid {#tid} + +Возвращает id потока, в котором обрабатывается текущий [Block](https://clickhouse.tech/docs/ru/development/architecture/#block). + +**Синтаксис** + +``` sql +tid() +``` + +**Возвращаемое значение** + +- Id текущего потока. [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges). + +**Пример** + +Запрос: + +``` sql +SELECT tid(); +``` + +Результат: + +``` text +┌─tid()─┐ +│ 3878 │ +└───────┘ +``` +## logTrace {#logtrace} + +Выводит сообщение в лог сервера для каждого [Block](https://clickhouse.tech/docs/ru/development/architecture/#block). + +**Синтаксис** + +``` sql +logTrace('message') +``` + +**Параметры** + +- `message` — сообщение, которое отправляется в серверный лог. [String](../../sql-reference/data-types/string.md#string). + +**Возвращаемое значение** + +- Всегда возвращает 0. + +**Пример** + +Запрос: + +``` sql +SELECT logTrace('logTrace message'); +``` + +Результат: + +``` text +┌─logTrace('logTrace message')─┐ +│ 0 │ +└──────────────────────────────┘ +``` + +[Original article](https://clickhouse.tech/docs/en/query_language/functions/introspection/) \ No newline at end of file diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 3733e570f10..4a314bd22d8 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -319,6 +319,62 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut Функция принимает число или дату или дату-с-временем и возвращает строку, содержащую байты, представляющие соответствующее значение в host order (little endian). При этом, отбрасываются нулевые байты с конца. Например, значение 255 типа UInt32 будет строкой длины 1 байт. +## reinterpretAsUUID {#reinterpretasuuid} + +Функция принимает шестнадцатибайтную строку и интерпретирует ее байты в network order (big-endian). Если строка имеет недостаточную длину, то функция работает так, как будто строка дополнена необходимым количеством нулевых байт с конца. Если строка длиннее, чем шестнадцать байт, то игнорируются лишние байты с конца. + +**Синтаксис** + +``` sql +reinterpretAsUUID(fixed_string) +``` + +**Параметры** + +- `fixed_string` — строка с big-endian порядком байтов. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring). + +**Возвращаемое значение** + +- Значение типа [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type). + +**Примеры** + +Интерпретация строки как UUID. + +Запрос: + +``` sql +SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f'))) +``` + +Результат: + +``` text +┌─reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))─┐ +│ 08090a0b-0c0d-0e0f-0001-020304050607 │ +└───────────────────────────────────────────────────────────────────────┘ +``` + +Переход в UUID и обратно.
+ +Запрос: + +``` sql +WITH + generateUUIDv4() AS uuid, + identity(lower(hex(reverse(reinterpretAsString(uuid))))) AS str, + reinterpretAsUUID(reverse(unhex(str))) AS uuid2 +SELECT uuid = uuid2; +``` + +Результат: + +``` text +┌─equals(uuid, uuid2)─┐ +│ 1 │ +└─────────────────────┘ +``` + ## CAST(x, T) {#type_conversion_function-cast} Преобразует x в тип данных t. diff --git a/docs/ru/sql-reference/statements/create/dictionary.md b/docs/ru/sql-reference/statements/create/dictionary.md index a20dc812e02..3134a89483b 100644 --- a/docs/ru/sql-reference/statements/create/dictionary.md +++ b/docs/ru/sql-reference/statements/create/dictionary.md @@ -16,7 +16,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` Создаёт [внешний словарь](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) с заданной [структурой](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [источником](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [способом размещения в памяти](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) и [периодом обновления](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). @@ -27,5 +27,5 @@ LIFETIME([MIN val1] MAX val2) Смотрите [Внешние словари](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). -[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/dictionary) +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/dictionary) \ No newline at end of file diff --git a/docs/ru/sql-reference/statements/select/from.md b/docs/ru/sql-reference/statements/select/from.md index 491bbfe892b..8facf140118 100644 --- a/docs/ru/sql-reference/statements/select/from.md +++ b/docs/ru/sql-reference/statements/select/from.md @@ -27,9 +27,9 @@ toc_title: FROM ### Недостатки {#drawbacks} -Запросы, которые используют `FINAL` выполняются не так быстро, как аналогичные запросы без него, потому что: +Запросы, которые используют `FINAL`, выполняются немного медленнее, чем аналогичные запросы без него, потому что: -- Запрос выполняется в одном потоке, и данные мёржатся во время выполнения запроса. +- Данные мёржатся во время выполнения запроса. - Запросы с модификатором `FINAL` читают столбцы первичного ключа в дополнение к столбцам, используемым в запросе. **В большинстве случаев избегайте использования `FINAL`.** Общий подход заключается в использовании агрегирующих запросов, которые предполагают, что фоновые процессы движков семейства `MergeTree` ещё не случились (например, сами отбрасывают дубликаты). {## TODO: examples ##} diff --git a/docs/ru/sql-reference/statements/select/group-by.md b/docs/ru/sql-reference/statements/select/group-by.md index a0454ef1d91..0c8a29d0c26 100644 --- a/docs/ru/sql-reference/statements/select/group-by.md +++ b/docs/ru/sql-reference/statements/select/group-by.md @@ -43,6 +43,153 @@ toc_title: GROUP BY Если в `GROUP BY` передать несколько ключей, то в результате мы получим все комбинации выборки, как если бы `NULL` был конкретным значением. +## Модификатор WITH ROLLUP {#with-rollup-modifier} + +Модификатор `WITH ROLLUP` применяется для подсчета подытогов для ключевых выражений.
При этом учитывается порядок следования ключевых выражений в списке `GROUP BY`. Подытоги подсчитываются в обратном порядке: сначала для последнего ключевого выражения в списке, потом для предпоследнего и так далее вплоть до самого первого ключевого выражения. + +Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым строки уже сгруппированы, указывается значение `0` или пустая строка. + +!!! note "Примечание" + Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов. + +**Пример** + +Рассмотрим таблицу t: + +```text +┌─year─┬─month─┬─day─┐ +│ 2019 │ 1 │ 5 │ +│ 2019 │ 1 │ 15 │ +│ 2020 │ 1 │ 5 │ +│ 2020 │ 1 │ 15 │ +│ 2020 │ 10 │ 5 │ +│ 2020 │ 10 │ 15 │ +└──────┴───────┴─────┘ +``` + +Запрос: + +```sql +SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP; +``` + +Поскольку секция `GROUP BY` содержит три ключевых выражения, результат состоит из четырех таблиц с подытогами, которые как бы "сворачиваются" справа налево: + +- `GROUP BY year, month, day`; +- `GROUP BY year, month` (а колонка `day` заполнена нулями); +- `GROUP BY year` (теперь обе колонки `month, day` заполнены нулями); +- и общий итог (все три колонки с ключевыми выражениями заполнены нулями). + +```text +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2020 │ 10 │ 15 │ 1 │ +│ 2020 │ 1 │ 5 │ 1 │ +│ 2019 │ 1 │ 5 │ 1 │ +│ 2020 │ 1 │ 15 │ 1 │ +│ 2019 │ 1 │ 15 │ 1 │ +│ 2020 │ 10 │ 5 │ 1 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 1 │ 0 │ 2 │ +│ 2020 │ 1 │ 0 │ 2 │ +│ 2020 │ 10 │ 0 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 0 │ 0 │ 2 │ +│ 2020 │ 0 │ 0 │ 4 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 0 │ 0 │ 6 │ +└──────┴───────┴─────┴─────────┘ +``` + +## Модификатор WITH CUBE {#with-cube-modifier} + +Модификатор `WITH CUBE` применяется для расчета подытогов по всем комбинациям группировки ключевых выражений в списке `GROUP BY`. + +Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым выполняется группировка, указывается значение `0` или пустая строка. + +!!! note "Примечание" + Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов. + +**Пример** + +Рассмотрим таблицу t: + +```text +┌─year─┬─month─┬─day─┐ +│ 2019 │ 1 │ 5 │ +│ 2019 │ 1 │ 15 │ +│ 2020 │ 1 │ 5 │ +│ 2020 │ 1 │ 15 │ +│ 2020 │ 10 │ 5 │ +│ 2020 │ 10 │ 15 │ +└──────┴───────┴─────┘ +``` + +Запрос: + +```sql +SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE; +``` + +Поскольку секция `GROUP BY` содержит три ключевых выражения, результат состоит из восьми таблиц с подытогами — по таблице для каждой комбинации ключевых выражений: + +- `GROUP BY year, month, day` +- `GROUP BY year, month` +- `GROUP BY year, day` +- `GROUP BY year` +- `GROUP BY month, day` +- `GROUP BY month` +- `GROUP BY day` +- и общий итог. + +Колонки, которые не участвуют в `GROUP BY`, заполнены нулями.
+ +```text +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2020 │ 10 │ 15 │ 1 │ +│ 2020 │ 1 │ 5 │ 1 │ +│ 2019 │ 1 │ 5 │ 1 │ +│ 2020 │ 1 │ 15 │ 1 │ +│ 2019 │ 1 │ 15 │ 1 │ +│ 2020 │ 10 │ 5 │ 1 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 1 │ 0 │ 2 │ +│ 2020 │ 1 │ 0 │ 2 │ +│ 2020 │ 10 │ 0 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2020 │ 0 │ 5 │ 2 │ +│ 2019 │ 0 │ 5 │ 1 │ +│ 2020 │ 0 │ 15 │ 2 │ +│ 2019 │ 0 │ 15 │ 1 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 2019 │ 0 │ 0 │ 2 │ +│ 2020 │ 0 │ 0 │ 4 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 1 │ 5 │ 2 │ +│ 0 │ 10 │ 15 │ 1 │ +│ 0 │ 10 │ 5 │ 1 │ +│ 0 │ 1 │ 15 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 1 │ 0 │ 4 │ +│ 0 │ 10 │ 0 │ 2 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 0 │ 5 │ 3 │ +│ 0 │ 0 │ 15 │ 3 │ +└──────┴───────┴─────┴─────────┘ +┌─year─┬─month─┬─day─┬─count()─┐ +│ 0 │ 0 │ 0 │ 6 │ +└──────┴───────┴─────┴─────────┘ +``` + + ## Модификатор WITH TOTALS {#with-totals-modifier} Если указан модификатор `WITH TOTALS`, то будет посчитана ещё одна строчка, в которой в столбцах-ключах будут содержаться значения по умолчанию (нули, пустые строки), а в столбцах агрегатных функций - значения, посчитанные по всем строкам («тотальные» значения). @@ -86,8 +233,6 @@ SELECT FROM hits ``` -Но, в отличие от стандартного SQL, если в таблице нет строк (вообще нет или после фильтрации с помощью WHERE), в качестве результата возвращается пустой результат, а не результат из одной строки, содержащий «начальные» значения агрегатных функций. - В отличие от MySQL (и в соответствии со стандартом SQL), вы не можете получить какое-нибудь значение некоторого столбца, не входящего в ключ или агрегатную функцию (за исключением константных выражений). Для обхода этого вы можете воспользоваться агрегатной функцией any (получить первое попавшееся значение) или min/max. Пример: @@ -103,10 +248,6 @@ GROUP BY domain GROUP BY вычисляет для каждого встретившегося различного значения ключей, набор значений агрегатных функций. -Не поддерживается GROUP BY по столбцам-массивам. - -Не поддерживается указание констант в качестве аргументов агрегатных функций. Пример: `sum(1)`. Вместо этого, вы можете избавиться от констант. Пример: `count()`. - ## Детали реализации {#implementation-details} Агрегация является одной из наиболее важных возможностей столбцовых СУБД, и поэтому её реализация является одной из наиболее сильно оптимизированных частей ClickHouse. По умолчанию агрегирование выполняется в памяти с помощью хэш-таблицы. Она имеет более 40 специализаций, которые выбираются автоматически в зависимости от типов данных ключа группировки. 
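To round off the `GROUP BY` modifiers described above, a sketch of `WITH TOTALS` against the same example table `t`: the totals row carries default key values, and the counts follow from the six rows shown earlier (expected output, not captured from a server):

``` sql
SELECT year, count(*) FROM t GROUP BY year WITH TOTALS;
```

``` text
┌─year─┬─count()─┐
│ 2019 │       2 │
│ 2020 │       4 │
└──────┴─────────┘

Totals:
┌─year─┬─count()─┐
│    0 │       6 │
└──────┴─────────┘
```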
diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md index f5fe2788370..c2e05f05079 100644 --- a/docs/ru/sql-reference/statements/select/index.md +++ b/docs/ru/sql-reference/statements/select/index.md @@ -18,7 +18,7 @@ SELECT [DISTINCT] expr_list [GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON )|(USING ) [PREWHERE expr] [WHERE expr] -[GROUP BY expr_list] [WITH TOTALS] +[GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS] [HAVING expr] [ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr] [LIMIT [offset_value, ]n BY columns] diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 3560b6ad40a..4106100bfa3 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -21,7 +21,7 @@ mkdocs-htmlproofer-plugin==0.0.3 mkdocs-macros-plugin==0.4.20 nltk==3.5 nose==1.3.7 -protobuf==3.13.0 +protobuf==3.14.0 numpy==1.19.2 Pygments==2.5.2 pymdown-extensions==8.0 diff --git a/docs/tr/sql-reference/statements/create.md b/docs/tr/sql-reference/statements/create.md index 79bdb45f9e4..78390564880 100644 --- a/docs/tr/sql-reference/statements/create.md +++ b/docs/tr/sql-reference/statements/create.md @@ -291,7 +291,7 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` Oluşturuyor [dış sözlük](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) verilen ile [yapılı](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [kaynaklı](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [düzen](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) ve [ömür](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). diff --git a/docs/zh/commercial/support.md b/docs/zh/commercial/support.md index e8462fc962e..f543338d4e6 100644 --- a/docs/zh/commercial/support.md +++ b/docs/zh/commercial/support.md @@ -2,17 +2,17 @@ machine_translated: true machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 toc_priority: 3 -toc_title: "\u788C\u83BD\u7984Support:" +toc_title: "\u5546\u4e1a\u652f\u6301" --- # ClickHouse商业支持服务提供商 {#clickhouse-commercial-support-service-providers} !!! info "信息" - 如果您已经推出ClickHouse商业支持服务,请随时 [打开拉取请求](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/commercial/support.md) 将其添加到以下列表。 + 如果您已经推出ClickHouse商业支持服务,请随时 [提交一个 pull-request](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/commercial/support.md) 将其添加到以下列表。 -## 敏锐性 {#altinity} +## Altinity {#altinity} -隆隆隆隆路虏脢..陇.貌.垄拢卢虏禄and陇.貌路.隆拢脳枚脢虏 隆隆隆隆路虏脢..陇.貌.垄拢卢虏禄.陇 访问 [www.altinity.com](https://www.altinity.com/) 欲了解更多信息. + Altinity 自从 2017 年开始已经为企业提供 ClickHouse 支持服务。Altinity 的客户范围包含百强企业到初创企业。访问 [www.altinity.com](https://www.altinity.com/) 了解更多信息。 ## Mafiree {#mafiree} diff --git a/docs/zh/getting-started/playground.md b/docs/zh/getting-started/playground.md index 192203c6fe6..3eac3905f23 100644 --- a/docs/zh/getting-started/playground.md +++ b/docs/zh/getting-started/playground.md @@ -21,15 +21,15 @@ toc_title: "\u266A\u64CD\u573A\u266A" ClickHouse体验还有如下: [ClickHouse管理服务](https://cloud.yandex.com/services/managed-clickhouse) -实例托管 [Yandex云](https://cloud.yandex.com/). 
-更多信息 [云提供商](../commercial/cloud.md). +实例托管 [Yandex云](https://cloud.yandex.com/)。 +更多信息 [云提供商](../commercial/cloud.md)。 ClickHouse体验平台界面实际上是通过ClickHouse [HTTP API](../interfaces/http.md)接口实现的. 体验平台后端只是一个ClickHouse集群,没有任何额外的服务器端应用程序。 体验平台也同样提供了ClickHouse HTTPS服务端口。 -您可以使用任何HTTP客户端向体验平台进行查询,例如 [curl](https://curl.haxx.se) 或 [wget](https://www.gnu.org/software/wget/),或使用以下方式建立连接 [JDBC](../interfaces/jdbc.md) 或 [ODBC](../interfaces/odbc.md) 司机 -有关支持ClickHouse的软件产品的更多信息,请访问 [这里](../interfaces/index.md). +您可以使用任何HTTP客户端向体验平台进行查询,例如 [curl](https://curl.haxx.se) 或 [wget](https://www.gnu.org/software/wget/),或使用以下方式建立连接 [JDBC](../interfaces/jdbc.md) 或 [ODBC](../interfaces/odbc.md) 驱动。 +有关支持ClickHouse的软件产品的更多信息,请访问 [这里](../interfaces/index.md)。 | 参数 | 值 | |:---------|:--------------------------------------| diff --git a/docs/zh/operations/backup.md b/docs/zh/operations/backup.md index 0ce138c764e..72491bb53ff 100644 --- a/docs/zh/operations/backup.md +++ b/docs/zh/operations/backup.md @@ -7,35 +7,37 @@ toc_title: "\u6570\u636E\u5907\u4EFD" # 数据备份 {#data-backup} -碌莽禄While: [复制](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [您不能使用类似MergeTree的引擎删除包含超过50Gb数据的表](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). 但是,这些保障措施并不涵盖所有可能的情况,可以规避。 +尽管[副本](../engines/table-engines/mergetree-family/replication.md) 可以预防硬件错误带来的数据丢失, 但是它不能防止人为操作的错误: 意外删除数据, 删除错误的 table 或者删除错误 cluster 上的 table, 以及会导致数据处理错误或者数据损坏的软件 bugs. 这类意外可能会影响所有的副本. ClickHouse 有内建的保障措施可以预防一些错误 — 例如, 默认情况下[您不能使用类似MergeTree的引擎删除包含超过50Gb数据的表](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). 但是,这些保障措施不能涵盖所有可能的情况,并且可以规避。 -为了有效地减少可能的人为错误,您应该仔细准备备份和还原数据的策略 **提前**. +为了有效地减少可能的人为错误,您应该 **提前**准备备份和还原数据的策略. -每家公司都有不同的可用资源和业务需求,因此没有适合各种情况的ClickHouse备份和恢复通用解决方案。 什么适用于一千兆字节的数据可能不会为几十pb的工作。 有多种可能的方法有自己的优点和缺点,这将在下面讨论。 这是一个好主意,使用几种方法,而不是只是一个,以弥补其各种缺点。 +不同公司有不同的可用资源和业务需求,因此没有适合各种情况的ClickHouse备份和恢复通用解决方案。 适用于 1GB 的数据的方案可能并不适用于几十 PB 数据的情况。 有多种可能的并有自己优缺点的方法,这将在下面讨论。 好的主意是同时结合使用多种方法而不是仅使用一种,这样可以弥补不同方法各自的缺点。 !!! note "注" 请记住,如果您备份了某些内容并且从未尝试过还原它,那么当您实际需要它时(或者至少需要比业务能够容忍的时间更长),恢复可能无法正常工作。 因此,无论您选择哪种备份方法,请确保自动还原过程,并定期在备用ClickHouse群集上练习。 ## 将源数据复制到其他地方 {#duplicating-source-data-somewhere-else} -通常被摄入到ClickHouse的数据是通过某种持久队列传递的,例如 [Apache Kafka](https://kafka.apache.org). 在这种情况下,可以配置一组额外的订阅服务器,这些订阅服务器将在写入ClickHouse时读取相同的数据流,并将其存储在冷存储中。 大多数公司已经有一些默认的推荐冷存储,可能是对象存储或分布式文件系统,如 [HDFS](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html). +通常被摄入到ClickHouse的数据是通过某种持久队列传递的,例如 [Apache Kafka](https://kafka.apache.org). 在这种情况下,可以配置一组额外的订阅服务器,这些订阅服务器将在写入ClickHouse时读取相同的数据流,并将其存储在冷存储中。 大多数公司已经有一些默认的推荐冷存储,可能是对象存储或分布式文件系统,如 [HDFS](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html).
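The duplication idea above can be sketched in ClickHouse terms: a second Kafka consumer group on the same topic drains into an archive table, independent of the main ingestion path. Broker, topic, and table names below are hypothetical, and in practice the archive target would usually be the cold storage mentioned above rather than a local MergeTree table:

``` sql
-- Extra consumer with its own kafka_group_name, so it does not steal
-- messages from the main ingestion consumers.
CREATE TABLE events_backup_queue (ts DateTime, payload String)
ENGINE = Kafka
SETTINGS kafka_broker_list = 'kafka:9092',
         kafka_topic_list = 'events',
         kafka_group_name = 'clickhouse_backup',
         kafka_format = 'JSONEachRow';

-- Archive table plus a materialized view that continuously drains the queue.
CREATE TABLE events_backup (ts DateTime, payload String)
ENGINE = MergeTree ORDER BY ts;

CREATE MATERIALIZED VIEW events_backup_mv TO events_backup
AS SELECT ts, payload FROM events_backup_queue;
```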
## 文件系统快照 {#filesystem-snapshots} 某些本地文件系统提供快照功能(例如, [ZFS](https://en.wikipedia.org/wiki/ZFS)),但它们可能不是提供实时查询的最佳选择。 一个可能的解决方案是使用这种文件系统创建额外的副本,并将它们从 [分布](../engines/table-engines/special/distributed.md) 用于以下目的的表 `SELECT` 查询。 任何修改数据的查询都无法访问此类副本上的快照。 作为奖励,这些副本可能具有特殊的硬件配置,每个服务器附加更多的磁盘,这将是经济高效的。 -## ツ环板-ョツ嘉ッツ偲 {#clickhouse-copier} +## clickhouse-copier {#clickhouse-copier} -[ツ环板-ョツ嘉ッツ偲](utilities/clickhouse-copier.md) 是一个多功能工具,最初创建用于重新分片pb大小的表。 它还可用于备份和还原目的,因为它可以在ClickHouse表和集群之间可靠地复制数据。 +[clickhouse-copier](utilities/clickhouse-copier.md) 是一个多功能工具,最初创建用于重新分片pb大小的表。 因为它可以在ClickHouse表和集群之间可靠地复制数据,所以它还可用于备份和还原数据。 对于较小的数据量,一个简单的 `INSERT INTO ... SELECT ...` 到远程表也可以工作。 ## 部件操作 {#manipulations-with-parts} -ClickHouse允许使用 `ALTER TABLE ... FREEZE PARTITION ...` 查询以创建表分区的本地副本。 这是使用硬链接来实现 `/var/lib/clickhouse/shadow/` 文件夹中,所以它通常不会占用旧数据的额外磁盘空间。 创建的文件副本不由ClickHouse服务器处理,所以你可以把它们留在那里:你将有一个简单的备份,不需要任何额外的外部系统,但它仍然会容易出现硬件问题。 出于这个原因,最好将它们远程复制到另一个位置,然后删除本地副本。 分布式文件系统和对象存储仍然是一个不错的选择,但是具有足够大容量的正常附加文件服务器也可以工作(在这种情况下,传输将通过网络文件系统 [rsync](https://en.wikipedia.org/wiki/Rsync)). +ClickHouse允许使用 `ALTER TABLE ... FREEZE PARTITION ...` 查询以创建表分区的本地副本。 这是利用硬链接(hardlink)到 `/var/lib/clickhouse/shadow/` 文件夹中实现的,所以它通常不会占用旧数据的额外磁盘空间。 创建的文件副本不由ClickHouse服务器处理,所以你可以把它们留在那里:你将有一个简单的备份,不需要任何额外的外部系统,但它仍然会容易出现硬件问题。 出于这个原因,最好将它们远程复制到另一个位置,然后删除本地副本。 分布式文件系统和对象存储仍然是一个不错的选择,但是具有足够大容量的正常附加文件服务器也可以工作(在这种情况下,传输将通过网络文件系统 [rsync](https://en.wikipedia.org/wiki/Rsync)). + +数据可以使用 `ALTER TABLE ... ATTACH PARTITION ...` 从备份中恢复。 有关与分区操作相关的查询的详细信息,请参阅 [更改文档](../sql-reference/statements/alter.md#alter_manipulations-with-partitions). -第三方工具可用于自动化此方法: [ツ环板backupョツ嘉ッツ偲](https://github.com/AlexAkulov/clickhouse-backup). +第三方工具可用于自动化此方法: [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup). [原始文章](https://clickhouse.tech/docs/en/operations/backup/) diff --git a/docs/zh/operations/monitoring.md b/docs/zh/operations/monitoring.md index a5c30e46f4c..73896d3f8c1 100644 --- a/docs/zh/operations/monitoring.md +++ b/docs/zh/operations/monitoring.md @@ -33,10 +33,10 @@ ClickHouse 收集的指标项: - 服务用于计算的资源占用的各种指标。 - 关于查询处理的常见统计信息。 -可以在 [系统指标](system-tables/metrics.md#system_tables-metrics) ,[系统事件](system-tables/events.md#system_tables-events) 以及[系统异步指标](system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) 等系统表查看所有的指标项。 +可以在[系统指标](system-tables/metrics.md#system_tables-metrics),[系统事件](system-tables/events.md#system_tables-events)以及[系统异步指标](system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics)等系统表查看所有的指标项。 -可以配置ClickHouse 往 [石墨](https://github.com/graphite-project)导入指标。 参考 [石墨部分](server-configuration-parameters/settings.md#server_configuration_parameters-graphite) 配置文件。在配置指标导出之前,需要参考Graphite[官方教程](https://graphite.readthedocs.io/en/latest/install.html)搭建服务。 +可以配置ClickHouse向[Graphite](https://github.com/graphite-project)推送监控信息并导入指标。参考[Graphite监控](server-configuration-parameters/settings.md#server_configuration_parameters-graphite)配置文件。在配置指标导出之前,需要参考[Graphite官方教程](https://graphite.readthedocs.io/en/latest/install.html)搭建Graphite服务。 -此外,您可以通过HTTP API监视服务器可用性。 将HTTP GET请求发送到 `/ping`。 如果服务器可用,它将以 `200 OK` 响应。 +此外,您可以通过HTTP API监视服务器可用性。将HTTP GET请求发送到`/ping`。如果服务器可用,它将以 `200 OK` 响应。 -要监视服务器集群的配置,应设置[max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries)参数并使用HTTP资源`/replicas_status`。 如果副本可用,并且不延迟在其他副本之后,则对`/replicas_status`的请求将返回200 OK。 如果副本滞后,请求将返回 `503 HTTP_SERVICE_UNAVAILABLE`,包括有关待办事项大小的信息。 
+要监视服务器集群的配置,应设置[max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries)参数并使用HTTP资源`/replicas_status`。 如果副本可用,并且不延迟在其他副本之后,则对`/replicas_status`的请求将返回`200 OK`。 如果副本滞后,请求将返回`503 HTTP_SERVICE_UNAVAILABLE`,包括有关待办事项大小的信息。 diff --git a/docs/zh/sql-reference/statements/create.md b/docs/zh/sql-reference/statements/create.md index fa3cb8e5ea5..639af0841dc 100644 --- a/docs/zh/sql-reference/statements/create.md +++ b/docs/zh/sql-reference/statements/create.md @@ -259,5 +259,5 @@ CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster] PRIMARY KEY key1, key2 SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN])) LAYOUT(LAYOUT_NAME([param_name param_value])) -LIFETIME([MIN val1] MAX val2) +LIFETIME({MIN min_val MAX max_val | max_val}) ``` diff --git a/programs/git-import/git-import.cpp b/programs/git-import/git-import.cpp index 7cdd77b4b7c..b6ea2c7baf4 100644 --- a/programs/git-import/git-import.cpp +++ b/programs/git-import/git-import.cpp @@ -680,7 +680,7 @@ void updateSnapshot(Snapshot & snapshot, const Commit & commit, CommitDiff & fil for (auto & elem : file_changes) { auto & file = elem.second.file_change; - if (file.path != file.old_path) + if (!file.old_path.empty() && file.path != file.old_path) snapshot[file.path] = snapshot[file.old_path]; } diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 8290118089c..da22452819a 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -329,14 +329,20 @@ int mainEntryClickHouseInstall(int argc, char ** argv) bool has_password_for_default_user = false; - if (!fs::exists(main_config_file)) + if (!fs::exists(config_d)) { fmt::print("Creating config directory {} that is used for tweaks of main server configuration.\n", config_d.string()); fs::create_directory(config_d); + } + if (!fs::exists(users_d)) + { fmt::print("Creating config directory {} that is used for tweaks of users configuration.\n", users_d.string()); fs::create_directory(users_d); + } + if (!fs::exists(main_config_file)) + { std::string_view main_config_content = getResource("config.xml"); if (main_config_content.empty()) { @@ -349,7 +355,30 @@ int mainEntryClickHouseInstall(int argc, char ** argv) out.sync(); out.finalize(); } + } + else + { + fmt::print("Config file {} already exists, will keep it and extract path info from it.\n", main_config_file.string()); + ConfigProcessor processor(main_config_file.string(), /* throw_on_bad_incl = */ false, /* log_to_console = */ false); + ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(processor.processConfig())); + + if (configuration->has("path")) + { + data_path = configuration->getString("path"); + fmt::print("{} has {} as data path.\n", main_config_file.string(), data_path); + } + + if (configuration->has("logger.log")) + { + log_path = fs::path(configuration->getString("logger.log")).remove_filename(); + fmt::print("{} has {} as log path.\n", main_config_file.string(), log_path); + } + } + + + if (!fs::exists(users_config_file)) + { std::string_view users_config_content = getResource("users.xml"); if (users_config_content.empty()) { @@ -365,38 +394,17 @@ int mainEntryClickHouseInstall(int argc, char ** argv) } else { - { - fmt::print("Config file {} already exists, will keep it and extract path info from it.\n", main_config_file.string()); - - ConfigProcessor processor(main_config_file.string(), /* throw_on_bad_incl = */ false, /* log_to_console = */ false); - ConfigurationPtr configuration(new 
Poco::Util::XMLConfiguration(processor.processConfig())); - - if (configuration->has("path")) - { - data_path = configuration->getString("path"); - fmt::print("{} has {} as data path.\n", main_config_file.string(), data_path); - } - - if (configuration->has("logger.log")) - { - log_path = fs::path(configuration->getString("logger.log")).remove_filename(); - fmt::print("{} has {} as log path.\n", main_config_file.string(), log_path); - } - } + fmt::print("Users config file {} already exists, will keep it and extract users info from it.\n", users_config_file.string()); /// Check if password for default user already specified. + ConfigProcessor processor(users_config_file.string(), /* throw_on_bad_incl = */ false, /* log_to_console = */ false); + ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(processor.processConfig())); - if (fs::exists(users_config_file)) + if (!configuration->getString("users.default.password", "").empty() + || configuration->getString("users.default.password_sha256_hex", "").empty() + || configuration->getString("users.default.password_double_sha1_hex", "").empty()) { - ConfigProcessor processor(users_config_file.string(), /* throw_on_bad_incl = */ false, /* log_to_console = */ false); - ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(processor.processConfig())); - - if (!configuration->getString("users.default.password", "").empty() - || configuration->getString("users.default.password_sha256_hex", "").empty() - || configuration->getString("users.default.password_double_sha1_hex", "").empty()) - { - has_password_for_default_user = true; - } + has_password_for_default_user = true; } } diff --git a/programs/server/ya.make b/programs/server/ya.make index b4deaafedc5..1b99fb31a3f 100644 --- a/programs/server/ya.make +++ b/programs/server/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + PROGRAM(clickhouse-server) PEERDIR( diff --git a/programs/ya.make b/programs/ya.make index e77814ddf69..2de3052f1d2 100644 --- a/programs/ya.make +++ b/programs/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + PROGRAM(clickhouse) CFLAGS( diff --git a/src/Access/ya.make b/src/Access/ya.make index 97640344498..b4469aa3167 100644 --- a/src/Access/ya.make +++ b/src/Access/ya.make @@ -1,4 +1,6 @@ # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. 
+OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Access/ya.make.in b/src/Access/ya.make.in index ce7cd88b272..0c5692a9bfa 100644 --- a/src/Access/ya.make.in +++ b/src/Access/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/AggregateFunctions/AggregateFunctionCount.cpp b/src/AggregateFunctions/AggregateFunctionCount.cpp index 6ea63bedaf0..05824947b87 100644 --- a/src/AggregateFunctions/AggregateFunctionCount.cpp +++ b/src/AggregateFunctions/AggregateFunctionCount.cpp @@ -8,7 +8,7 @@ namespace DB { AggregateFunctionPtr AggregateFunctionCount::getOwnNullAdapter( - const AggregateFunctionPtr &, const DataTypes & types, const Array & params) const + const AggregateFunctionPtr &, const DataTypes & types, const Array & params, const AggregateFunctionProperties & /*properties*/) const { return std::make_shared(types[0], params); } diff --git a/src/AggregateFunctions/AggregateFunctionCount.h b/src/AggregateFunctions/AggregateFunctionCount.h index 29c5de0021c..eb1583df92a 100644 --- a/src/AggregateFunctions/AggregateFunctionCount.h +++ b/src/AggregateFunctions/AggregateFunctionCount.h @@ -69,7 +69,7 @@ public: } AggregateFunctionPtr getOwnNullAdapter( - const AggregateFunctionPtr &, const DataTypes & types, const Array & params) const override; + const AggregateFunctionPtr &, const DataTypes & types, const Array & params, const AggregateFunctionProperties & /*properties*/) const override; }; diff --git a/src/AggregateFunctions/AggregateFunctionIf.cpp b/src/AggregateFunctions/AggregateFunctionIf.cpp index 19a175de911..276abb90920 100644 --- a/src/AggregateFunctions/AggregateFunctionIf.cpp +++ b/src/AggregateFunctions/AggregateFunctionIf.cpp @@ -1,6 +1,7 @@ #include #include #include "registerAggregateFunctions.h" +#include "AggregateFunctionNull.h" namespace DB @@ -8,6 +9,7 @@ namespace DB namespace ErrorCodes { + extern const int LOGICAL_ERROR; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -40,6 +42,164 @@ public: } }; +/** There are two cases: for single argument and variadic. + * Code for single argument is much more efficient. 
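+ * For example, `sumIf(x, cond)` over a Nullable `x` takes the unary specialization below (one data argument plus the condition), + * while `corrIf(x, y, cond)`, with more than one data argument, goes through the variadic one.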
+ */ +template +class AggregateFunctionIfNullUnary final + : public AggregateFunctionNullBase> +{ +private: + size_t num_arguments; + + using Base = AggregateFunctionNullBase>; +public: + + String getName() const override + { + return Base::getName(); + } + + AggregateFunctionIfNullUnary(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params) + : Base(std::move(nested_function_), arguments, params), num_arguments(arguments.size()) + { + if (num_arguments == 0) + throw Exception("Aggregate function " + getName() + " requires at least one argument", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + } + + static inline bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments) + { + const IColumn * filter_column = columns[num_arguments - 1]; + if (const ColumnNullable * nullable_column = typeid_cast(filter_column)) + filter_column = nullable_column->getNestedColumnPtr().get(); + + return assert_cast(*filter_column).getData()[row_num]; + } + + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override + { + const ColumnNullable * column = assert_cast(columns[0]); + const IColumn * nested_column = &column->getNestedColumn(); + if (!column->isNullAt(row_num) && singleFilter(columns, row_num, num_arguments)) + { + this->setFlag(place); + this->nested_function->add(this->nestedPlace(place), &nested_column, row_num, arena); + } + } +}; + +template +class AggregateFunctionIfNullVariadic final + : public AggregateFunctionNullBase> +{ +public: + + String getName() const override + { + return Base::getName(); + } + + AggregateFunctionIfNullVariadic(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params) + : Base(std::move(nested_function_), arguments, params), number_of_arguments(arguments.size()) + { + if (number_of_arguments == 1) + throw Exception("Logical error: single argument is passed to AggregateFunctionIfNullVariadic", ErrorCodes::LOGICAL_ERROR); + + if (number_of_arguments > MAX_ARGS) + throw Exception("Maximum number of arguments for aggregate function with Nullable types is " + toString(size_t(MAX_ARGS)), + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + for (size_t i = 0; i < number_of_arguments; ++i) + is_nullable[i] = arguments[i]->isNullable(); + } + + static inline bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments) + { + return assert_cast(*columns[num_arguments - 1]).getData()[row_num]; + } + + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override + { + /// This container stores the columns we really pass to the nested function. + const IColumn * nested_columns[number_of_arguments]; + + for (size_t i = 0; i < number_of_arguments; ++i) + { + if (is_nullable[i]) + { + const ColumnNullable & nullable_col = assert_cast(*columns[i]); + if (null_is_skipped && nullable_col.isNullAt(row_num)) + { + /// If at least one column has a null value in the current row, + /// we don't process this row.
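+ /// (E.g. for `corrIf(x, y, cond)`, a row where `x` or `y` is NULL adds nothing to the state, even if the condition holds.)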
+ return; + } + nested_columns[i] = &nullable_col.getNestedColumn(); + } + else + nested_columns[i] = columns[i]; + } + + if (singleFilter(nested_columns, row_num, number_of_arguments)) + { + this->setFlag(place); + this->nested_function->add(this->nestedPlace(place), nested_columns, row_num, arena); + } + } + +private: + using Base = AggregateFunctionNullBase>; + + enum { MAX_ARGS = 8 }; + size_t number_of_arguments = 0; + std::array is_nullable; /// A plain array is better than std::vector: one indirection less. +}; + + +AggregateFunctionPtr AggregateFunctionIf::getOwnNullAdapter( + const AggregateFunctionPtr & nested_function, const DataTypes & arguments, + const Array & params, const AggregateFunctionProperties & properties) const +{ + bool return_type_is_nullable = !properties.returns_default_when_only_null && getReturnType()->canBeInsideNullable(); + size_t nullable_size = std::count_if(arguments.begin(), arguments.end(), [](const auto & element) { return element->isNullable(); }); + return_type_is_nullable &= nullable_size != 1 || !arguments.back()->isNullable(); /// If only the condition is nullable, we should use a non-nullable return type. + bool serialize_flag = return_type_is_nullable || properties.returns_default_when_only_null; + + if (arguments.size() <= 2 && arguments.front()->isNullable()) + { + if (return_type_is_nullable) + { + return std::make_shared>(nested_func, arguments, params); + } + else + { + if (serialize_flag) + return std::make_shared>(nested_func, arguments, params); + else + return std::make_shared>(nested_func, arguments, params); + } + } + else + { + if (return_type_is_nullable) + { + return std::make_shared>(nested_function, arguments, params); + } + else + { + if (serialize_flag) + return std::make_shared>(nested_function, arguments, params); + else + return std::make_shared>(nested_function, arguments, params); + } + } +} + void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory & factory) { factory.registerCombinator(std::make_shared()); diff --git a/src/AggregateFunctions/AggregateFunctionIf.h b/src/AggregateFunctions/AggregateFunctionIf.h index f04450c9142..d5d2b9be0dd 100644 --- a/src/AggregateFunctions/AggregateFunctionIf.h +++ b/src/AggregateFunctions/AggregateFunctionIf.h @@ -109,6 +109,10 @@ public: { return nested_func->isState(); } + + AggregateFunctionPtr getOwnNullAdapter( + const AggregateFunctionPtr & nested_function, const DataTypes & arguments, + const Array & params, const AggregateFunctionProperties & properties) const override; }; } diff --git a/src/AggregateFunctions/AggregateFunctionMLMethod.cpp b/src/AggregateFunctions/AggregateFunctionMLMethod.cpp index 812794902df..6c5c5af2f1d 100644 --- a/src/AggregateFunctions/AggregateFunctionMLMethod.cpp +++ b/src/AggregateFunctions/AggregateFunctionMLMethod.cpp @@ -143,7 +143,7 @@ void LinearModelData::updateState() void LinearModelData::predict( ColumnVector::Container & container, - ColumnsWithTypeAndName & arguments, + const ColumnsWithTypeAndName & arguments, size_t offset, size_t limit, const Context & context) const @@ -264,8 +264,8 @@ void Adam::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac) average_gradient[i] = average_gradient[i] * frac + adam_rhs.average_gradient[i] * rhs_frac; average_squared_gradient[i] = average_squared_gradient[i] * frac + adam_rhs.average_squared_gradient[i] * rhs_frac; } - beta1_powered_ *= adam_rhs.beta1_powered_; - beta2_powered_ *= adam_rhs.beta2_powered_; + beta1_powered *= adam_rhs.beta1_powered; + beta2_powered *= 
adam_rhs.beta2_powered; } void Adam::update(UInt64 batch_size, std::vector & weights, Float64 & bias, Float64 learning_rate, const std::vector & batch_gradient) @@ -282,21 +282,21 @@ void Adam::update(UInt64 batch_size, std::vector & weights, Float64 & b for (size_t i = 0; i != average_gradient.size(); ++i) { Float64 normed_gradient = batch_gradient[i] / batch_size; - average_gradient[i] = beta1_ * average_gradient[i] + (1 - beta1_) * normed_gradient; - average_squared_gradient[i] = beta2_ * average_squared_gradient[i] + - (1 - beta2_) * normed_gradient * normed_gradient; + average_gradient[i] = beta1 * average_gradient[i] + (1 - beta1) * normed_gradient; + average_squared_gradient[i] = beta2 * average_squared_gradient[i] + + (1 - beta2) * normed_gradient * normed_gradient; } for (size_t i = 0; i < weights.size(); ++i) { weights[i] += (learning_rate * average_gradient[i]) / - ((1 - beta1_powered_) * (sqrt(average_squared_gradient[i] / (1 - beta2_powered_)) + eps_)); + ((1 - beta1_powered) * (sqrt(average_squared_gradient[i] / (1 - beta2_powered)) + eps)); } bias += (learning_rate * average_gradient[weights.size()]) / - ((1 - beta1_powered_) * (sqrt(average_squared_gradient[weights.size()] / (1 - beta2_powered_)) + eps_)); + ((1 - beta1_powered) * (sqrt(average_squared_gradient[weights.size()] / (1 - beta2_powered)) + eps)); - beta1_powered_ *= beta1_; - beta2_powered_ *= beta2_; + beta1_powered *= beta1; + beta2_powered *= beta2; } void Adam::addToBatch( @@ -348,7 +348,7 @@ void Nesterov::update(UInt64 batch_size, std::vector & weights, Float64 for (size_t i = 0; i < batch_gradient.size(); ++i) { - accumulated_gradient[i] = accumulated_gradient[i] * alpha_ + (learning_rate * batch_gradient[i]) / batch_size; + accumulated_gradient[i] = accumulated_gradient[i] * alpha + (learning_rate * batch_gradient[i]) / batch_size; } for (size_t i = 0; i < weights.size(); ++i) { @@ -375,9 +375,9 @@ void Nesterov::addToBatch( std::vector shifted_weights(weights.size()); for (size_t i = 0; i != shifted_weights.size(); ++i) { - shifted_weights[i] = weights[i] + accumulated_gradient[i] * alpha_; + shifted_weights[i] = weights[i] + accumulated_gradient[i] * alpha; } - auto shifted_bias = bias + accumulated_gradient[weights.size()] * alpha_; + auto shifted_bias = bias + accumulated_gradient[weights.size()] * alpha; gradient_computer.compute(batch_gradient, shifted_weights, shifted_bias, l2_reg_coef, target, columns, row_num); } @@ -411,7 +411,7 @@ void Momentum::update(UInt64 batch_size, std::vector & weights, Float64 for (size_t i = 0; i < batch_gradient.size(); ++i) { - accumulated_gradient[i] = accumulated_gradient[i] * alpha_ + (learning_rate * batch_gradient[i]) / batch_size; + accumulated_gradient[i] = accumulated_gradient[i] * alpha + (learning_rate * batch_gradient[i]) / batch_size; } for (size_t i = 0; i < weights.size(); ++i) { @@ -448,7 +448,7 @@ void IWeightsUpdater::addToBatch( void LogisticRegression::predict( ColumnVector::Container & container, - ColumnsWithTypeAndName & arguments, + const ColumnsWithTypeAndName & arguments, size_t offset, size_t limit, const std::vector & weights, @@ -516,7 +516,7 @@ void LogisticRegression::compute( void LinearRegression::predict( ColumnVector::Container & container, - ColumnsWithTypeAndName & arguments, + const ColumnsWithTypeAndName & arguments, size_t offset, size_t limit, const std::vector & weights, diff --git a/src/AggregateFunctions/AggregateFunctionMLMethod.h b/src/AggregateFunctions/AggregateFunctionMLMethod.h index 494907c4002..b6912405fef 100644 
--- a/src/AggregateFunctions/AggregateFunctionMLMethod.h +++ b/src/AggregateFunctions/AggregateFunctionMLMethod.h @@ -23,7 +23,7 @@ GradientComputer class computes gradient according to its loss function class IGradientComputer { public: - IGradientComputer() {} + IGradientComputer() = default; virtual ~IGradientComputer() = default; @@ -39,7 +39,7 @@ public: virtual void predict( ColumnVector::Container & container, - ColumnsWithTypeAndName & arguments, + const ColumnsWithTypeAndName & arguments, size_t offset, size_t limit, const std::vector & weights, @@ -51,7 +51,7 @@ public: class LinearRegression : public IGradientComputer { public: - LinearRegression() {} + LinearRegression() = default; void compute( std::vector & batch_gradient, @@ -64,7 +64,7 @@ public: void predict( ColumnVector::Container & container, - ColumnsWithTypeAndName & arguments, + const ColumnsWithTypeAndName & arguments, size_t offset, size_t limit, const std::vector & weights, @@ -76,7 +76,7 @@ public: class LogisticRegression : public IGradientComputer { public: - LogisticRegression() {} + LogisticRegression() = default; void compute( std::vector & batch_gradient, @@ -89,7 +89,7 @@ public: void predict( ColumnVector::Container & container, - ColumnsWithTypeAndName & arguments, + const ColumnsWithTypeAndName & arguments, size_t offset, size_t limit, const std::vector & weights, @@ -147,9 +147,9 @@ public: class Momentum : public IWeightsUpdater { public: - Momentum() {} + Momentum() = default; - Momentum(Float64 alpha) : alpha_(alpha) {} + explicit Momentum(Float64 alpha_) : alpha(alpha_) {} void update(UInt64 batch_size, std::vector & weights, Float64 & bias, Float64 learning_rate, const std::vector & batch_gradient) override; @@ -160,7 +160,7 @@ public: void read(ReadBuffer & buf) override; private: - Float64 alpha_{0.1}; + Float64 alpha{0.1}; std::vector accumulated_gradient; }; @@ -168,9 +168,9 @@ private: class Nesterov : public IWeightsUpdater { public: - Nesterov() {} + Nesterov() = default; - Nesterov(Float64 alpha) : alpha_(alpha) {} + explicit Nesterov(Float64 alpha_) : alpha(alpha_) {} void addToBatch( std::vector & batch_gradient, @@ -191,7 +191,7 @@ public: void read(ReadBuffer & buf) override; private: - const Float64 alpha_ = 0.9; + const Float64 alpha = 0.9; std::vector accumulated_gradient; }; @@ -201,8 +201,8 @@ class Adam : public IWeightsUpdater public: Adam() { - beta1_powered_ = beta1_; - beta2_powered_ = beta2_; + beta1_powered = beta1; + beta2_powered = beta2; } void addToBatch( @@ -225,11 +225,11 @@ public: private: /// beta1 and beta2 hyperparameters have such recommended values - const Float64 beta1_ = 0.9; - const Float64 beta2_ = 0.999; - const Float64 eps_ = 0.000001; - Float64 beta1_powered_; - Float64 beta2_powered_; + const Float64 beta1 = 0.9; + const Float64 beta2 = 0.999; + const Float64 eps = 0.000001; + Float64 beta1_powered; + Float64 beta2_powered; std::vector average_gradient; std::vector average_squared_gradient; @@ -241,7 +241,7 @@ private: class LinearModelData { public: - LinearModelData() {} + LinearModelData() = default; LinearModelData( Float64 learning_rate_, @@ -261,7 +261,7 @@ public: void predict( ColumnVector::Container & container, - ColumnsWithTypeAndName & arguments, + const ColumnsWithTypeAndName & arguments, size_t offset, size_t limit, const Context & context) const; @@ -360,7 +360,7 @@ public: void predictValues( ConstAggregateDataPtr place, IColumn & to, - ColumnsWithTypeAndName & arguments, + const ColumnsWithTypeAndName & arguments, size_t offset, size_t 
limit, const Context & context) const override diff --git a/src/AggregateFunctions/AggregateFunctionNull.cpp b/src/AggregateFunctions/AggregateFunctionNull.cpp index 5e0d6ee6e21..f584ae1f34c 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.cpp +++ b/src/AggregateFunctions/AggregateFunctionNull.cpp @@ -72,7 +72,7 @@ public: assert(nested_function); - if (auto adapter = nested_function->getOwnNullAdapter(nested_function, arguments, params)) + if (auto adapter = nested_function->getOwnNullAdapter(nested_function, arguments, params, properties)) return adapter; /// If applied to aggregate function with -State combinator, we apply -Null combinator to it's nested_function instead of itself. diff --git a/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h b/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h index 28c3f53d879..63dde3f1738 100644 --- a/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h +++ b/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h @@ -187,7 +187,10 @@ struct AggregateFunctionTimeSeriesGroupSumData { size_t size = result.size(); writeVarUInt(size, buf); - buf.write(reinterpret_cast(result.data()), sizeof(result[0])); + if (size > 0) + { + buf.write(reinterpret_cast(result.data()), size * sizeof(result[0])); + } } void deserialize(ReadBuffer & buf) @@ -195,7 +198,10 @@ struct AggregateFunctionTimeSeriesGroupSumData size_t size = 0; readVarUInt(size, buf); result.resize(size); - buf.read(reinterpret_cast(result.data()), size * sizeof(result[0])); + if (size > 0) + { + buf.read(reinterpret_cast(result.data()), size * sizeof(result[0])); + } } }; template diff --git a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h index 2dd6ef8a9fd..96f0eb183be 100644 --- a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h +++ b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h @@ -239,7 +239,8 @@ public: } AggregateFunctionPtr getOwnNullAdapter( - const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params) const override + const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params, + const AggregateFunctionProperties & /*properties*/) const override { return std::make_shared>(nested_function, arguments, params); } diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index 4f9552d2345..f46467514cf 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -33,6 +33,7 @@ using ConstAggregateDataPtr = const char *; class IAggregateFunction; using AggregateFunctionPtr = std::shared_ptr; +struct AggregateFunctionProperties; /** Aggregate functions interface. * Instances of classes with this interface do not contain the data itself for aggregation, @@ -60,7 +61,7 @@ public: throw Exception("Prediction is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - virtual ~IAggregateFunction() {} + virtual ~IAggregateFunction() = default; /** Data manipulating functions. */ @@ -113,7 +114,7 @@ public: virtual void predictValues( ConstAggregateDataPtr /* place */, IColumn & /*to*/, - ColumnsWithTypeAndName & /*arguments*/, + const ColumnsWithTypeAndName & /*arguments*/, size_t /*offset*/, size_t /*limit*/, const Context & /*context*/) const @@ -185,7 +186,8 @@ public: * arguments and params are for nested_function. 
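 * Returning nullptr (the default below) means no specialized adapter exists and the generic AggregateFunctionNull wrapper from AggregateFunctionNull.cpp is applied instead.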
*/ virtual AggregateFunctionPtr getOwnNullAdapter( - const AggregateFunctionPtr & /*nested_function*/, const DataTypes & /*arguments*/, const Array & /*params*/) const + const AggregateFunctionPtr & /*nested_function*/, const DataTypes & /*arguments*/, + const Array & /*params*/, const AggregateFunctionProperties & /*properties*/) const { return nullptr; } diff --git a/src/AggregateFunctions/QuantileExact.h b/src/AggregateFunctions/QuantileExact.h index a2acde97f90..bc85d5c1c28 100644 --- a/src/AggregateFunctions/QuantileExact.h +++ b/src/AggregateFunctions/QuantileExact.h @@ -1,19 +1,17 @@ #pragma once -#include -#include #include #include #include #include #include +#include +#include -#if !defined(ARCADIA_BUILD) - #include // Y_IGNORE -#endif namespace DB { + namespace ErrorCodes { extern const int NOT_IMPLEMENTED; @@ -89,12 +87,7 @@ struct QuantileExact : QuantileExactBase> if (!array.empty()) { size_t n = level < 1 ? level * array.size() : (array.size() - 1); - -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_select(array.begin(), array.begin() + n, array.end()); /// NOTE You can think of the radix-select algorithm. -#else - std::nth_element(array.begin(), array.begin() + n, array.end()); /// NOTE You can think of the radix-select algorithm. -#endif + nth_element(array.begin(), array.begin() + n, array.end()); /// NOTE: You can think of the radix-select algorithm. return array[n]; } @@ -113,12 +106,7 @@ struct QuantileExact : QuantileExactBase> auto level = levels[indices[i]]; size_t n = level < 1 ? level * array.size() : (array.size() - 1); - -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_select(array.begin() + prev_n, array.begin() + n, array.end()); -#else - std::nth_element(array.begin() + prev_n, array.begin() + n, array.end()); -#endif + nth_element(array.begin() + prev_n, array.begin() + n, array.end()); result[indices[i]] = array[n]; prev_n = n; } @@ -154,14 +142,10 @@ struct QuantileExactExclusive : public QuantileExact else if (n < 1) return static_cast(array[0]); -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_select(array.begin(), array.begin() + n - 1, array.end()); -#else - std::nth_element(array.begin(), array.begin() + n - 1, array.end()); -#endif - auto nth_element = std::min_element(array.begin() + n, array.end()); + nth_element(array.begin(), array.begin() + n - 1, array.end()); + auto nth_elem = std::min_element(array.begin() + n, array.end()); - return static_cast(array[n - 1]) + (h - n) * static_cast(*nth_element - array[n - 1]); + return static_cast(array[n - 1]) + (h - n) * static_cast(*nth_elem - array[n - 1]); } return std::numeric_limits::quiet_NaN(); @@ -187,14 +171,10 @@ struct QuantileExactExclusive : public QuantileExact result[indices[i]] = static_cast(array[0]); else { -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_select(array.begin() + prev_n, array.begin() + n - 1, array.end()); -#else - std::nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end()); -#endif - auto nth_element = std::min_element(array.begin() + n, array.end()); + nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end()); + auto nth_elem = std::min_element(array.begin() + n, array.end()); - result[indices[i]] = static_cast(array[n - 1]) + (h - n) * static_cast(*nth_element - array[n - 1]); + result[indices[i]] = static_cast(array[n - 1]) + (h - n) * static_cast(*nth_elem - array[n - 1]); prev_n = n - 1; } } @@ -226,14 +206,10 @@ struct QuantileExactInclusive : public QuantileExact return static_cast(array[array.size() - 
1]); else if (n < 1) return static_cast(array[0]); -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_select(array.begin(), array.begin() + n - 1, array.end()); -#else - std::nth_element(array.begin(), array.begin() + n - 1, array.end()); -#endif - auto nth_element = std::min_element(array.begin() + n, array.end()); + nth_element(array.begin(), array.begin() + n - 1, array.end()); + auto nth_elem = std::min_element(array.begin() + n, array.end()); - return static_cast(array[n - 1]) + (h - n) * static_cast(*nth_element - array[n - 1]); + return static_cast(array[n - 1]) + (h - n) * static_cast(*nth_elem - array[n - 1]); } return std::numeric_limits::quiet_NaN(); @@ -257,14 +233,10 @@ struct QuantileExactInclusive : public QuantileExact result[indices[i]] = static_cast(array[0]); else { -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_select(array.begin() + prev_n, array.begin() + n - 1, array.end()); -#else - std::nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end()); -#endif - auto nth_element = std::min_element(array.begin() + n, array.end()); + nth_element(array.begin() + prev_n, array.begin() + n - 1, array.end()); + auto nth_elem = std::min_element(array.begin() + n, array.end()); - result[indices[i]] = static_cast(array[n - 1]) + (h - n) * static_cast(*nth_element - array[n - 1]); + result[indices[i]] = static_cast(array[n - 1]) + (h - n) * static_cast(*nth_elem - array[n - 1]); prev_n = n - 1; } } diff --git a/src/AggregateFunctions/QuantileTDigest.h b/src/AggregateFunctions/QuantileTDigest.h index 02d43ede66d..908b8795bf8 100644 --- a/src/AggregateFunctions/QuantileTDigest.h +++ b/src/AggregateFunctions/QuantileTDigest.h @@ -14,6 +14,7 @@ namespace DB namespace ErrorCodes { extern const int TOO_LARGE_ARRAY_SIZE; + extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; } @@ -36,10 +37,11 @@ namespace ErrorCodes * uses asin, which slows down the algorithm a bit. */ template -class TDigest +class QuantileTDigest { using Value = Float32; using Count = Float32; + using BetterFloat = Float64; // For intermediate results and sum(Count). Must have better precision than Count /** The centroid stores the weight of points around their mean value */ @@ -55,13 +57,6 @@ class TDigest , count(count_) {} - Centroid & operator+=(const Centroid & other) - { - count += other.count; - mean += other.count * (other.mean - mean) / count; - return *this; - } - bool operator<(const Centroid & other) const { return mean < other.mean; @@ -71,26 +66,42 @@ class TDigest /** :param epsilon: value \delta from the article - error in the range * quantile 0.5 (default is 0.01, i.e. 1%) + * if you change epsilon, you must also change max_centroids + * :param max_centroids: depends on epsilon; the better the accuracy, the more centroids you need + * to describe data with this accuracy. Read the article before changing. * :param max_unmerged: when accumulating count of new points beyond this * value centroid compression is triggered * (default is 2048, the higher the value - the * more memory is required, but amortization of execution time increases) + * Change freely anytime. */ struct Params { Value epsilon = 0.01; + size_t max_centroids = 2048; size_t max_unmerged = 2048; }; + /** max_centroids_deserialize should be >= all max_centroids ever used in production. + * This is a security parameter, preventing allocation of too many centroids in deserialize, so it can be relatively large.
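+ * (A Centroid is a pair of Float32s, so 65536 centroids occupy only 65536 * 8 B = 512 KiB.)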
+ */ + static constexpr size_t max_centroids_deserialize = 65536; - Params params; + static constexpr Params params{}; - /// The memory will be allocated to several elements at once, so that the state occupies 64 bytes. - static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray) - sizeof(Count) - sizeof(UInt32); + static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray) - sizeof(BetterFloat) - sizeof(size_t); // If alignment is imperfect, sizeof(TDigest) will be more than naively expected using Centroids = PODArrayWithStackMemory; Centroids centroids; - Count count = 0; - UInt32 unmerged = 0; + BetterFloat count = 0; + size_t unmerged = 0; + + /** Linear interpolation at the point x on the line (x1, y1)..(x2, y2) + */ + static Value interpolate(Value x, Value x1, Value y1, Value x2, Value y2) + { + double k = (x - x1) / (x2 - x1); + return y1 + k * (y2 - y1); + } struct RadixSortTraits { @@ -111,15 +122,56 @@ class TDigest }; /** Adds a centroid `c` to the digest + * centroid must be valid, validity is checked in add(), deserialize() and is maintained by compress() */ void addCentroid(const Centroid & c) { centroids.push_back(c); count += c.count; ++unmerged; - if (unmerged >= params.max_unmerged) + if (unmerged > params.max_unmerged) compress(); } + void compressBrute() + { + if (centroids.size() <= params.max_centroids) + return; + const size_t batch_size = (centroids.size() + params.max_centroids - 1) / params.max_centroids; // at least 2 + + auto l = centroids.begin(); + auto r = std::next(l); + BetterFloat sum = 0; + BetterFloat l_mean = l->mean; // We have high-precision temporaries for numeric stability + BetterFloat l_count = l->count; + size_t batch_pos = 0; + for (;r != centroids.end(); ++r) + { + if (batch_pos < batch_size - 1) + { + /// The left column "eats" the right. Middle of the batch + l_count += r->count; + l_mean += r->count * (r->mean - l_mean) / l_count; // Symmetric algo (M1*C1 + M2*C2)/(C1+C2) is numerically better, but slower + l->mean = l_mean; + l->count = l_count; + batch_pos += 1; + } + else + { + // End of the batch, start the next one + sum += l->count; // Not l_count, otherwise actual sum of elements will be different + ++l; + + /// We skip all the values "eaten" earlier. + *l = *r; + l_mean = l->mean; + l_count = l->count; + batch_pos = 0; + } + } + count = sum + l_count; // Update count, it might be different due to += inaccuracy + centroids.resize(l - centroids.begin() + 1); + // Here centroids.size() <= params.max_centroids + } public: /** Performs compression of accumulated centroids @@ -128,74 +180,92 @@ public: */ void compress() { - if (unmerged > 0) + if (unmerged > 0 || centroids.size() > params.max_centroids) { + // unmerged > 0 implies centroids.size() > 0, hence *l is valid below RadixSort::executeLSD(centroids.data(), centroids.size()); - if (centroids.size() > 3) + /// A pair of consecutive bars of the histogram. + auto l = centroids.begin(); + auto r = std::next(l); + + const BetterFloat count_epsilon_4 = count * params.epsilon * 4; // Compiler is unable to do this optimization + BetterFloat sum = 0; + BetterFloat l_mean = l->mean; // We have high-precision temporaries for numeric stability + BetterFloat l_count = l->count; + while (r != centroids.end()) { - /// A pair of consecutive bars of the histogram. - auto l = centroids.begin(); - auto r = std::next(l); - - Count sum = 0; - while (r != centroids.end()) + if (l->mean == r->mean) // Perfect aggregation (fast). 
We compare l->mean, not l_mean, to avoid identical elements after compress { - // we use quantile which gives us the smallest error - - /// The ratio of the part of the histogram to l, including the half l to the entire histogram. That is, what level quantile in position l. - Value ql = (sum + l->count * 0.5) / count; - Value err = ql * (1 - ql); - - /// The ratio of the portion of the histogram to l, including l and half r to the entire histogram. That is, what level is the quantile in position r. - Value qr = (sum + l->count + r->count * 0.5) / count; - Value err2 = qr * (1 - qr); - - if (err > err2) - err = err2; - - Value k = 4 * count * err * params.epsilon; - - /** The ratio of the weight of the glued column pair to all values is not greater, - * than epsilon multiply by a certain quadratic coefficient, which in the median is 1 (4 * 1/2 * 1/2), - * and at the edges decreases and is approximately equal to the distance to the edge * 4. - */ - - if (l->count + r->count <= k) - { - // it is possible to merge left and right - /// The left column "eats" the right. - *l += *r; - } - else - { - // not enough capacity, check the next pair - sum += l->count; - ++l; - - /// We skip all the values "eaten" earlier. - if (l != r) - *l = *r; - } + l_count += r->count; + l->count = l_count; ++r; + continue; } + // we use quantile which gives us the smallest error - /// At the end of the loop, all values to the right of l were "eaten". - centroids.resize(l - centroids.begin() + 1); + /// The ratio of the part of the histogram to l, including the half l to the entire histogram. That is, what level quantile in position l. + BetterFloat ql = (sum + l_count * 0.5) / count; + BetterFloat err = ql * (1 - ql); + + /// The ratio of the portion of the histogram to l, including l and half r to the entire histogram. That is, what level is the quantile in position r. + BetterFloat qr = (sum + l_count + r->count * 0.5) / count; + BetterFloat err2 = qr * (1 - qr); + + if (err > err2) + err = err2; + + BetterFloat k = count_epsilon_4 * err; + + /** The ratio of the weight of the glued column pair to all values is not greater, + * than epsilon multiply by a certain quadratic coefficient, which in the median is 1 (4 * 1/2 * 1/2), + * and at the edges decreases and is approximately equal to the distance to the edge * 4. + */ + + if (l_count + r->count <= k) + { + // it is possible to merge left and right + /// The left column "eats" the right. + l_count += r->count; + l_mean += r->count * (r->mean - l_mean) / l_count; // Symmetric algo (M1*C1 + M2*C2)/(C1+C2) is numerically better, but slower + l->mean = l_mean; + l->count = l_count; + } + else + { + // not enough capacity, check the next pair + sum += l->count; // Not l_count, otherwise actual sum of elements will be different + ++l; + + /// We skip all the values "eaten" earlier. + if (l != r) + *l = *r; + l_mean = l->mean; + l_count = l->count; + } + ++r; } + count = sum + l_count; // Update count, it might be different due to += inaccuracy + /// At the end of the loop, all values to the right of l were "eaten". 
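+ /// resize() keeps only the merged prefix: l - centroids.begin() + 1 surviving centroids.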
+ centroids.resize(l - centroids.begin() + 1); unmerged = 0; } + // Ensures centroids.size() < max_centroids, independent of unprovable floating point blackbox above + compressBrute(); } /** Adds to the digest a change in `x` with a weight of `cnt` (default 1) */ void add(T x, UInt64 cnt = 1) { - addCentroid(Centroid(Value(x), Count(cnt))); + auto vx = static_cast(x); + if (cnt == 0 || std::isnan(vx)) + return; // Count 0 breaks compress() assumptions, Nan breaks sort(). We treat them as no sample. + addCentroid(Centroid{vx, static_cast(cnt)}); } - void merge(const TDigest & other) + void merge(const QuantileTDigest & other) { for (const auto & c : other.centroids) addCentroid(c); @@ -213,89 +283,23 @@ public: size_t size = 0; readVarUInt(size, buf); - if (size > params.max_unmerged) + if (size > max_centroids_deserialize) throw Exception("Too large t-digest centroids size", ErrorCodes::TOO_LARGE_ARRAY_SIZE); - centroids.resize(size); - buf.read(reinterpret_cast(centroids.data()), size * sizeof(centroids[0])); - - count = 0; - for (const auto & c : centroids) - count += c.count; - } - - Count getCount() - { - return count; - } - - const Centroids & getCentroids() const - { - return centroids; - } - - void reset() - { - centroids.resize(0); count = 0; unmerged = 0; - } -}; -template -class QuantileTDigest -{ - using Value = Float32; - using Count = Float32; + centroids.resize(size); + // From now, TDigest will be in invalid state if exception is thrown. + buf.read(reinterpret_cast(centroids.data()), size * sizeof(centroids[0])); - /** We store two t-digests. When an amount of elements in sub_tdigest become more than merge_threshold - * we merge sub_tdigest in main_tdigest and reset sub_tdigest. This method is needed to decrease an amount of - * centroids in t-digest (experiments show that after merge_threshold the size of t-digest significantly grows, - * but merging two big t-digest decreases it). - */ - TDigest main_tdigest; - TDigest sub_tdigest; - size_t merge_threshold = 1e7; - - /** Linear interpolation at the point x on the line (x1, y1)..(x2, y2) - */ - static Value interpolate(Value x, Value x1, Value y1, Value x2, Value y2) - { - double k = (x - x1) / (x2 - x1); - return y1 + k * (y2 - y1); - } - - void mergeTDigests() - { - main_tdigest.merge(sub_tdigest); - sub_tdigest.reset(); - } - -public: - void add(T x, UInt64 cnt = 1) - { - if (sub_tdigest.getCount() >= merge_threshold) - mergeTDigests(); - sub_tdigest.add(x, cnt); - } - - void merge(const QuantileTDigest & other) - { - mergeTDigests(); - main_tdigest.merge(other.main_tdigest); - main_tdigest.merge(other.sub_tdigest); - } - - void serialize(WriteBuffer & buf) - { - mergeTDigests(); - main_tdigest.serialize(buf); - } - - void deserialize(ReadBuffer & buf) - { - sub_tdigest.reset(); - main_tdigest.deserialize(buf); + for (const auto & c : centroids) + { + if (c.count <= 0 || std::isnan(c.count) || std::isnan(c.mean)) // invalid count breaks compress(), invalid mean breaks sort() + throw Exception("Invalid centroid " + std::to_string(c.count) + ":" + std::to_string(c.mean), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + count += c.count; + } + compress(); // Allows reading/writing TDigests with different epsilon/max_centroids params } /** Calculates the quantile q [0, 1] based on the digest. @@ -304,18 +308,15 @@ public: template ResultType getImpl(Float64 level) { - mergeTDigests(); - - auto & centroids = main_tdigest.getCentroids(); if (centroids.empty()) return std::is_floating_point_v ? 
NAN : 0; - main_tdigest.compress(); + compress(); if (centroids.size() == 1) return centroids.front().mean; - Float64 x = level * main_tdigest.getCount(); + Float64 x = level * count; Float64 prev_x = 0; Count sum = 0; Value prev_mean = centroids.front().mean; @@ -343,9 +344,6 @@ public: template void getManyImpl(const Float64 * levels, const size_t * levels_permutation, size_t size, ResultType * result) { - mergeTDigests(); - - auto & centroids = main_tdigest.getCentroids(); if (centroids.empty()) { for (size_t result_num = 0; result_num < size; ++result_num) @@ -353,7 +351,7 @@ public: return; } - main_tdigest.compress(); + compress(); if (centroids.size() == 1) { @@ -362,7 +360,7 @@ public: return; } - Float64 x = levels[levels_permutation[0]] * main_tdigest.getCount(); + Float64 x = levels[levels_permutation[0]] * count; Float64 prev_x = 0; Count sum = 0; Value prev_mean = centroids.front().mean; @@ -380,7 +378,7 @@ public: if (result_num >= size) return; - x = levels[levels_permutation[result_num]] * main_tdigest.getCount(); + x = levels[levels_permutation[result_num]] * count; } sum += c.count; diff --git a/src/AggregateFunctions/QuantileTiming.h b/src/AggregateFunctions/QuantileTiming.h index 1a696088dd4..6070f264ad6 100644 --- a/src/AggregateFunctions/QuantileTiming.h +++ b/src/AggregateFunctions/QuantileTiming.h @@ -1,15 +1,13 @@ #pragma once +#include +#include +#include +#include #include #include -#include -#include -#include -#include +#include -#if !defined(ARCADIA_BUILD) - #include // Y_IGNORE -#endif namespace DB { @@ -140,7 +138,7 @@ namespace detail using Array = PODArray; mutable Array elems; /// mutable because array sorting is not considered a state change. - QuantileTimingMedium() {} + QuantileTimingMedium() = default; QuantileTimingMedium(const UInt16 * begin, const UInt16 * end) : elems(begin, end) {} void insert(UInt64 x) @@ -182,11 +180,7 @@ namespace detail /// Sorting an array will not be considered a violation of constancy. auto & array = elems; -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_select(array.begin(), array.begin() + n, array.end()); -#else - std::nth_element(array.begin(), array.begin() + n, array.end()); -#endif + nth_element(array.begin(), array.begin() + n, array.end()); quantile = array[n]; } @@ -207,11 +201,7 @@ namespace detail ? level * elems.size() : (elems.size() - 1); -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_select(array.begin() + prev_n, array.begin() + n, array.end()); -#else - std::nth_element(array.begin() + prev_n, array.begin() + n, array.end()); -#endif + nth_element(array.begin() + prev_n, array.begin() + n, array.end()); result[level_index] = array[n]; prev_n = n; @@ -282,7 +272,7 @@ namespace detail } public: - Iterator(const QuantileTimingLarge & parent) + explicit Iterator(const QuantileTimingLarge & parent) : begin(parent.count_small), pos(begin), end(&parent.count_big[BIG_SIZE]) { adjust(); @@ -429,8 +419,8 @@ namespace detail template void getMany(const double * levels, const size_t * indices, size_t size, ResultType * result) const { - const auto indices_end = indices + size; - auto index = indices; + const auto * indices_end = indices + size; + const auto * index = indices; UInt64 pos = std::ceil(count * levels[*index]); diff --git a/src/AggregateFunctions/ya.make b/src/AggregateFunctions/ya.make index e6aedc513f9..f5e64f1471b 100644 --- a/src/AggregateFunctions/ya.make +++ b/src/AggregateFunctions/ya.make @@ -1,4 +1,6 @@ # This file is generated automatically, do not edit. 
See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/AggregateFunctions/ya.make.in b/src/AggregateFunctions/ya.make.in index dd49b679d28..4d0c3099bbd 100644 --- a/src/AggregateFunctions/ya.make.in +++ b/src/AggregateFunctions/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 31e88b5d872..0a499540c41 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -73,6 +73,11 @@ void Connection::connect(const ConnectionTimeouts & timeouts) { #if USE_SSL socket = std::make_unique(); + + /// We resolve the IP when we open the SecureStreamSocket, so to make Server Name Indication (SNI) + /// work we need to pass the host name separately. It will be sent in the TLS Hello packet to let + /// the server know which host we want to talk to (a single IP can process requests for multiple hosts using SNI). + static_cast(socket.get())->setPeerHostName(host); #else throw Exception{"tcp_secure protocol is disabled because poco library was built without NetSSL support.", ErrorCodes::SUPPORT_IS_DISABLED}; #endif diff --git a/src/Client/ya.make b/src/Client/ya.make index 07cc6725308..87a0cea102a 100644 --- a/src/Client/ya.make +++ b/src/Client/ya.make @@ -1,4 +1,6 @@ # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Client/ya.make.in b/src/Client/ya.make.in index d8faff9ae1a..935643ecd26 100644 --- a/src/Client/ya.make.in +++ b/src/Client/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index f5b266b6983..99b3342f314 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -161,7 +161,7 @@ MutableColumnPtr ColumnAggregateFunction::convertToValues(MutableColumnPtr colum return res; } -MutableColumnPtr ColumnAggregateFunction::predictValues(ColumnsWithTypeAndName & arguments, const Context & context) const +MutableColumnPtr ColumnAggregateFunction::predictValues(const ColumnsWithTypeAndName & arguments, const Context & context) const { MutableColumnPtr res = func->getReturnTypeToPredict()->createColumn(); res->reserve(data.size()); diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index 79e52e5769a..4e5e66542e9 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -119,7 +119,7 @@ public: const char * getFamilyName() const override { return "AggregateFunction"; } TypeIndex getDataType() const override { return TypeIndex::AggregateFunction; } - MutableColumnPtr predictValues(ColumnsWithTypeAndName & arguments, const Context & context) const; + MutableColumnPtr predictValues(const ColumnsWithTypeAndName & arguments, const Context & context) const; size_t size() const override { diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 82d954df334..251c0bd7921 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -9,6 +9,7 @@ #include #include +#include #include @@ -20,10 +21,6 @@ #include #include -#if !defined(ARCADIA_BUILD) - #include // Y_IGNORE -#endif - namespace DB { @@ -786,11 +783,7 @@ void ColumnArray::getPermutationImpl(size_t limit, Permutation & res, Comparator auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 
0; }; if (limit) -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_partial_sort(res.begin(), res.begin() + limit, res.end(), less); -#else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), less); -#endif + partial_sort(res.begin(), res.begin() + limit, res.end(), less); else std::sort(res.begin(), res.end(), less); } @@ -842,11 +835,7 @@ void ColumnArray::updatePermutationImpl(size_t limit, Permutation & res, EqualRa return; /// Since then we are working inside the interval. -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); -#else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); -#endif + partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); auto new_first = first; for (auto j = first + 1; j < limit; ++j) { diff --git a/src/Columns/ColumnConst.cpp b/src/Columns/ColumnConst.cpp index 550a44a23a2..72988567a04 100644 --- a/src/Columns/ColumnConst.cpp +++ b/src/Columns/ColumnConst.cpp @@ -138,4 +138,12 @@ void ColumnConst::updateWeakHash32(WeakHash32 & hash) const value = intHashCRC32(data_hash, value); } +void ColumnConst::compareColumn( + const IColumn & rhs, size_t, PaddedPODArray *, PaddedPODArray & compare_results, int, int nan_direction_hint) + const +{ + Int8 res = compareAt(1, 1, rhs, nan_direction_hint); + std::fill(compare_results.begin(), compare_results.end(), res); +} + } diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 3680926cd9b..9392a1cfaff 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -199,11 +199,7 @@ public: void compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, - int direction, int nan_direction_hint) const override - { - return data->compareColumn(rhs, rhs_row_num, row_indexes, - compare_results, direction, nan_direction_hint); - } + int direction, int nan_direction_hint) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index 7c3af5fe095..a70c1a6ba6d 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -7,10 +7,8 @@ #include #include +#include #include -#if !defined(ARCADIA_BUILD) - #include // Y_IGNORE -#endif #include @@ -57,32 +55,16 @@ void ColumnDecimal::compareColumn(const IColumn & rhs, size_t rhs_row_num, template StringRef ColumnDecimal::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const { - if constexpr (is_POD) - { - auto * pos = arena.allocContinue(sizeof(T), begin); - memcpy(pos, &data[n], sizeof(T)); - return StringRef(pos, sizeof(T)); - } - else - { - char * pos = arena.allocContinue(BigInt::size, begin); - return BigInt::serialize(data[n], pos); - } + auto * pos = arena.allocContinue(sizeof(T), begin); + memcpy(pos, &data[n], sizeof(T)); + return StringRef(pos, sizeof(T)); } template const char * ColumnDecimal::deserializeAndInsertFromArena(const char * pos) { - if constexpr (is_POD) - { - data.push_back(unalignedLoad(pos)); - return pos + sizeof(T); - } - else - { - data.push_back(BigInt::deserialize(pos)); - return pos + BigInt::size; - } + data.push_back(unalignedLoad(pos)); + return pos + sizeof(T); } template @@ -197,21 +179,11 @@ void ColumnDecimal::updatePermutation(bool reverse, size_t limit, int, IColum /// Since then we are working inside the interval. 
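 /// Only res[first, limit) must end up ordered; partial_sort leaves [limit, last) in unspecified order.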
if (reverse) -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, + partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this](size_t a, size_t b) { return data[a] > data[b]; }); -#else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, - [this](size_t a, size_t b) { return data[a] > data[b]; }); -#endif else -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, + partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this](size_t a, size_t b) { return data[a] < data[b]; }); -#else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, - [this](size_t a, size_t b) { return data[a] > data[b]; }); -#endif auto new_first = first; for (auto j = first + 1; j < limit; ++j) { @@ -264,24 +236,13 @@ MutableColumnPtr ColumnDecimal::cloneResized(size_t size) const new_col.data.resize(size); size_t count = std::min(this->size(), size); - if constexpr (is_POD) - { - memcpy(new_col.data.data(), data.data(), count * sizeof(data[0])); - if (size > count) - { - void * tail = &new_col.data[count]; - memset(tail, 0, (size - count) * sizeof(T)); - } - } - else - { - for (size_t i = 0; i < count; i++) - new_col.data[i] = data[i]; + memcpy(new_col.data.data(), data.data(), count * sizeof(data[0])); - if (size > count) - for (size_t i = count; i < size; i++) - new_col.data[i] = T{}; + if (size > count) + { + void * tail = &new_col.data[count]; + memset(tail, 0, (size - count) * sizeof(T)); } } @@ -291,16 +252,9 @@ MutableColumnPtr ColumnDecimal::cloneResized(size_t size) const template void ColumnDecimal::insertData(const char * src, size_t /*length*/) { - if constexpr (is_POD) - { - T tmp; - memcpy(&tmp, src, sizeof(T)); - data.emplace_back(tmp); - } - else - { - data.push_back(BigInt::deserialize(src)); - } + T tmp; + memcpy(&tmp, src, sizeof(T)); + data.emplace_back(tmp); } template @@ -315,13 +269,8 @@ void ColumnDecimal::insertRangeFrom(const IColumn & src, size_t start, size_t size_t old_size = data.size(); data.resize(old_size + length); - if constexpr (is_POD) - memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0])); - else - { - for (size_t i = 0; i < length; i++) - data[old_size + i] = src_vec.data[start + i]; - } + + memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0])); } template diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index abb49531e89..46f7cfe581e 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -1,25 +1,18 @@ #pragma once -#include - -#include +#include #include #include -#include #include -#if !defined(ARCADIA_BUILD) - #include // Y_IGNORE -#endif +#include +#include +#include + +#include namespace DB { - -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - /// PaddedPODArray extended by Decimal scale template class DecimalPaddedPODArray : public PaddedPODArray @@ -57,43 +50,6 @@ private: UInt32 scale; }; -/// std::vector extended by Decimal scale -template -class DecimalVector : public std::vector -{ -public: - using Base = std::vector; - using Base::operator[]; - - DecimalVector(size_t size, UInt32 scale_) - : Base(size), - scale(scale_) - {} - - DecimalVector(const DecimalVector & other) - : Base(other.begin(), other.end()), - scale(other.scale) - {} - - DecimalVector(DecimalVector && other) - { - this->swap(other); - 
std::swap(scale, other.scale); - } - - DecimalVector & operator=(DecimalVector && other) - { - this->swap(other); - std::swap(scale, other.scale); - return *this; - } - - UInt32 getScale() const { return scale; } - -private: - UInt32 scale; -}; - /// A ColumnVector for Decimals template class ColumnDecimal final : public COWHelper> @@ -107,10 +63,7 @@ private: public: using ValueType = T; using NativeT = typename T::NativeType; - static constexpr bool is_POD = !is_big_int_v; - using Container = std::conditional_t, - DecimalVector>; + using Container = DecimalPaddedPODArray; private: ColumnDecimal(const size_t n, UInt32 scale_) @@ -134,18 +87,8 @@ public: size_t size() const override { return data.size(); } size_t byteSize() const override { return data.size() * sizeof(data[0]); } - size_t allocatedBytes() const override - { - if constexpr (is_POD) - return data.allocated_bytes(); - else - return data.capacity() * sizeof(data[0]); - } - void protect() override - { - if constexpr (is_POD) - data.protect(); - } + size_t allocatedBytes() const override { return data.allocated_bytes(); } + void protect() override { data.protect(); } void reserve(size_t n) override { data.reserve(n); } void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } @@ -153,38 +96,28 @@ public: void insertDefault() override { data.push_back(T()); } virtual void insertManyDefaults(size_t length) override { - if constexpr (is_POD) - data.resize_fill(data.size() + length); - else - data.resize(data.size() + length); + data.resize_fill(data.size() + length); } void insert(const Field & x) override { data.push_back(DB::get>(x)); } void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void popBack(size_t n) override { - if constexpr (is_POD) - data.resize_assume_reserved(data.size() - n); - else - data.resize(data.size() - n); + data.resize_assume_reserved(data.size() - n); } StringRef getRawData() const override { - if constexpr (is_POD) - return StringRef(reinterpret_cast(data.data()), byteSize()); - else - throw Exception("getRawData() is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED); + return StringRef(reinterpret_cast(data.data()), byteSize()); } StringRef getDataAt(size_t n) const override { - if constexpr (is_POD) - return StringRef(reinterpret_cast(&data[n]), sizeof(data[n])); - else - throw Exception("getDataAt() is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED); + return StringRef(reinterpret_cast(&data[n]), sizeof(data[n])); } + Float64 getFloat64(size_t n) const final { return DecimalUtils::convertTo(data[n], scale); } + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; const char * deserializeAndInsertFromArena(const char * pos) override; void updateHashWithValue(size_t n, SipHash & hash) const override; @@ -256,17 +189,9 @@ protected: sort_end = res.begin() + limit; if (reverse) -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_partial_sort(res.begin(), sort_end, res.end(), [this](size_t a, size_t b) { return data[a] > data[b]; }); -#else - std::partial_sort(res.begin(), sort_end, res.end(), [this](size_t a, size_t b) { return data[a] > data[b]; }); -#endif + partial_sort(res.begin(), sort_end, res.end(), [this](size_t a, size_t b) { return data[a] > data[b]; }); else -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_partial_sort(res.begin(), sort_end, res.end(), [this](size_t a, size_t b) { return data[a] < data[b]; }); -#else - 
std::partial_sort(res.begin(), sort_end, res.end(), [this](size_t a, size_t b) { return data[a] < data[b]; }); -#endif + partial_sort(res.begin(), sort_end, res.end(), [this](size_t a, size_t b) { return data[a] < data[b]; }); } }; diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index c1724b37fe2..a20e5d3ca0d 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -1,25 +1,20 @@ #include + #include - -#include -#include -#include -#include -#include -#include -#include - -#include -#if !defined(ARCADIA_BUILD) - #include // Y_IGNORE -#endif - #include - #include +#include +#include +#include +#include +#include +#include +#include +#include +#include -#ifdef __SSE2__ - #include +#if defined(__SSE2__) +# include #endif @@ -160,17 +155,9 @@ void ColumnFixedString::getPermutation(bool reverse, size_t limit, int /*nan_dir if (limit) { if (reverse) -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this)); -#else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this)); -#endif + partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this)); else -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this)); -#else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this)); -#endif + partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this)); } else { @@ -228,17 +215,9 @@ void ColumnFixedString::updatePermutation(bool reverse, size_t limit, int, Permu /// Since then we are working inside the interval. if (reverse) -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this)); -#else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this)); -#endif + partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this)); else -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this)); -#else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this)); -#endif + partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this)); auto new_first = first; for (auto j = first + 1; j < limit; ++j) diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index f61062160b1..8af3b240cb9 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -1,20 +1,19 @@ #include -#include + #include +#include #include #include #include -#include #include - +#include +#include #include -#if !defined(ARCADIA_BUILD) - #include // Y_IGNORE -#endif namespace DB { + namespace ErrorCodes { extern const int ILLEGAL_COLUMN; @@ -397,11 +396,7 @@ void ColumnLowCardinality::updatePermutationImpl(size_t limit, Permutation & res /// Since then we are working inside the interval. 
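 /// The unqualified partial_sort call below replaces the miniselect-vs-std dispatch that was previously repeated at every call site.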
-#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); -#else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); -#endif + partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); auto new_first = first; for (auto j = first + 1; j < limit; ++j) diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 49180919abb..00d6349408f 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -1,18 +1,16 @@ -#include -#include -#include -#include -#include -#include #include + +#include #include #include - +#include +#include +#include +#include +#include +#include #include #include -#if !defined(ARCADIA_BUILD) - #include // Y_IGNORE -#endif namespace DB @@ -317,11 +315,7 @@ void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Comparato auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; }; if (limit) -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_partial_sort(res.begin(), res.begin() + limit, res.end(), less); -#else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), less); -#endif + partial_sort(res.begin(), res.begin() + limit, res.end(), less); else std::sort(res.begin(), res.end(), less); } @@ -372,11 +366,7 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR return; /// Since then we are working inside the interval. -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); -#else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); -#endif + partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); size_t new_first = first; for (size_t j = first + 1; j < limit; ++j) diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 9130f563735..e3b45ee3d5c 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -1,17 +1,16 @@ #include + #include +#include #include -#include #include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#if !defined(ARCADIA_BUILD) - #include // Y_IGNORE -#endif namespace DB @@ -354,17 +353,9 @@ void ColumnTuple::getPermutationImpl(size_t limit, Permutation & res, LessOperat limit = 0; if (limit) - { -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_partial_sort(res.begin(), res.begin() + limit, res.end(), less); -#else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), less); -#endif - } + partial_sort(res.begin(), res.begin() + limit, res.end(), less); else - { std::sort(res.begin(), res.end(), less); - } } void ColumnTuple::updatePermutationImpl(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges, const Collator * collator) const diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index c02963e4c5a..d768757227b 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -1,28 +1,27 @@ #include "ColumnVector.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#if !defined(ARCADIA_BUILD) - #include // Y_IGNORE 
-#endif -#ifdef __SSE2__ - #include +#include +#include + +#if defined(__SSE2__) +# include #endif namespace DB @@ -158,17 +157,9 @@ void ColumnVector::getPermutation(bool reverse, size_t limit, int nan_directi res[i] = i; if (reverse) -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_partial_sort(res.begin(), res.begin() + limit, res.end(), greater(*this, nan_direction_hint)); -#else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), greater(*this, nan_direction_hint)); -#endif + partial_sort(res.begin(), res.begin() + limit, res.end(), greater(*this, nan_direction_hint)); else -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this, nan_direction_hint)); -#else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this, nan_direction_hint)); -#endif + partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this, nan_direction_hint)); } else { @@ -264,17 +255,9 @@ void ColumnVector::updatePermutation(bool reverse, size_t limit, int nan_dire /// Since then, we are working inside the interval. if (reverse) -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, greater(*this, nan_direction_hint)); -#else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, greater(*this, nan_direction_hint)); -#endif + partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, greater(*this, nan_direction_hint)); else -#if !defined(ARCADIA_BUILD) - miniselect::floyd_rivest_partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this, nan_direction_hint)); -#else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this, nan_direction_hint)); -#endif + partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this, nan_direction_hint)); size_t new_first = first; for (size_t j = first + 1; j < limit; ++j) diff --git a/src/Columns/ColumnVectorHelper.h b/src/Columns/ColumnVectorHelper.h index d805f44218c..36cbfbf640e 100644 --- a/src/Columns/ColumnVectorHelper.h +++ b/src/Columns/ColumnVectorHelper.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB diff --git a/src/Columns/ya.make b/src/Columns/ya.make index 56c25529cfd..f1a8cb9f32e 100644 --- a/src/Columns/ya.make +++ b/src/Columns/ya.make @@ -1,4 +1,6 @@ # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. +OWNER(g:clickhouse) + LIBRARY() ADDINCL( diff --git a/src/Columns/ya.make.in b/src/Columns/ya.make.in index 4e667b49b7c..677a5bcbd70 100644 --- a/src/Columns/ya.make.in +++ b/src/Columns/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() ADDINCL( diff --git a/src/Common/Allocator.cpp b/src/Common/Allocator.cpp index 9de52a6e3f5..08c275abfc2 100644 --- a/src/Common/Allocator.cpp +++ b/src/Common/Allocator.cpp @@ -14,6 +14,8 @@ * In debug build, use small mmap threshold to reproduce more memory * stomping bugs. Along with ASLR it will hopefully detect more issues than * ASan. The program may fail due to the limit on number of memory mappings. + * + * Not too small, to avoid exhausting the limited number of memory mappings too quickly.
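 + * (Each mapping consumes a kernel VMA, and Linux limits their number via vm.max_map_count, which defaults to roughly 65530.)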
*/ - __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 4096; + __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 16384; #endif diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 3d6a2d6f99c..c48e76e1d98 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -9,7 +9,8 @@ M(ReplicatedFetch, "Number of data parts being fetched from replica") \ M(ReplicatedSend, "Number of data parts being sent to replicas") \ M(ReplicatedChecks, "Number of data parts checking for consistency") \ - M(BackgroundPoolTask, "Number of active tasks in BackgroundProcessingPool (merges, mutations, fetches, or replication queue bookkeeping)") \ + M(BackgroundPoolTask, "Number of active tasks in BackgroundProcessingPool (merges, mutations, or replication queue bookkeeping)") \ + M(BackgroundFetchesPoolTask, "Number of active tasks in BackgroundFetchesPool") \ M(BackgroundMovePoolTask, "Number of active tasks in BackgroundProcessingPool for moves") \ M(BackgroundSchedulePoolTask, "Number of active tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc.") \ M(BackgroundBufferFlushSchedulePoolTask, "Number of active tasks in BackgroundBufferFlushSchedulePool. This pool is used for periodic Buffer flushes") \ diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 405b8c60af8..7f54b6bc50e 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -519,9 +519,11 @@ M(550, CONDITIONAL_TREE_PARENT_NOT_FOUND) \ M(551, ILLEGAL_PROJECTION_MANIPULATOR) \ M(552, UNRECOGNIZED_ARGUMENTS) \ - M(553, ROCKSDB_ERROR) \ M(553, LZMA_STREAM_ENCODER_FAILED) \ M(554, LZMA_STREAM_DECODER_FAILED) \ + M(555, ROCKSDB_ERROR) \ + M(556, SYNC_MYSQL_USER_ACCESS_ERROR)\ + \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ M(1001, STD_EXCEPTION) \ diff --git a/src/Common/RadixSort.h b/src/Common/RadixSort.h index 22e93a2c324..7ceb8569bd1 100644 --- a/src/Common/RadixSort.h +++ b/src/Common/RadixSort.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/src/Common/TraceCollector.cpp b/src/Common/TraceCollector.cpp index d10d5981d57..1548af50d98 100644 --- a/src/Common/TraceCollector.cpp +++ b/src/Common/TraceCollector.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -115,6 +116,8 @@ void TraceCollector::stop() void TraceCollector::run() { + setThreadName("TraceCollector"); + ReadBufferFromFileDescriptor in(pipe.fds_rw[0]); while (true) diff --git a/src/Common/ZooKeeper/TestKeeper.cpp b/src/Common/ZooKeeper/TestKeeper.cpp index 4ae26d874fb..5f34a60c34e 100644 --- a/src/Common/ZooKeeper/TestKeeper.cpp +++ b/src/Common/ZooKeeper/TestKeeper.cpp @@ -511,19 +511,30 @@ void TestKeeper::processingThread() if (expired) break; - if (info.watch) - { - auto & watches_type = dynamic_cast(info.request.get()) - ? list_watches - : watches; - - watches_type[info.request->getPath()].emplace_back(std::move(info.watch)); - } ++zxid; info.request->addRootPath(root_path); auto [response, _] = info.request->process(container, zxid); + + if (info.watch) + { + /// To be compatible with real ZooKeeper, we add a watch if the request was successful (i.e. the node exists), + /// or if it was an exists request, which allows adding watches for nodes that do not exist yet. + if (response->error == Error::ZOK) + { + auto & watches_type = dynamic_cast(info.request.get()) + ? 
list_watches + : watches; + + watches_type[info.request->getPath()].emplace_back(std::move(info.watch)); + } + else if (response->error == Error::ZNONODE && dynamic_cast(info.request.get())) + { + watches[info.request->getPath()].emplace_back(std::move(info.watch)); + } + } + if (response->error == Error::ZOK) info.request->processWatches(watches, list_watches); diff --git a/src/Common/ya.make b/src/Common/ya.make index 0d6caa22f3a..71c0edaea95 100644 --- a/src/Common/ya.make +++ b/src/Common/ya.make @@ -1,4 +1,6 @@ # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. +OWNER(g:clickhouse) + LIBRARY() ADDINCL ( diff --git a/src/Common/ya.make.in b/src/Common/ya.make.in index 49c8baa5eec..d913832b91b 100644 --- a/src/Common/ya.make.in +++ b/src/Common/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() ADDINCL ( diff --git a/src/Compression/ya.make b/src/Compression/ya.make index ed762bcd35f..d14f26379c5 100644 --- a/src/Compression/ya.make +++ b/src/Compression/ya.make @@ -1,4 +1,6 @@ # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. +OWNER(g:clickhouse) + LIBRARY() ADDINCL( diff --git a/src/Compression/ya.make.in b/src/Compression/ya.make.in index 3c46b036aa0..a981da821eb 100644 --- a/src/Compression/ya.make.in +++ b/src/Compression/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() ADDINCL( diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 07ec0810f96..cd2855739e2 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -40,7 +40,7 @@ Block::Block(const ColumnsWithTypeAndName & data_) : data{data_} void Block::initializeIndexByName() { for (size_t i = 0, size = data.size(); i < size; ++i) - index_by_name[data[i].name] = i; + index_by_name.emplace(data[i].name, i); } @@ -295,6 +295,20 @@ std::string Block::dumpStructure() const return out.str(); } +std::string Block::dumpIndex() const +{ + WriteBufferFromOwnString out; + bool first = true; + for (const auto & [name, pos] : index_by_name) + { + if (!first) + out << ", "; + first = false; + + out << name << ' ' << pos; + } + return out.str(); +} Block Block::cloneEmpty() const { diff --git a/src/Core/Block.h b/src/Core/Block.h index f05cc2b52eb..eef3c27363b 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -119,6 +119,9 @@ public: /** List of names, types and lengths of columns. Designed for debugging. */ std::string dumpStructure() const; + /** List of column names and positions from index */ + std::string dumpIndex() const; + /** Get the same block, but empty. */ Block cloneEmpty() const; @@ -156,7 +159,7 @@ private: /// This is needed to allow function execution over data. /// It is safe because functions does not change column names, so index is unaffected. /// It is temporary. 
- friend struct ExpressionAction; + friend class ExpressionActions; friend class ActionsDAG; }; diff --git a/src/Core/DecimalComparison.h b/src/Core/DecimalComparison.h index 674ed31683b..6da1fc00b7c 100644 --- a/src/Core/DecimalComparison.h +++ b/src/Core/DecimalComparison.h @@ -57,6 +57,7 @@ public: using Op = Operation; using ColVecA = std::conditional_t, ColumnDecimal, ColumnVector>; using ColVecB = std::conditional_t, ColumnDecimal, ColumnVector>; + using ArrayA = typename ColVecA::Container; using ArrayB = typename ColVecB::Container; diff --git a/src/Core/Defines.h b/src/Core/Defines.h index 4d7d8e08ac3..9b6578092c9 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -70,7 +70,7 @@ /// Minimum revision supporting OpenTelemetry #define DBMS_MIN_REVISION_WITH_OPENTELEMETRY 54442 -/// Mininum revision supporting interserver secret. +/// Minimum revision supporting interserver secret. #define DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET 54441 /// Version of ClickHouse TCP protocol. Increment it manually when you change the protocol. diff --git a/src/Core/MySQL/MySQLClient.cpp b/src/Core/MySQL/MySQLClient.cpp index 9cb21a2d39a..f65fbe62274 100644 --- a/src/Core/MySQL/MySQLClient.cpp +++ b/src/Core/MySQL/MySQLClient.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB { @@ -132,11 +133,19 @@ void MySQLClient::ping() writeCommand(Command::COM_PING, ""); } -void MySQLClient::startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid_str) +void MySQLClient::setBinlogChecksum(const String & binlog_checksum) { - /// Set binlog checksum to CRC32. - String checksum = "CRC32"; - writeCommand(Command::COM_QUERY, "SET @master_binlog_checksum = '" + checksum + "'"); + replication.setChecksumSignatureLength(Poco::toUpper(binlog_checksum) == "NONE" ? 0 : 4); +} + +void MySQLClient::startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid_str, const String & binlog_checksum) +{ + /// The value may be CRC32 or NONE. mysqlbinlog.cc uses NONE; as its comments explain, + /// this makes a notice to the server that the client is checksum-aware + /// and does not need the first fake Rotate event to be checksummed. + writeCommand(Command::COM_QUERY, "SET @master_binlog_checksum = 'CRC32'"); + + setBinlogChecksum(binlog_checksum); /// Set heartbeat 1s. UInt64 period_ns = (1 * 1e9); diff --git a/src/Core/MySQL/MySQLClient.h b/src/Core/MySQL/MySQLClient.h index a31794acc42..5835e980149 100644 --- a/src/Core/MySQL/MySQLClient.h +++ b/src/Core/MySQL/MySQLClient.h @@ -29,10 +29,12 @@ public: void disconnect(); void ping(); + void setBinlogChecksum(const String & binlog_checksum); + /// Start replication stream by GTID. /// replicate_db: replication database schema, events from other databases will be ignored. /// gtid: executed gtid sets format like 'hhhhhhhh-hhhh-hhhh-hhhh-hhhhhhhhhhhh:x-y'. 
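+ /// binlog_checksum: the master's binlog checksum algorithm ('CRC32' or 'NONE'), forwarded to setBinlogChecksum().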
- void startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid); + void startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid, const String & binlog_checksum); BinlogEventPtr readOneBinlogEvent(UInt64 milliseconds = 0); Position getPosition() const { return replication.getPosition(); } diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp index 6ff1670777a..a33d65fcbd5 100644 --- a/src/Core/MySQL/MySQLReplication.cpp +++ b/src/Core/MySQL/MySQLReplication.cpp @@ -57,7 +57,6 @@ namespace MySQLReplication payload.readStrict(reinterpret_cast(&create_timestamp), 4); payload.readStrict(reinterpret_cast(&event_header_length), 1); assert(event_header_length == EVENT_HEADER_LENGTH); - readStringUntilEOF(event_type_header_length, payload); } @@ -745,7 +744,7 @@ namespace MySQLReplication // skip the generic response packets header flag. payload.ignore(1); - MySQLBinlogEventReadBuffer event_payload(payload); + MySQLBinlogEventReadBuffer event_payload(payload, checksum_signature_length); EventHeader event_header; event_header.parse(event_payload); diff --git a/src/Core/MySQL/MySQLReplication.h b/src/Core/MySQL/MySQLReplication.h index 394ac729d1b..bbefb368aaf 100644 --- a/src/Core/MySQL/MySQLReplication.h +++ b/src/Core/MySQL/MySQLReplication.h @@ -526,6 +526,8 @@ namespace MySQLReplication virtual BinlogEventPtr readOneEvent() = 0; virtual void setReplicateDatabase(String db) = 0; virtual void setGTIDSets(GTIDSets sets) = 0; + virtual void setChecksumSignatureLength(size_t checksum_signature_length_) = 0; + virtual ~IFlavor() override = default; }; @@ -538,12 +540,14 @@ namespace MySQLReplication BinlogEventPtr readOneEvent() override { return event; } void setReplicateDatabase(String db) override { replicate_do_db = std::move(db); } void setGTIDSets(GTIDSets sets) override { position.gtid_sets = std::move(sets); } + void setChecksumSignatureLength(size_t checksum_signature_length_) override { checksum_signature_length = checksum_signature_length_; } private: Position position; BinlogEventPtr event; String replicate_do_db; std::shared_ptr table_map; + size_t checksum_signature_length = 4; inline bool do_replicate() { return (replicate_do_db.empty() || table_map->schema == replicate_do_db); } }; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 580756361b1..6836a597047 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -71,6 +71,7 @@ class IColumn; M(UInt64, background_buffer_flush_schedule_pool_size, 16, "Number of threads performing background flush for tables with Buffer engine. Only has meaning at server startup.", 0) \ M(UInt64, background_pool_size, 16, "Number of threads performing background work for tables (for example, merging in merge tree). Only has meaning at server startup.", 0) \ M(UInt64, background_move_pool_size, 8, "Number of threads performing background moves for tables. Only has meaning at server startup.", 0) \ + M(UInt64, background_fetches_pool_size, 3, "Number of threads performing background fetches for replicated tables. Only has meaning at server startup.", 0) \ M(UInt64, background_schedule_pool_size, 16, "Number of threads performing background tasks for replicated tables, dns cache updates. Only has meaning at server startup.", 0) \ M(UInt64, background_message_broker_schedule_pool_size, 16, "Number of threads performing background tasks for message streaming. 
Only has meaning at server startup.", 0) \ M(UInt64, background_distributed_schedule_pool_size, 16, "Number of threads performing background tasks for distributed sends. Only has meaning at server startup.", 0) \ @@ -440,6 +441,8 @@ class IColumn; M(Bool, output_format_json_quote_denormals, false, "Enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format.", 0) \ \ M(Bool, output_format_json_escape_forward_slashes, true, "Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped.", 0) \ + M(Bool, output_format_json_named_tuples_as_objects, false, "Serialize named tuple columns as JSON objects.", 0) \ + M(Bool, output_format_json_array_of_rows, false, "Output a JSON array of all rows in JSONEachRow(Compact) format.", 0) \ \ M(UInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \ M(UInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \ @@ -516,7 +519,7 @@ struct Settings : public BaseSettings }; /* - * User-specified file format settings for File and ULR engines. + * User-specified file format settings for File and URL engines. */ DECLARE_SETTINGS_TRAITS(FormatFactorySettingsTraits, FORMAT_FACTORY_SETTINGS) diff --git a/src/Core/Types.h b/src/Core/Types.h index 3157598adc0..28ca7146aaf 100644 --- a/src/Core/Types.h +++ b/src/Core/Types.h @@ -145,7 +145,7 @@ struct Decimal operator T () const { return value; } template - U convertTo() + U convertTo() const { /// no IsDecimalNumber defined yet if constexpr (std::is_same_v> || diff --git a/src/Core/iostream_debug_helpers.cpp b/src/Core/iostream_debug_helpers.cpp index a6fc329c8eb..8dc8a4244ac 100644 --- a/src/Core/iostream_debug_helpers.cpp +++ b/src/Core/iostream_debug_helpers.cpp @@ -106,12 +106,6 @@ std::ostream & operator<<(std::ostream & stream, const Packet & what) return stream; } -std::ostream & operator<<(std::ostream & stream, const ExpressionAction & what) -{ - stream << "ExpressionAction(" << what.toString() << ")"; - return stream; -} - std::ostream & operator<<(std::ostream & stream, const ExpressionActions & what) { stream << "ExpressionActions(" << what.dumpActions() << ")"; diff --git a/src/Core/iostream_debug_helpers.h b/src/Core/iostream_debug_helpers.h index ef195ed4abf..7568fa6e445 100644 --- a/src/Core/iostream_debug_helpers.h +++ b/src/Core/iostream_debug_helpers.h @@ -40,9 +40,6 @@ std::ostream & operator<<(std::ostream & stream, const IColumn & what); struct Packet; std::ostream & operator<<(std::ostream & stream, const Packet & what); -struct ExpressionAction; -std::ostream & operator<<(std::ostream & stream, const ExpressionAction & what); - class ExpressionActions; std::ostream & operator<<(std::ostream & stream, const ExpressionActions & what); diff --git a/src/Core/tests/mysql_protocol.cpp b/src/Core/tests/mysql_protocol.cpp index 9dc46891241..98555ddcfe0 100644 --- a/src/Core/tests/mysql_protocol.cpp +++ b/src/Core/tests/mysql_protocol.cpp @@ -304,7 +304,8 @@ int main(int argc, char ** argv) "user", boost::program_options::value()->default_value("root"), "master user")( "password", boost::program_options::value()->required(), "master password")( "gtid", boost::program_options::value()->default_value(""), "executed GTID sets")( - "db", boost::program_options::value()->required(), "replicate do db"); + "db", boost::program_options::value()->required(), "replicate do db")( + 
"binlog_checksum", boost::program_options::value()->default_value("CRC32"), "master binlog_checksum"); boost::program_options::variables_map options; boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); @@ -319,6 +320,7 @@ int main(int argc, char ** argv) auto master_password = options.at("password").as(); auto gtid_sets = options.at("gtid").as(); auto replicate_db = options.at("db").as(); + auto binlog_checksum = options.at("binlog_checksum").as(); std::cerr << "Master Host: " << host << ", Port: " << port << ", User: " << master_user << ", Password: " << master_password << ", Replicate DB: " << replicate_db << ", GTID: " << gtid_sets << std::endl; @@ -328,7 +330,7 @@ int main(int argc, char ** argv) /// Connect to the master. slave.connect(); - slave.startBinlogDumpGTID(slave_id, replicate_db, gtid_sets); + slave.startBinlogDumpGTID(slave_id, replicate_db, gtid_sets, binlog_checksum); WriteBufferFromOStream cerr(std::cerr); diff --git a/src/Core/ya.make b/src/Core/ya.make index d7ba5f8dab9..1eae848163b 100644 --- a/src/Core/ya.make +++ b/src/Core/ya.make @@ -1,4 +1,6 @@ # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Core/ya.make.in b/src/Core/ya.make.in index b2e82663c1e..e1c679ac809 100644 --- a/src/Core/ya.make.in +++ b/src/Core/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index d4a892711ef..a967ee28502 100644 --- a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -46,7 +46,7 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) auto * constraint_ptr = constraints.constraints[i]->as(); - ColumnWithTypeAndName res_column = block_to_calculate.getByPosition(block_to_calculate.columns() - 1); + ColumnWithTypeAndName res_column = block_to_calculate.getByName(constraint_ptr->expr->getColumnName()); if (!isUInt8(res_column.type)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Constraint {} does not return a value of type UInt8", diff --git a/src/DataStreams/ParallelParsingBlockInputStream.cpp b/src/DataStreams/ParallelParsingBlockInputStream.cpp index 050a0d8ef8a..19b04d36fc1 100644 --- a/src/DataStreams/ParallelParsingBlockInputStream.cpp +++ b/src/DataStreams/ParallelParsingBlockInputStream.cpp @@ -17,7 +17,7 @@ ParallelParsingBlockInputStream::ParallelParsingBlockInputStream(const Params & // Subtract one thread that we use for segmentation and one for // reading. After that, must have at least two threads left for // parsing. See the assertion below. - pool(std::max(2, params.max_threads - 2)), + pool(std::max(2, static_cast(params.max_threads) - 2)), file_segmentation_engine(params.file_segmentation_engine) { // See comment above. 
diff --git a/src/DataStreams/ParallelParsingBlockInputStream.h b/src/DataStreams/ParallelParsingBlockInputStream.h index 4c110f8a937..c882acd9ddd 100644 --- a/src/DataStreams/ParallelParsingBlockInputStream.h +++ b/src/DataStreams/ParallelParsingBlockInputStream.h @@ -69,7 +69,7 @@ public: const InputProcessorCreator & input_processor_creator; const InputCreatorParams & input_creator_params; FormatFactory::FileSegmentationEngine file_segmentation_engine; - int max_threads; + size_t max_threads; size_t min_chunk_bytes; }; diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 3baa2b30c3f..1252dd7f4de 100644 --- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,8 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( , context(context_) , query_ptr(query_ptr_) { + checkStackSize(); + /** TODO This is a very important line. At any insertion into the table one of streams should own lock. * Although now any insertion into the table is done via PushingToViewsBlockOutputStream, * but it's clear that here is not the best place for this functionality. diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp index c1c482b1a73..38479409f84 100644 --- a/src/DataStreams/TTLBlockInputStream.cpp +++ b/src/DataStreams/TTLBlockInputStream.cpp @@ -103,6 +103,15 @@ bool TTLBlockInputStream::isTTLExpired(time_t ttl) const return (ttl && (ttl <= current_time)); } +Block reorderColumns(Block block, const Block & header) +{ + Block res; + for (const auto & col : header) + res.insert(block.getByName(col.name)); + + return res; +} + Block TTLBlockInputStream::readImpl() { /// Skip all data if table ttl is expired for part @@ -136,7 +145,7 @@ Block TTLBlockInputStream::readImpl() updateMovesTTL(block); updateRecompressionTTL(block); - return block; + return reorderColumns(std::move(block), header); } void TTLBlockInputStream::readSuffixImpl() diff --git a/src/DataStreams/ya.make b/src/DataStreams/ya.make index bb6dd3f9357..776578af131 100644 --- a/src/DataStreams/ya.make +++ b/src/DataStreams/ya.make @@ -1,4 +1,6 @@ # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. 
+OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/DataStreams/ya.make.in b/src/DataStreams/ya.make.in index 7aa2fe4874e..d6a683daa66 100644 --- a/src/DataStreams/ya.make.in +++ b/src/DataStreams/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 453cb7f37a3..02fc49f7e9a 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -25,12 +25,13 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; - extern const int EMPTY_DATA_PASSED; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int DUPLICATE_COLUMN; extern const int BAD_ARGUMENTS; + extern const int DUPLICATE_COLUMN; + extern const int EMPTY_DATA_PASSED; + extern const int LOGICAL_ERROR; extern const int NOT_FOUND_COLUMN_IN_BLOCK; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH; } @@ -145,6 +146,20 @@ static void addElementSafe(const DataTypes & elems, IColumn & column, F && impl) try { impl(); + + // Check that all columns now have the same size. + size_t new_size = column.size(); + for (auto i : ext::range(1, ext::size(elems))) + { + const auto & element_column = extractElementColumn(column, i); + if (element_column.size() != new_size) + { + // This is not a logical error, because it can be triggered by + // malformed user-supplied data. + throw Exception(ErrorCodes::SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH, + "Cannot read a tuple because not all elements are present"); + } + } } catch (...) { @@ -213,37 +228,93 @@ void DataTypeTuple::deserializeText(IColumn & column, ReadBuffer & istr, const F void DataTypeTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - writeChar('[', ostr); - for (const auto i : ext::range(0, ext::size(elems))) + if (settings.json.named_tuples_as_objects + && have_explicit_names) { - if (i != 0) - writeChar(',', ostr); - elems[i]->serializeAsTextJSON(extractElementColumn(column, i), row_num, ostr, settings); + writeChar('{', ostr); + for (const auto i : ext::range(0, ext::size(elems))) + { + if (i != 0) + { + writeChar(',', ostr); + } + writeJSONString(names[i], ostr, settings); + writeChar(':', ostr); + elems[i]->serializeAsTextJSON(extractElementColumn(column, i), row_num, ostr, settings); + } + writeChar('}', ostr); + } + else + { + writeChar('[', ostr); + for (const auto i : ext::range(0, ext::size(elems))) + { + if (i != 0) + writeChar(',', ostr); + elems[i]->serializeAsTextJSON(extractElementColumn(column, i), row_num, ostr, settings); + } + writeChar(']', ostr); } - writeChar(']', ostr); } void DataTypeTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - const size_t size = elems.size(); - assertChar('[', istr); - - addElementSafe(elems, column, [&] + if (settings.json.named_tuples_as_objects + && have_explicit_names) { - for (const auto i : ext::range(0, size)) - { - skipWhitespaceIfAny(istr); - if (i != 0) - { - assertChar(',', istr); - skipWhitespaceIfAny(istr); - } - elems[i]->deserializeAsTextJSON(extractElementColumn(column, i), istr, settings); - } - }); + skipWhitespaceIfAny(istr); + assertChar('{', istr); + skipWhitespaceIfAny(istr); - skipWhitespaceIfAny(istr); - assertChar(']', istr); + addElementSafe(elems, column, [&] + { + // Require all elements, but accept them in arbitrary order. 
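+ // (Keys are resolved by name via getPositionByName, so the object may list elements in any order; missing or duplicated keys leave the element columns with unequal sizes, which the size check in addElementSafe rejects.)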
+ for (auto i : ext::range(0, ext::size(elems))) + { + if (i > 0) + { + skipWhitespaceIfAny(istr); + assertChar(',', istr); + skipWhitespaceIfAny(istr); + } + + std::string name; + readDoubleQuotedString(name, istr); + skipWhitespaceIfAny(istr); + assertChar(':', istr); + skipWhitespaceIfAny(istr); + + const size_t element_pos = getPositionByName(name); + auto & element_column = extractElementColumn(column, element_pos); + elems[element_pos]->deserializeAsTextJSON(element_column, istr, settings); + } + }); + + skipWhitespaceIfAny(istr); + assertChar('}', istr); + } + else + { + const size_t size = elems.size(); + assertChar('[', istr); + + addElementSafe(elems, column, [&] + { + for (const auto i : ext::range(0, size)) + { + skipWhitespaceIfAny(istr); + if (i != 0) + { + assertChar(',', istr); + skipWhitespaceIfAny(istr); + } + elems[i]->deserializeAsTextJSON(extractElementColumn(column, i), istr, settings); + } + }); + + skipWhitespaceIfAny(istr); + assertChar(']', istr); + } } void DataTypeTuple::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 5e25d47534e..c32aba721f4 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -466,75 +466,66 @@ struct WhichDataType { TypeIndex idx; - WhichDataType(TypeIndex idx_ = TypeIndex::Nothing) - : idx(idx_) - {} + constexpr WhichDataType(TypeIndex idx_ = TypeIndex::Nothing) : idx(idx_) {} + constexpr WhichDataType(const IDataType & data_type) : idx(data_type.getTypeId()) {} + constexpr WhichDataType(const IDataType * data_type) : idx(data_type->getTypeId()) {} - WhichDataType(const IDataType & data_type) - : idx(data_type.getTypeId()) - {} + // shared ptr -> is non-constexpr in gcc + WhichDataType(const DataTypePtr & data_type) : idx(data_type->getTypeId()) {} - WhichDataType(const IDataType * data_type) - : idx(data_type->getTypeId()) - {} + constexpr bool isUInt8() const { return idx == TypeIndex::UInt8; } + constexpr bool isUInt16() const { return idx == TypeIndex::UInt16; } + constexpr bool isUInt32() const { return idx == TypeIndex::UInt32; } + constexpr bool isUInt64() const { return idx == TypeIndex::UInt64; } + constexpr bool isUInt128() const { return idx == TypeIndex::UInt128; } + constexpr bool isUInt256() const { return idx == TypeIndex::UInt256; } + constexpr bool isUInt() const { return isUInt8() || isUInt16() || isUInt32() || isUInt64() || isUInt128() || isUInt256(); } + constexpr bool isNativeUInt() const { return isUInt8() || isUInt16() || isUInt32() || isUInt64(); } - WhichDataType(const DataTypePtr & data_type) - : idx(data_type->getTypeId()) - {} + constexpr bool isInt8() const { return idx == TypeIndex::Int8; } + constexpr bool isInt16() const { return idx == TypeIndex::Int16; } + constexpr bool isInt32() const { return idx == TypeIndex::Int32; } + constexpr bool isInt64() const { return idx == TypeIndex::Int64; } + constexpr bool isInt128() const { return idx == TypeIndex::Int128; } + constexpr bool isInt256() const { return idx == TypeIndex::Int256; } + constexpr bool isInt() const { return isInt8() || isInt16() || isInt32() || isInt64() || isInt128() || isInt256(); } + constexpr bool isNativeInt() const { return isInt8() || isInt16() || isInt32() || isInt64(); } - bool isUInt8() const { return idx == TypeIndex::UInt8; } - bool isUInt16() const { return idx == TypeIndex::UInt16; } - bool isUInt32() const { return idx == TypeIndex::UInt32; } - bool isUInt64() const { 
return idx == TypeIndex::UInt64; } - bool isUInt128() const { return idx == TypeIndex::UInt128; } - bool isUInt256() const { return idx == TypeIndex::UInt256; } - bool isUInt() const { return isUInt8() || isUInt16() || isUInt32() || isUInt64() || isUInt128() || isUInt256(); } - bool isNativeUInt() const { return isUInt8() || isUInt16() || isUInt32() || isUInt64(); } + constexpr bool isDecimal32() const { return idx == TypeIndex::Decimal32; } + constexpr bool isDecimal64() const { return idx == TypeIndex::Decimal64; } + constexpr bool isDecimal128() const { return idx == TypeIndex::Decimal128; } + constexpr bool isDecimal256() const { return idx == TypeIndex::Decimal256; } + constexpr bool isDecimal() const { return isDecimal32() || isDecimal64() || isDecimal128() || isDecimal256(); } - bool isInt8() const { return idx == TypeIndex::Int8; } - bool isInt16() const { return idx == TypeIndex::Int16; } - bool isInt32() const { return idx == TypeIndex::Int32; } - bool isInt64() const { return idx == TypeIndex::Int64; } - bool isInt128() const { return idx == TypeIndex::Int128; } - bool isInt256() const { return idx == TypeIndex::Int256; } - bool isInt() const { return isInt8() || isInt16() || isInt32() || isInt64() || isInt128() || isInt256(); } - bool isNativeInt() const { return isInt8() || isInt16() || isInt32() || isInt64(); } + constexpr bool isFloat32() const { return idx == TypeIndex::Float32; } + constexpr bool isFloat64() const { return idx == TypeIndex::Float64; } + constexpr bool isFloat() const { return isFloat32() || isFloat64(); } - bool isDecimal32() const { return idx == TypeIndex::Decimal32; } - bool isDecimal64() const { return idx == TypeIndex::Decimal64; } - bool isDecimal128() const { return idx == TypeIndex::Decimal128; } - bool isDecimal256() const { return idx == TypeIndex::Decimal256; } - bool isDecimal() const { return isDecimal32() || isDecimal64() || isDecimal128() || isDecimal256(); } + constexpr bool isEnum8() const { return idx == TypeIndex::Enum8; } + constexpr bool isEnum16() const { return idx == TypeIndex::Enum16; } + constexpr bool isEnum() const { return isEnum8() || isEnum16(); } - bool isFloat32() const { return idx == TypeIndex::Float32; } - bool isFloat64() const { return idx == TypeIndex::Float64; } - bool isFloat() const { return isFloat32() || isFloat64(); } + constexpr bool isDate() const { return idx == TypeIndex::Date; } + constexpr bool isDateTime() const { return idx == TypeIndex::DateTime; } + constexpr bool isDateTime64() const { return idx == TypeIndex::DateTime64; } + constexpr bool isDateOrDateTime() const { return isDate() || isDateTime() || isDateTime64(); } - bool isEnum8() const { return idx == TypeIndex::Enum8; } - bool isEnum16() const { return idx == TypeIndex::Enum16; } - bool isEnum() const { return isEnum8() || isEnum16(); } + constexpr bool isString() const { return idx == TypeIndex::String; } + constexpr bool isFixedString() const { return idx == TypeIndex::FixedString; } + constexpr bool isStringOrFixedString() const { return isString() || isFixedString(); } - bool isDate() const { return idx == TypeIndex::Date; } - bool isDateTime() const { return idx == TypeIndex::DateTime; } - bool isDateTime64() const { return idx == TypeIndex::DateTime64; } - bool isDateOrDateTime() const { return isDate() || isDateTime() || isDateTime64(); } + constexpr bool isUUID() const { return idx == TypeIndex::UUID; } + constexpr bool isArray() const { return idx == TypeIndex::Array; } + constexpr bool isTuple() const { return idx == TypeIndex::Tuple; 
} + constexpr bool isSet() const { return idx == TypeIndex::Set; } + constexpr bool isInterval() const { return idx == TypeIndex::Interval; } - bool isString() const { return idx == TypeIndex::String; } - bool isFixedString() const { return idx == TypeIndex::FixedString; } - bool isStringOrFixedString() const { return isString() || isFixedString(); } + constexpr bool isNothing() const { return idx == TypeIndex::Nothing; } + constexpr bool isNullable() const { return idx == TypeIndex::Nullable; } + constexpr bool isFunction() const { return idx == TypeIndex::Function; } + constexpr bool isAggregateFunction() const { return idx == TypeIndex::AggregateFunction; } - bool isUUID() const { return idx == TypeIndex::UUID; } - bool isArray() const { return idx == TypeIndex::Array; } - bool isTuple() const { return idx == TypeIndex::Tuple; } - bool isSet() const { return idx == TypeIndex::Set; } - bool isInterval() const { return idx == TypeIndex::Interval; } - - bool isNothing() const { return idx == TypeIndex::Nothing; } - bool isNullable() const { return idx == TypeIndex::Nullable; } - bool isFunction() const { return idx == TypeIndex::Function; } - bool isAggregateFunction() const { return idx == TypeIndex::AggregateFunction; } - - bool IsBigIntOrDeimal() const { return isInt128() || isInt256() || isUInt256() || isDecimal256(); } + constexpr bool IsBigIntOrDeimal() const { return isInt128() || isInt256() || isUInt256() || isDecimal256(); } }; /// IDataType helpers (alternative for IDataType virtual methods with single point of truth) diff --git a/src/DataTypes/ya.make b/src/DataTypes/ya.make index 91d28a08f22..b570adbdc7a 100644 --- a/src/DataTypes/ya.make +++ b/src/DataTypes/ya.make @@ -1,4 +1,6 @@ # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. 
+OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/DataTypes/ya.make.in b/src/DataTypes/ya.make.in index 05170178925..d93dd32bd8d 100644 --- a/src/DataTypes/ya.make.in +++ b/src/DataTypes/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Databases/DatabaseDictionary.cpp b/src/Databases/DatabaseDictionary.cpp index 3732139c66a..ff5510f0bf9 100644 --- a/src/Databases/DatabaseDictionary.cpp +++ b/src/Databases/DatabaseDictionary.cpp @@ -44,10 +44,10 @@ namespace } } -DatabaseDictionary::DatabaseDictionary(const String & name_, const Context & global_context_) +DatabaseDictionary::DatabaseDictionary(const String & name_, const Context & context_) : IDatabase(name_) , log(&Poco::Logger::get("DatabaseDictionary(" + database_name + ")")) - , global_context(global_context_.getGlobalContext()) + , global_context(context_.getGlobalContext()) { } diff --git a/src/Databases/DatabaseDictionary.h b/src/Databases/DatabaseDictionary.h index c3c6a53a894..2cfc6ef3285 100644 --- a/src/Databases/DatabaseDictionary.h +++ b/src/Databases/DatabaseDictionary.h @@ -22,7 +22,7 @@ namespace DB class DatabaseDictionary final : public IDatabase { public: - DatabaseDictionary(const String & name_, const Context & global_context); + DatabaseDictionary(const String & name_, const Context & context_); String getEngineName() const override { diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index a1d24226444..24e2bdcd6b2 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -67,14 +67,14 @@ namespace } - void tryAttachDictionary(const ASTPtr & query, DatabaseOrdinary & database, const String & metadata_path) + void tryAttachDictionary(const ASTPtr & query, DatabaseOrdinary & database, const String & metadata_path, const Context & context) { auto & create_query = query->as(); assert(create_query.is_dictionary); try { Poco::File meta_file(metadata_path); - auto config = getDictionaryConfigurationFromAST(create_query, database.getDatabaseName()); + auto config = getDictionaryConfigurationFromAST(create_query, context, database.getDatabaseName()); time_t modification_time = meta_file.getLastModified().epochTime(); database.attachDictionary(create_query.table, DictionaryAttachInfo{query, config, modification_time}); } @@ -190,7 +190,7 @@ void DatabaseOrdinary::loadStoredObjects(Context & context, bool has_force_resto auto create_query = query->as(); if (create_query.is_dictionary) { - tryAttachDictionary(query, *this, getMetadataPath() + name); + tryAttachDictionary(query, *this, getMetadataPath() + name, context); /// Messages, so that it's not boring to wait for the server to load for a long time. logAboutProgress(log, ++dictionaries_processed, total_dictionaries, watch); diff --git a/src/Databases/DatabaseWithDictionaries.cpp b/src/Databases/DatabaseWithDictionaries.cpp index 6c5173c986f..ee16f4ae15e 100644 --- a/src/Databases/DatabaseWithDictionaries.cpp +++ b/src/Databases/DatabaseWithDictionaries.cpp @@ -176,7 +176,7 @@ void DatabaseWithDictionaries::createDictionary(const Context & context, const S /// Add a temporary repository containing the dictionary. /// We need this temp repository to try loading the dictionary before actually attaching it to the database. 
auto temp_repository = external_loader.addConfigRepository(std::make_unique( - getDatabaseName(), dictionary_metadata_tmp_path, getDictionaryConfigurationFromAST(query->as()))); + getDatabaseName(), dictionary_metadata_tmp_path, getDictionaryConfigurationFromAST(query->as(), context))); bool lazy_load = context.getConfigRef().getBool("dictionaries_lazy_load", true); if (!lazy_load) @@ -186,7 +186,7 @@ void DatabaseWithDictionaries::createDictionary(const Context & context, const S external_loader.load(dict_id.getInternalDictionaryName()); } - auto config = getDictionaryConfigurationFromAST(query->as()); + auto config = getDictionaryConfigurationFromAST(query->as(), context); attachDictionary(dictionary_name, DictionaryAttachInfo{query, config, time(nullptr)}); SCOPE_EXIT({ if (!succeeded) diff --git a/src/Databases/MySQL/DatabaseConnectionMySQL.cpp b/src/Databases/MySQL/DatabaseConnectionMySQL.cpp index 03d218d132f..45483055739 100644 --- a/src/Databases/MySQL/DatabaseConnectionMySQL.cpp +++ b/src/Databases/MySQL/DatabaseConnectionMySQL.cpp @@ -13,6 +13,7 @@ # include # include # include +# include # include # include # include diff --git a/src/Databases/MySQL/MaterializeMetadata.cpp b/src/Databases/MySQL/MaterializeMetadata.cpp index 3c5bfdec594..c001955a8ae 100644 --- a/src/Databases/MySQL/MaterializeMetadata.cpp +++ b/src/Databases/MySQL/MaterializeMetadata.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB { @@ -19,6 +20,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int SYNC_MYSQL_USER_ACCESS_ERROR; } static std::unordered_map fetchTablesCreateQuery( @@ -64,6 +66,7 @@ static std::vector fetchTablesInDB(const mysqlxx::PoolWithFailover::Entr return tables_in_db; } + void MaterializeMetadata::fetchMasterStatus(mysqlxx::PoolWithFailover::Entry & connection) { Block header{ @@ -88,6 +91,29 @@ void MaterializeMetadata::fetchMasterStatus(mysqlxx::PoolWithFailover::Entry & c executed_gtid_set = (*master_status.getByPosition(4).column)[0].safeGet(); } +void MaterializeMetadata::fetchMasterVariablesValue(const mysqlxx::PoolWithFailover::Entry & connection) +{ + Block variables_header{ + {std::make_shared(), "Variable_name"}, + {std::make_shared(), "Value"} + }; + + const String & fetch_query = "SHOW VARIABLES WHERE Variable_name = 'binlog_checksum'"; + MySQLBlockInputStream variables_input(connection, fetch_query, variables_header, DEFAULT_BLOCK_SIZE); + + while (Block variables_block = variables_input.read()) + { + ColumnPtr variables_name = variables_block.getByName("Variable_name").column; + ColumnPtr variables_value = variables_block.getByName("Value").column; + + for (size_t index = 0; index < variables_block.rows(); ++index) + { + if (variables_name->getDataAt(index) == "binlog_checksum") + binlog_checksum = variables_value->getDataAt(index).toString(); + } + } +} + static Block getShowMasterLogHeader(const String & mysql_version) { if (startsWith(mysql_version, "5.")) @@ -105,6 +131,49 @@ static Block getShowMasterLogHeader(const String & mysql_version) }; } +static bool checkSyncUserPrivImpl(mysqlxx::PoolWithFailover::Entry & connection, WriteBuffer & out) +{ + Block sync_user_privs_header + { + {std::make_shared(), "current_user_grants"} + }; + + String grants_query, sub_privs; + MySQLBlockInputStream input(connection, "SHOW GRANTS FOR CURRENT_USER();", sync_user_privs_header, DEFAULT_BLOCK_SIZE); + while (Block block = input.read()) + { + for (size_t index = 0; index < block.rows(); ++index) + { + grants_query = 
(*block.getByPosition(0).column)[index].safeGet(); + out << grants_query << "; "; + sub_privs = grants_query.substr(0, grants_query.find(" ON ")); + if (sub_privs.find("ALL PRIVILEGES") == std::string::npos) + { + if ((sub_privs.find("RELOAD") != std::string::npos and + sub_privs.find("REPLICATION SLAVE") != std::string::npos and + sub_privs.find("REPLICATION CLIENT") != std::string::npos)) + return true; + } + else + { + return true; + } + } + } + return false; +} + +static void checkSyncUserPriv(mysqlxx::PoolWithFailover::Entry & connection) +{ + WriteBufferFromOwnString out; + + if (!checkSyncUserPrivImpl(connection, out)) + throw Exception("MySQL SYNC USER ACCESS ERR: the MySQL sync user needs " + "at least the global privileges RELOAD, REPLICATION SLAVE and REPLICATION CLIENT, " + "and the SELECT privilege on the MySQL database. " + "The sync user's current grants are: " + out.str(), ErrorCodes::SYNC_MYSQL_USER_ACCESS_ERROR); +} + bool MaterializeMetadata::checkBinlogFileExists(mysqlxx::PoolWithFailover::Entry & connection, const String & mysql_version) const { MySQLBlockInputStream input(connection, "SHOW MASTER LOGS", getShowMasterLogHeader(mysql_version), DEFAULT_BLOCK_SIZE); @@ -167,6 +236,8 @@ MaterializeMetadata::MaterializeMetadata( const String & database, bool & opened_transaction, const String & mysql_version) : persistent_path(path_) { + checkSyncUserPriv(connection); + if (Poco::File(persistent_path).exists()) { ReadBufferFromFile in(persistent_path, DBMS_DEFAULT_BUFFER_SIZE); @@ -193,6 +264,7 @@ MaterializeMetadata::MaterializeMetadata( locked_tables = true; fetchMasterStatus(connection); + fetchMasterVariablesValue(connection); connection->query("SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;").execute(); connection->query("START TRANSACTION /*!40100 WITH CONSISTENT SNAPSHOT */;").execute(); diff --git a/src/Databases/MySQL/MaterializeMetadata.h b/src/Databases/MySQL/MaterializeMetadata.h index 5e77620e365..94dfc73e5df 100644 --- a/src/Databases/MySQL/MaterializeMetadata.h +++ b/src/Databases/MySQL/MaterializeMetadata.h @@ -34,10 +34,13 @@ struct MaterializeMetadata size_t data_version = 1; size_t meta_version = 2; + String binlog_checksum = "CRC32"; std::unordered_map need_dumping_tables; void fetchMasterStatus(mysqlxx::PoolWithFailover::Entry & connection); + void fetchMasterVariablesValue(const mysqlxx::PoolWithFailover::Entry & connection); + bool checkBinlogFileExists(mysqlxx::PoolWithFailover::Entry & connection, const String & mysql_version) const; void transaction(const MySQLReplication::Position & position, const std::function & fun); diff --git a/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp index 7e42b2548b0..b8f13f4ed18 100644 --- a/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp @@ -5,7 +5,6 @@ #if USE_MYSQL #include - # include # include # include @@ -34,6 +33,8 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; extern const int ILLEGAL_MYSQL_VARIABLE; + extern const int SYNC_MYSQL_USER_ACCESS_ERROR; + extern const int UNKNOWN_DATABASE; } static constexpr auto MYSQL_BACKGROUND_THREAD_NAME = "MySQLDBSync"; @@ -214,10 +215,33 @@ void MaterializeMySQLSyncThread::stopSynchronization() void MaterializeMySQLSyncThread::startSynchronization() { - const auto & mysql_server_version = checkVariableAndGetVersion(pool.get()); + try + { + const auto & mysql_server_version = checkVariableAndGetVersion(pool.get()); - 
background_thread_pool = std::make_unique( - [this, mysql_server_version = mysql_server_version]() { synchronization(mysql_server_version); }); + background_thread_pool = std::make_unique( + [this, mysql_server_version = mysql_server_version]() { synchronization(mysql_server_version); }); + } + catch (...) + { + try + { + throw; + } + catch (mysqlxx::ConnectionFailed & e) + { + if (e.errnum() == ER_ACCESS_DENIED_ERROR + || e.errnum() == ER_DBACCESS_DENIED_ERROR) + throw Exception("MySQL SYNC USER ACCESS ERR: the MySQL sync user needs " + "at least the global privileges RELOAD, REPLICATION SLAVE and REPLICATION CLIENT, " + "and the SELECT privilege on database " + mysql_database_name + , ErrorCodes::SYNC_MYSQL_USER_ACCESS_ERROR); + else if (e.errnum() == ER_BAD_DB_ERROR) + throw Exception("Unknown database '" + mysql_database_name + "' on MySQL", ErrorCodes::UNKNOWN_DATABASE); + else + throw; + } + } } static inline void cleanOutdatedTables(const String & database_name, const Context & context) @@ -340,7 +364,7 @@ std::optional MaterializeMySQLSyncThread::prepareSynchroniz connection->query("COMMIT").execute(); client.connect(); - client.startBinlogDumpGTID(randomNumber(), mysql_database_name, metadata.executed_gtid_set); + client.startBinlogDumpGTID(randomNumber(), mysql_database_name, metadata.executed_gtid_set, metadata.binlog_checksum); return metadata; } catch (...) @@ -624,16 +648,27 @@ void MaterializeMySQLSyncThread::onEvent(Buffers & buffers, const BinlogEventPtr metadata.transaction(position_before_ddl, [&]() { buffers.commit(global_context); }); metadata.transaction(client.getPosition(),[&](){ executeDDLAtomic(query_event); }); } - else if (receive_event->header.type != HEARTBEAT_EVENT) + else { - const auto & dump_event_message = [&]() + /// MYSQL_UNHANDLED_EVENT + if (receive_event->header.type == ROTATE_EVENT) { - WriteBufferFromOwnString buf; - receive_event->dump(buf); - return buf.str(); - }; + /// Some operations (such as changing the value of "binlog_checksum") rotate the binlog file. + /// To ensure that synchronization continues, we need to handle these events. + metadata.fetchMasterVariablesValue(pool.get()); + client.setBinlogChecksum(metadata.binlog_checksum); + } + else if (receive_event->header.type != HEARTBEAT_EVENT) + { + const auto & dump_event_message = [&]() + { + WriteBufferFromOwnString buf; + receive_event->dump(buf); + return buf.str(); + }; - LOG_DEBUG(log, "Skip MySQL event: \n {}", dump_event_message()); + LOG_DEBUG(log, "Skip MySQL event: \n {}", dump_event_message()); + } } } diff --git a/src/Databases/MySQL/MaterializeMySQLSyncThread.h b/src/Databases/MySQL/MaterializeMySQLSyncThread.h index 9a0df4823e5..323ae5beb80 100644 --- a/src/Databases/MySQL/MaterializeMySQLSyncThread.h +++ b/src/Databases/MySQL/MaterializeMySQLSyncThread.h @@ -20,6 +20,7 @@ # include # include + namespace DB { @@ -63,6 +64,12 @@ private: MaterializeMySQLSettings * settings; String query_prefix; + // MySQL error codes used below; see + // https://dev.mysql.com/doc/mysql-errors/5.7/en/server-error-reference.html + const int ER_ACCESS_DENIED_ERROR = 1045; + const int ER_DBACCESS_DENIED_ERROR = 1044; + const int ER_BAD_DB_ERROR = 1049; + struct Buffers { String database; diff --git a/src/Databases/ya.make b/src/Databases/ya.make index 0c6cfae29c3..0dc44386088 100644 --- a/src/Databases/ya.make +++ b/src/Databases/ya.make @@ -1,4 +1,6 @@ # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. 
+OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Databases/ya.make.in b/src/Databases/ya.make.in index ce7cd88b272..0c5692a9bfa 100644 --- a/src/Databases/ya.make.in +++ b/src/Databases/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index 8199b16a94b..8d733bcd90a 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -17,13 +17,14 @@ namespace DB { -namespace ErrorCodes -{ -} - static const size_t MAX_CONNECTIONS = 16; +inline static UInt16 getPortFromContext(const Context & context, bool secure) +{ + return secure ? context.getTCPPortSecure().value_or(0) : context.getTCPPort(); +} + static ConnectionPoolWithFailoverPtr createPool( const std::string & host, UInt16 port, @@ -59,10 +60,10 @@ ClickHouseDictionarySource::ClickHouseDictionarySource( const std::string & default_database) : update_time{std::chrono::system_clock::from_time_t(0)} , dict_struct{dict_struct_} - , host{config.getString(config_prefix + ".host")} - , port(config.getInt(config_prefix + ".port")) , secure(config.getBool(config_prefix + ".secure", false)) - , user{config.getString(config_prefix + ".user", "")} + , host{config.getString(config_prefix + ".host", "localhost")} + , port(config.getInt(config_prefix + ".port", getPortFromContext(context_, secure))) + , user{config.getString(config_prefix + ".user", "default")} , password{config.getString(config_prefix + ".password", "")} , db{config.getString(config_prefix + ".db", default_database)} , table{config.getString(config_prefix + ".table")} @@ -72,7 +73,7 @@ ClickHouseDictionarySource::ClickHouseDictionarySource( , query_builder{dict_struct, db, "", table, where, IdentifierQuotingStyle::Backticks} , sample_block{sample_block_} , context(context_) - , is_local{isLocalAddress({host, port}, secure ? context.getTCPPortSecure().value_or(0) : context.getTCPPort())} + , is_local{isLocalAddress({host, port}, getPortFromContext(context_, secure))} , pool{is_local ? 
nullptr : createPool(host, port, secure, db, user, password)} , load_all_query{query_builder.composeLoadAllQuery()} { @@ -92,9 +93,9 @@ ClickHouseDictionarySource::ClickHouseDictionarySource( ClickHouseDictionarySource::ClickHouseDictionarySource(const ClickHouseDictionarySource & other) : update_time{other.update_time} , dict_struct{other.dict_struct} + , secure{other.secure} , host{other.host} , port{other.port} - , secure{other.secure} , user{other.user} , password{other.password} , db{other.db} diff --git a/src/Dictionaries/ClickHouseDictionarySource.h b/src/Dictionaries/ClickHouseDictionarySource.h index 8017d458b7e..9ef77d061fd 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.h +++ b/src/Dictionaries/ClickHouseDictionarySource.h @@ -61,9 +61,9 @@ private: std::chrono::time_point update_time; const DictionaryStructure dict_struct; + const bool secure; const std::string host; const UInt16 port; - const bool secure; const std::string user; const std::string password; const std::string db; diff --git a/src/Dictionaries/DictionaryFactory.cpp b/src/Dictionaries/DictionaryFactory.cpp index c33b7b5a3ae..ad19d7c20ea 100644 --- a/src/Dictionaries/DictionaryFactory.cpp +++ b/src/Dictionaries/DictionaryFactory.cpp @@ -62,7 +62,7 @@ DictionaryPtr DictionaryFactory::create( DictionaryPtr DictionaryFactory::create(const std::string & name, const ASTCreateQuery & ast, const Context & context) const { - auto configuration = getDictionaryConfigurationFromAST(ast); + auto configuration = getDictionaryConfigurationFromAST(ast, context); return DictionaryFactory::create(name, *configuration, "dictionary", context, true); } diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index 793d8da7390..832c30ed4b7 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -120,7 +120,7 @@ XDBCDictionarySource::XDBCDictionarySource( , invalidate_query{config_.getString(config_prefix_ + ".invalidate_query", "")} , bridge_helper{bridge_} , timeouts{ConnectionTimeouts::getHTTPTimeouts(context_)} - , global_context(context_) + , global_context(context_.getGlobalContext()) { bridge_url = bridge_helper->getMainURI(); diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 430c1d591dd..40e86d590c4 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB { @@ -356,7 +357,8 @@ NamesToTypeNames buildDictionaryAttributesConfiguration( void buildConfigurationFromFunctionWithKeyValueArguments( AutoPtr doc, AutoPtr root, - const ASTExpressionList * ast_expr_list) + const ASTExpressionList * ast_expr_list, + const Context & context) { const auto & children = ast_expr_list->children; for (size_t i = 0; i != children.size(); ++i) @@ -365,19 +367,30 @@ void buildConfigurationFromFunctionWithKeyValueArguments( AutoPtr current_xml_element(doc->createElement(pair->first)); root->appendChild(current_xml_element); - if (const auto * identifier = pair->second->as(); identifier) + if (const auto * identifier = pair->second->as()) { AutoPtr value(doc->createTextNode(identifier->name())); current_xml_element->appendChild(value); } - else if (const auto * literal = pair->second->as(); literal) + else if (const auto * literal = pair->second->as()) { AutoPtr value(doc->createTextNode(getFieldAsString(literal->value))); 
current_xml_element->appendChild(value); } - else if (const auto * list = pair->second->as(); list) + else if (const auto * list = pair->second->as()) { - buildConfigurationFromFunctionWithKeyValueArguments(doc, current_xml_element, list); + buildConfigurationFromFunctionWithKeyValueArguments(doc, current_xml_element, list, context); + } + else if (const auto * func = pair->second->as()) + { + auto builder = FunctionFactory::instance().tryGet(func->name, context); + auto function = builder->build({}); + auto result = function->execute({}, {}, 0); + + Field value; + result->get(0, value); + AutoPtr text_value(doc->createTextNode(getFieldAsString(value))); + current_xml_element->appendChild(text_value); } else { @@ -406,13 +419,14 @@ void buildSourceConfiguration( AutoPtr doc, AutoPtr root, const ASTFunctionWithKeyValueArguments * source, - const ASTDictionarySettings * settings) + const ASTDictionarySettings * settings, + const Context & context) { AutoPtr outer_element(doc->createElement("source")); root->appendChild(outer_element); AutoPtr source_element(doc->createElement(source->name)); outer_element->appendChild(source_element); - buildConfigurationFromFunctionWithKeyValueArguments(doc, source_element, source->elements->as()); + buildConfigurationFromFunctionWithKeyValueArguments(doc, source_element, source->elements->as(), context); if (settings != nullptr) { @@ -466,7 +480,8 @@ void checkPrimaryKey(const NamesToTypeNames & all_attrs, const Names & key_attrs } -DictionaryConfigurationPtr getDictionaryConfigurationFromAST(const ASTCreateQuery & query, const std::string & database_) +DictionaryConfigurationPtr +getDictionaryConfigurationFromAST(const ASTCreateQuery & query, const Context & context, const std::string & database_) { checkAST(query); @@ -510,7 +525,7 @@ DictionaryConfigurationPtr getDictionaryConfigurationFromAST(const ASTCreateQuer buildPrimaryKeyConfiguration(xml_document, structure_element, complex, pk_attrs, query.dictionary_attributes_list); buildLayoutConfiguration(xml_document, current_dictionary, dictionary_layout); - buildSourceConfiguration(xml_document, current_dictionary, query.dictionary->source, query.dictionary->dict_settings); + buildSourceConfiguration(xml_document, current_dictionary, query.dictionary->source, query.dictionary->dict_settings, context); buildLifetimeConfiguration(xml_document, current_dictionary, query.dictionary->lifetime); if (query.dictionary->range) diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.h b/src/Dictionaries/getDictionaryConfigurationFromAST.h index 3038f450914..5132e3c77e0 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.h +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.h @@ -10,5 +10,6 @@ using DictionaryConfigurationPtr = Poco::AutoPtr -#include +#include +#include +#include #include #include #include #include #include -#include -#include +#include +#include +#include #include @@ -47,7 +49,7 @@ TEST(ConvertDictionaryAST, SimpleDictConfiguration) ParserCreateDictionaryQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); ASTCreateQuery * create = ast->as(); - DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create); + DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create, getContext().context); /// name EXPECT_EQ(config->getString("dictionary.database"), "test"); @@ -115,7 +117,7 @@ TEST(ConvertDictionaryAST, TrickyAttributes) ParserCreateDictionaryQuery parser; ASTPtr ast = parseQuery(parser, 
input.data(), input.data() + input.size(), "", 0, 0); ASTCreateQuery * create = ast->as(); - DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create); + DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create, getContext().context); Poco::Util::AbstractConfiguration::Keys keys; config->keys("dictionary.structure", keys); @@ -160,7 +162,7 @@ TEST(ConvertDictionaryAST, ComplexKeyAndLayoutWithParams) ParserCreateDictionaryQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); ASTCreateQuery * create = ast->as(); - DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create); + DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create, getContext().context); Poco::Util::AbstractConfiguration::Keys keys; config->keys("dictionary.structure.key", keys); @@ -211,7 +213,7 @@ TEST(ConvertDictionaryAST, ComplexSource) ParserCreateDictionaryQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); ASTCreateQuery * create = ast->as(); - DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create); + DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create, getContext().context); /// source EXPECT_EQ(config->getString("dictionary.source.mysql.host"), "localhost"); EXPECT_EQ(config->getInt("dictionary.source.mysql.port"), 9000); diff --git a/src/Dictionaries/ya.make b/src/Dictionaries/ya.make index d12db283cae..107d8871e84 100644 --- a/src/Dictionaries/ya.make +++ b/src/Dictionaries/ya.make @@ -1,4 +1,6 @@ # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Dictionaries/ya.make.in b/src/Dictionaries/ya.make.in index 2c0735d38a4..d11ab4b0840 100644 --- a/src/Dictionaries/ya.make.in +++ b/src/Dictionaries/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Disks/S3/ya.make b/src/Disks/S3/ya.make index 17425f6e69a..f4df540168d 100644 --- a/src/Disks/S3/ya.make +++ b/src/Disks/S3/ya.make @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Disks/ya.make b/src/Disks/ya.make index ea204ff09ec..5b3e4f951dc 100644 --- a/src/Disks/ya.make +++ b/src/Disks/ya.make @@ -1,4 +1,6 @@ # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. 
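The Context parameter threaded through getDictionaryConfigurationFromAST in the hunks above exists so that the configuration builder can evaluate constant function calls appearing in dictionary DDL (the new ASTFunction branch) and fold their results into the generated XML. A rough sketch of that fold, using a hypothetical registry in place of FunctionFactory and a plain string in place of DB::Field:

#include <functional>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>

using Field = std::string;                       // simplified value type
using ConstantFunction = std::function<Field()>; // zero-argument, context-free here

// Hypothetical stand-in for FunctionFactory; the entry value is illustrative.
std::map<std::string, ConstantFunction> registry = {
    {"currentDatabase", [] { return Field("default"); }},
};

// Resolve a function by name and fold it to a literal, as the dictionary
// configuration builder now does for constant calls in the source definition.
Field evaluateConstant(const std::string & name)
{
    auto it = registry.find(name);
    if (it == registry.end())
        throw std::runtime_error("Unknown function: " + name);
    return it->second();
}

int main()
{
    // The resulting string would become the text node of the XML element.
    std::cout << evaluateConstant("currentDatabase") << '\n';
}

In the real code the resolved function is built and executed through FunctionFactory, and the resulting Field is serialized with getFieldAsString before being appended as a text node.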
+OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Disks/ya.make.in b/src/Disks/ya.make.in index ee13bb272cd..ce205bd25ac 100644 --- a/src/Disks/ya.make.in +++ b/src/Disks/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 1ff2f0e2a9b..da63151613e 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -78,7 +78,9 @@ FormatSettings getFormatSettings(const Context & context, format_settings.import_nested_json = settings.input_format_import_nested_json; format_settings.input_allow_errors_num = settings.input_format_allow_errors_num; format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio; + format_settings.json.array_of_rows = settings.output_format_json_array_of_rows; format_settings.json.escape_forward_slashes = settings.output_format_json_escape_forward_slashes; + format_settings.json.named_tuples_as_objects = settings.output_format_json_named_tuples_as_objects; format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers; format_settings.json.quote_denormals = settings.output_format_json_quote_denormals; format_settings.null_as_default = settings.input_format_null_as_default; @@ -160,6 +162,9 @@ BlockInputStreamPtr FormatFactory::getInput( // (segmentator + two parsers + reader). bool parallel_parsing = settings.input_format_parallel_parsing && file_segmentation_engine && settings.max_threads >= 4; + if (settings.min_chunk_bytes_for_parallel_parsing * settings.max_threads * 2 > settings.max_memory_usage) + parallel_parsing = false; + if (parallel_parsing && name == "JSONEachRow") { /// FIXME ParallelParsingBlockInputStream doesn't support formats with non-trivial readPrefix() and readSuffix() @@ -188,7 +193,7 @@ BlockInputStreamPtr FormatFactory::getInput( row_input_format_params, format_settings}; ParallelParsingBlockInputStream::Params params{buf, input_getter, input_creator_params, file_segmentation_engine, - static_cast(settings.max_threads), + settings.max_threads, settings.min_chunk_bytes_for_parallel_parsing}; return std::make_shared(params); } @@ -334,7 +339,6 @@ void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegm target = std::move(file_segmentation_engine); } - FormatFactory & FormatFactory::instance() { static FormatFactory ret; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index b3c01ddcf14..8fe3756e012 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -86,9 +86,11 @@ struct FormatSettings struct { + bool array_of_rows = false; bool quote_64bit_integers = true; bool quote_denormals = true; bool escape_forward_slashes = true; + bool named_tuples_as_objects = false; bool serialize_as_strings = false; } json; diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 96b2c4ee384..89fb7c6cc02 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -15,7 +15,6 @@ void registerFileSegmentationEngineCSV(FormatFactory & factory); void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory); void registerFileSegmentationEngineRegexp(FormatFactory & factory); void registerFileSegmentationEngineJSONAsString(FormatFactory & factory); -void registerFileSegmentationEngineLineAsString(FormatFactory & factory); /// Formats for both input/output. 
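The new guard in FormatFactory::getInput above estimates the memory that parallel parsing would keep in flight as min_chunk_bytes_for_parallel_parsing * max_threads * 2 (roughly one chunk per parser thread, with the factor of two presumably leaving headroom for buffered segments) and falls back to single-threaded parsing when that estimate alone would exceed max_memory_usage. A small self-contained sketch of the arithmetic, with illustrative numbers rather than ClickHouse defaults:

#include <cstdint>
#include <iostream>

// Same shape as the check in the diff: estimated peak ~ chunk_size * threads * 2.
bool allowParallelParsing(uint64_t min_chunk_bytes, uint64_t max_threads, uint64_t max_memory_usage)
{
    return min_chunk_bytes * max_threads * 2 <= max_memory_usage;
}

int main()
{
    const uint64_t mib = 1 << 20;
    // 10 MiB chunks on 16 threads keep ~320 MiB in flight:
    std::cout << allowParallelParsing(10 * mib, 16, 256 * mib) << '\n'; // 0: disabled, over the limit
    std::cout << allowParallelParsing(10 * mib, 16, 512 * mib) << '\n'; // 1: allowed, fits the limit
}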
@@ -90,7 +89,6 @@ void registerFormats() registerFileSegmentationEngineJSONEachRow(factory); registerFileSegmentationEngineRegexp(factory); registerFileSegmentationEngineJSONAsString(factory); - registerFileSegmentationEngineLineAsString(factory); registerInputFormatNative(factory); registerOutputFormatNative(factory); diff --git a/src/Formats/ya.make b/src/Formats/ya.make index 2dc3adc021d..6b72ec397d5 100644 --- a/src/Formats/ya.make +++ b/src/Formats/ya.make @@ -1,4 +1,6 @@ # This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Formats/ya.make.in b/src/Formats/ya.make.in index f7d03e7b00f..027e04ee650 100644 --- a/src/Formats/ya.make.in +++ b/src/Formats/ya.make.in @@ -1,3 +1,5 @@ +OWNER(g:clickhouse) + LIBRARY() PEERDIR( diff --git a/src/Functions/CustomWeekTransforms.h b/src/Functions/CustomWeekTransforms.h index 53baaff8db9..afcbadc835c 100644 --- a/src/Functions/CustomWeekTransforms.h +++ b/src/Functions/CustomWeekTransforms.h @@ -116,7 +116,7 @@ template struct CustomWeekTransformImpl { template - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/, Transform transform = {}) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/, Transform transform = {}) { const auto op = Transformer{std::move(transform)}; diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 52cc43c3847..4ad99b528ea 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -683,7 +683,7 @@ struct Transformer template struct DateTimeTransformImpl { - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/, const Transform & transform = {}) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/, const Transform & transform = {}) { using Op = Transformer; diff --git a/src/Functions/FunctionBase64Conversion.h b/src/Functions/FunctionBase64Conversion.h index adc131053e2..4bc2a779cf4 100644 --- a/src/Functions/FunctionBase64Conversion.h +++ b/src/Functions/FunctionBase64Conversion.h @@ -91,7 +91,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr column_string = arguments[0].column; const ColumnString * input = checkAndGetColumn(column_string.get()); diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 43ff42956cd..8c684d3578f 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -613,7 +613,7 @@ class FunctionBinaryArithmetic : public IFunction } /// Multiply aggregation state by integer constant: by merging it with itself specified number of times. 
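Most of the remaining hunks in this patch are one mechanical change: executeImpl and its helpers now receive arguments as const ColumnsWithTypeAndName &, so implementations can no longer mutate the caller's argument list in place; the few that need to rearrange arguments (such as executeAggregateMultiply just below, which copies into new_arguments) make an explicit local copy first. A simplified sketch of the pattern with stand-in types:

#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct ColumnWithTypeAndName { std::string name; };
using ColumnsWithTypeAndName = std::vector<ColumnWithTypeAndName>;

// Read-only path: no copy, and callers may safely share the vector.
std::string describe(const ColumnsWithTypeAndName & arguments)
{
    std::string out;
    for (const auto & arg : arguments)
        out += arg.name + ' ';
    return out;
}

// Rearranging path: copy locally instead of mutating the caller's vector.
ColumnsWithTypeAndName swapFirstTwo(const ColumnsWithTypeAndName & arguments)
{
    ColumnsWithTypeAndName new_arguments = arguments; // explicit, visible copy
    if (new_arguments.size() >= 2)
        std::swap(new_arguments[0], new_arguments[1]);
    return new_arguments;
}

int main()
{
    ColumnsWithTypeAndName args{{"state"}, {"factor"}};
    std::cout << describe(swapFirstTwo(args)) << '\n'; // factor state
}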
- ColumnPtr executeAggregateMultiply(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const + ColumnPtr executeAggregateMultiply(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const { ColumnsWithTypeAndName new_arguments = arguments; if (WhichDataType(new_arguments[1].type).isAggregateFunction()) @@ -680,7 +680,7 @@ class FunctionBinaryArithmetic : public IFunction } /// Merge two aggregation states together. - ColumnPtr executeAggregateAddition(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const + ColumnPtr executeAggregateAddition(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const { const IColumn & lhs_column = *arguments[0].column; const IColumn & rhs_column = *arguments[1].column; @@ -712,7 +712,7 @@ class FunctionBinaryArithmetic : public IFunction return column_to; } - ColumnPtr executeDateTimeIntervalPlusMinus(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, + ColumnPtr executeDateTimeIntervalPlusMinus(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, const FunctionOverloadResolverPtr & function_builder) const { ColumnsWithTypeAndName new_arguments = arguments; @@ -847,7 +847,7 @@ public: return type_res; } - ColumnPtr executeFixedString(ColumnsWithTypeAndName & arguments) const + ColumnPtr executeFixedString(const ColumnsWithTypeAndName & arguments) const { using OpImpl = FixedStringOperationImpl>; @@ -923,7 +923,7 @@ public: } template - ColumnPtr executeNumeric(ColumnsWithTypeAndName & arguments, const A & left, const B & right) const + ColumnPtr executeNumeric(const ColumnsWithTypeAndName & arguments, const A & left, const B & right) const { using LeftDataType = std::decay_t; using RightDataType = std::decay_t; @@ -1047,7 +1047,7 @@ public: return nullptr; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { /// Special case when multiply aggregate function state if (isAggregateMultiply(arguments[0].type, arguments[1].type)) @@ -1181,7 +1181,7 @@ public: { } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { if (left.column && isColumnConst(*left.column) && arguments.size() == 1) { @@ -1206,11 +1206,7 @@ public: bool hasInformationAboutMonotonicity() const override { std::string_view name_ = Name::name; - if (name_ == "minus" || name_ == "plus" || name_ == "divide" || name_ == "intDiv") - { - return true; - } - return false; + return (name_ == "minus" || name_ == "plus" || name_ == "divide" || name_ == "intDiv"); } Monotonicity getMonotonicityForRange(const IDataType &, const Field & left_point, const Field & right_point) const override diff --git a/src/Functions/FunctionBitTestMany.h b/src/Functions/FunctionBitTestMany.h index 0c8b803bd22..6d527c66390 100644 --- a/src/Functions/FunctionBitTestMany.h +++ b/src/Functions/FunctionBitTestMany.h @@ -54,7 +54,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const 
override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override { const auto * value_col = arguments.front().column.get(); @@ -75,7 +75,7 @@ public: private: template ColumnPtr execute( - ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const IColumn * const value_col_untyped) const { if (const auto value_col = checkAndGetColumn>(value_col_untyped)) diff --git a/src/Functions/FunctionCustomWeekToSomething.h b/src/Functions/FunctionCustomWeekToSomething.h index 74d6a2b5182..8a343cffb95 100644 --- a/src/Functions/FunctionCustomWeekToSomething.h +++ b/src/Functions/FunctionCustomWeekToSomething.h @@ -96,7 +96,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { const IDataType * from_type = arguments[0].type.get(); WhichDataType which(from_type); diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index bf2d20ceba7..70e2616eeac 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -305,7 +305,7 @@ private: template struct DateTimeAddIntervalImpl { - static ColumnPtr execute(Transform transform, ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) + static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) { using FromValueType = typename FromDataType::FieldType; using FromColumnType = typename FromDataType::ColumnType; @@ -463,7 +463,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override { const IDataType * from_type = arguments[0].type.get(); WhichDataType which(from_type); diff --git a/src/Functions/FunctionDateOrDateTimeToSomething.h b/src/Functions/FunctionDateOrDateTimeToSomething.h index 2d2e4a7ad6f..e0676f3dc0f 100644 --- a/src/Functions/FunctionDateOrDateTimeToSomething.h +++ b/src/Functions/FunctionDateOrDateTimeToSomething.h @@ -95,7 +95,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { const IDataType * from_type = arguments[0].type.get(); WhichDataType which(from_type); diff --git a/src/Functions/FunctionFQDN.cpp b/src/Functions/FunctionFQDN.cpp index b47675d63b4..7b3b89eb511 100644 --- a/src/Functions/FunctionFQDN.cpp +++ 
b/src/Functions/FunctionFQDN.cpp @@ -34,7 +34,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override { return result_type->createColumnConst( input_rows_count, getFQDNOrHostName())->convertToFullColumnIfConst(); diff --git a/src/Functions/FunctionJoinGet.cpp b/src/Functions/FunctionJoinGet.cpp index 4e97951fbc0..6b15bf821b2 100644 --- a/src/Functions/FunctionJoinGet.cpp +++ b/src/Functions/FunctionJoinGet.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes } template -ColumnPtr ExecutableFunctionJoinGet::execute(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) +ColumnPtr ExecutableFunctionJoinGet::execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const { ColumnsWithTypeAndName keys; for (size_t i = 2; i < arguments.size(); ++i) diff --git a/src/Functions/FunctionJoinGet.h b/src/Functions/FunctionJoinGet.h index 780b59e20f4..27f348e9698 100644 --- a/src/Functions/FunctionJoinGet.h +++ b/src/Functions/FunctionJoinGet.h @@ -24,7 +24,7 @@ public: bool useDefaultImplementationForLowCardinalityColumns() const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) override; + ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override; String getName() const override { return name; } diff --git a/src/Functions/FunctionMathBinaryFloat64.h b/src/Functions/FunctionMathBinaryFloat64.h index ea222379e1c..0a0688dc75c 100644 --- a/src/Functions/FunctionMathBinaryFloat64.h +++ b/src/Functions/FunctionMathBinaryFloat64.h @@ -204,7 +204,7 @@ private: return nullptr; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnWithTypeAndName & col_left = arguments[0]; const ColumnWithTypeAndName & col_right = arguments[1]; diff --git a/src/Functions/FunctionMathConstFloat64.h b/src/Functions/FunctionMathConstFloat64.h index 42729d5e9f6..f03f469bc35 100644 --- a/src/Functions/FunctionMathConstFloat64.h +++ b/src/Functions/FunctionMathConstFloat64.h @@ -25,7 +25,7 @@ private: return std::make_shared(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override { return result_type->createColumnConst(input_rows_count, Impl::value); } diff --git a/src/Functions/FunctionMathUnary.h b/src/Functions/FunctionMathUnary.h index abf38d277f3..49b0428811a 100644 --- a/src/Functions/FunctionMathUnary.h +++ b/src/Functions/FunctionMathUnary.h @@ -148,7 +148,7 @@ private: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnWithTypeAndName & col = arguments[0]; 
ColumnPtr res; diff --git a/src/Functions/FunctionNumericPredicate.h b/src/Functions/FunctionNumericPredicate.h index 825a8b0de15..72a17adac4c 100644 --- a/src/Functions/FunctionNumericPredicate.h +++ b/src/Functions/FunctionNumericPredicate.h @@ -46,7 +46,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const auto * in = arguments.front().column.get(); diff --git a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index 612e0b3b046..2899bc259d5 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -63,7 +63,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IColumn * haystack_column = arguments[0].column.get(); const IColumn * needle_column = arguments[1].column.get(); @@ -159,7 +159,7 @@ public: #endif } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { return selector.selectAndExecute(arguments, result_type, input_rows_count); } diff --git a/src/Functions/FunctionStringOrArrayToT.h b/src/Functions/FunctionStringOrArrayToT.h index f806106560c..6330d8f90d6 100644 --- a/src/Functions/FunctionStringOrArrayToT.h +++ b/src/Functions/FunctionStringOrArrayToT.h @@ -50,7 +50,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override { const ColumnPtr column = arguments[0].column; if (const ColumnString * col = checkAndGetColumn(column.get())) diff --git a/src/Functions/FunctionStringReplace.h b/src/Functions/FunctionStringReplace.h index 4ec85591726..bd8edbf9202 100644 --- a/src/Functions/FunctionStringReplace.h +++ b/src/Functions/FunctionStringReplace.h @@ -52,7 +52,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr column_src = arguments[0].column; const ColumnPtr column_needle = arguments[1].column; diff --git a/src/Functions/FunctionStringToString.h b/src/Functions/FunctionStringToString.h index db85e85a053..4123b41c547 100644 --- a/src/Functions/FunctionStringToString.h +++ b/src/Functions/FunctionStringToString.h @@ -52,7 +52,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & 
arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr column = arguments[0].column; if (const ColumnString * col = checkAndGetColumn(column.get())) diff --git a/src/Functions/FunctionUnaryArithmetic.h b/src/Functions/FunctionUnaryArithmetic.h index e62781cc3a1..389c171bfce 100644 --- a/src/Functions/FunctionUnaryArithmetic.h +++ b/src/Functions/FunctionUnaryArithmetic.h @@ -154,7 +154,7 @@ public: return result; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { ColumnPtr result_column; bool valid = castType(arguments[0].type.get(), [&](const auto & type) diff --git a/src/Functions/FunctionUnixTimestamp64.h b/src/Functions/FunctionUnixTimestamp64.h index 2a5dee7734a..20e225990bd 100644 --- a/src/Functions/FunctionUnixTimestamp64.h +++ b/src/Functions/FunctionUnixTimestamp64.h @@ -65,7 +65,7 @@ public: } } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { using SourceColumnType = typename SourceDataType::ColumnType; using ResultColumnType = typename ResultDataType::ColumnType; diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index 68d8b41407d..5a5c5dc05b0 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -178,7 +178,7 @@ private: return std::make_shared(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { using namespace OpenSSLDetails; @@ -448,7 +448,7 @@ private: return std::make_shared(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { using namespace OpenSSLDetails; diff --git a/src/Functions/FunctionsBitmap.h b/src/Functions/FunctionsBitmap.h index ec43ae6351f..93da4906658 100644 --- a/src/Functions/FunctionsBitmap.h +++ b/src/Functions/FunctionsBitmap.h @@ -122,7 +122,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /* input_rows_count */) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /* input_rows_count */) const override { const IDataType * from_type = arguments[0].type.get(); const auto * array_type = typeid_cast(from_type); @@ -146,7 +146,7 @@ public: private: template - ColumnPtr executeBitmapData(DataTypes & argument_types, ColumnsWithTypeAndName & arguments) const + ColumnPtr executeBitmapData(DataTypes & argument_types, const ColumnsWithTypeAndName & arguments) const { // input data const ColumnArray * array = typeid_cast(arguments[0].column.get()); @@ -207,7 +207,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const 
override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { // input data const auto & return_type = result_type; @@ -240,7 +240,7 @@ private: template void executeIntType( - ColumnsWithTypeAndName & arguments, size_t input_rows_count, IColumn & res_data_col, ColumnArray::Offsets & res_offsets) + const ColumnsWithTypeAndName & arguments, size_t input_rows_count, IColumn & res_data_col, ColumnArray::Offsets & res_offsets) const { const ColumnAggregateFunction * column @@ -299,7 +299,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IDataType * from_type = arguments[0].type.get(); const DataTypeAggregateFunction * aggr_type = typeid_cast(from_type); @@ -321,7 +321,7 @@ private: using ToType = UInt64; template - ColumnPtr executeIntType(ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + ColumnPtr executeIntType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { const IColumn * column_ptrs[3]; bool is_column_const[3]; @@ -417,7 +417,7 @@ public: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); for (size_t i = 0; i < 2; ++i) { - auto array_type = typeid_cast(arguments[i + 1].get()); + const auto * array_type = typeid_cast(arguments[i + 1].get()); String msg(i == 0 ? "Second" : "Third"); msg += " argument for function " + getName() + " must be an UInt32 array but it has type " + arguments[i + 1]->getName() + "."; if (!array_type) @@ -433,7 +433,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IDataType * from_type = arguments[0].type.get(); const DataTypeAggregateFunction * aggr_type = typeid_cast(from_type); @@ -455,7 +455,7 @@ private: using ToType = UInt64; template - ColumnPtr executeIntType(ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + ColumnPtr executeIntType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { const IColumn * column_ptrs[3]; bool is_column_const[3]; @@ -565,7 +565,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { auto col_to = ColumnVector::create(input_rows_count); typename ColumnVector::Container & vec_to = col_to->getData(); @@ -593,7 +593,7 @@ private: template void executeIntType( - ColumnsWithTypeAndName & arguments, size_t input_rows_count, typename ColumnVector::Container & vec_to) const + const ColumnsWithTypeAndName & arguments, size_t input_rows_count, typename ColumnVector::Container & vec_to) const { const ColumnAggregateFunction * column = typeid_cast(arguments[0].column.get()); @@ -735,7 +735,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & 
arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { auto col_to = ColumnVector::create(input_rows_count); typename ColumnVector::Container & vec_to = col_to->getData(); @@ -761,7 +761,7 @@ public: private: template void executeIntType( - ColumnsWithTypeAndName & arguments, size_t input_rows_count, typename ColumnVector::Container & vec_to) const + const ColumnsWithTypeAndName & arguments, size_t input_rows_count, typename ColumnVector::Container & vec_to) const { const IColumn * column_ptrs[2]; bool is_column_const[2]; @@ -832,7 +832,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { auto col_to = ColumnVector::create(input_rows_count); typename ColumnVector::Container & vec_to = col_to->getData(); @@ -858,7 +858,7 @@ public: private: template void executeIntType( - ColumnsWithTypeAndName & arguments, size_t input_rows_count, typename ColumnVector::Container & vec_to) const + const ColumnsWithTypeAndName & arguments, size_t input_rows_count, typename ColumnVector::Container & vec_to) const { const ColumnAggregateFunction * column_ptrs[2]; bool is_column_const[2]; @@ -967,7 +967,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IDataType * from_type = arguments[0].type.get(); const DataTypeAggregateFunction * aggr_type = typeid_cast(from_type); @@ -987,7 +987,7 @@ public: private: template - ColumnPtr executeBitmapData(ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + ColumnPtr executeBitmapData(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { const ColumnAggregateFunction * column_ptrs[2]; bool is_column_const[2]; diff --git a/src/Functions/FunctionsCoding.h b/src/Functions/FunctionsCoding.h index 6ae75318f72..ac3262f5131 100644 --- a/src/Functions/FunctionsCoding.h +++ b/src/Functions/FunctionsCoding.h @@ -88,7 +88,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const auto & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -168,7 +168,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const auto & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -277,7 +277,7 @@ public: bool 
useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr & column = arguments[0].column; @@ -339,7 +339,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr & column = arguments[0].column; @@ -407,7 +407,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr & column = arguments[0].column; @@ -460,7 +460,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const auto & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -578,7 +578,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr & column = arguments[0].column; @@ -688,7 +688,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr & column = arguments[0].column; @@ -755,7 +755,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnWithTypeAndName & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -857,7 +857,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnWithTypeAndName & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -1187,7 +1187,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr 
executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const IColumn * column = arguments[0].column.get(); ColumnPtr res_column; @@ -1255,7 +1255,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr & column = arguments[0].column; @@ -1335,7 +1335,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { auto col_str = ColumnString::create(); ColumnString::Chars & out_vec = col_str->getChars(); @@ -1461,7 +1461,7 @@ public: } } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const IColumn * in_column = arguments[0].column.get(); ColumnPtr out_column; @@ -1599,7 +1599,7 @@ public: } } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const IColumn * column = arguments[0].column.get(); ColumnPtr res_column; @@ -1668,7 +1668,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto & col_type_name_ip = arguments[0]; const ColumnPtr & column_ip = col_type_name_ip.column; @@ -1782,7 +1782,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto & col_type_name_ip = arguments[0]; const ColumnPtr & column_ip = col_type_name_ip.column; diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index 057f52501e5..e674f8690ff 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -1136,7 +1136,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { const auto & col_with_type_and_name_left = arguments[0]; const auto & col_with_type_and_name_right = arguments[1]; diff --git a/src/Functions/FunctionsConsistentHashing.h 
b/src/Functions/FunctionsConsistentHashing.h index edadfd659e2..faf66579fc4 100644 --- a/src/Functions/FunctionsConsistentHashing.h +++ b/src/Functions/FunctionsConsistentHashing.h @@ -65,7 +65,7 @@ public: return {1}; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { if (isColumnConst(*arguments[1].column)) return executeConstBuckets(arguments); @@ -93,7 +93,7 @@ private: return static_cast(buckets); } - ColumnPtr executeConstBuckets(ColumnsWithTypeAndName & arguments) const + ColumnPtr executeConstBuckets(const ColumnsWithTypeAndName & arguments) const { Field buckets_field = (*arguments[1].column)[0]; BucketsType num_buckets; diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 70e8904cfc1..f9f614e47f2 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -102,7 +103,7 @@ struct ConvertImpl template static ColumnPtr NO_SANITIZE_UNDEFINED execute( - ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/, + const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/, Additions additions [[maybe_unused]] = Additions()) { const ColumnWithTypeAndName & named_from = arguments[0]; @@ -153,6 +154,9 @@ struct ConvertImpl { if constexpr (std::is_same_v || std::is_same_v) throw Exception("Unexpected UInt128 to big int conversion", ErrorCodes::NOT_IMPLEMENTED); + /// If the source value is NaN or Inf, throw an exception. + else if (!isFinite(vec_from[i])) + throw Exception("Unexpected inf or nan to big int conversion", ErrorCodes::NOT_IMPLEMENTED); else vec_to[i] = bigint_cast(vec_from[i]); } @@ -442,7 +446,7 @@ struct FormatImpl> template struct ConvertImpl, DataTypeNumber, Name> { - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) { return arguments[0].column; } @@ -455,7 +459,7 @@ struct ConvertImpl, ColumnDecimal, ColumnVector>; - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) { const auto & col_with_type_and_name = arguments[0]; const auto & type = static_cast(*col_with_type_and_name.type); @@ -509,7 +513,7 @@ struct ConvertImpl - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, size_t input_rows_count, + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, size_t input_rows_count, Additions additions [[maybe_unused]] = Additions()) { using ColVecTo = typename ToDataType::ColumnType; @@ -932,7 +936,7 @@ struct ConvertImpl template struct ConvertImpl, T, Name> { - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) { return arguments[0].column; } @@ -945,7 +949,7 @@ struct ConvertImpl, T, Name> template struct ConvertImpl { - static
ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) { if (const ColumnFixedString * col_from = checkAndGetColumn(arguments[0].column.get())) { @@ -1141,7 +1145,7 @@ public: ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } bool canBeExecutedOnDefaultArguments() const override { return false; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { try { @@ -1186,7 +1190,7 @@ public: } private: - ColumnPtr executeInternal(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + ColumnPtr executeInternal(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const { if (arguments.empty()) throw Exception{"Function " + getName() + " expects at least 1 arguments", @@ -1406,7 +1410,7 @@ public: } template - ColumnPtr executeInternal(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, UInt32 scale = 0) const + ColumnPtr executeInternal(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, UInt32 scale = 0) const { const IDataType * from_type = arguments[0].type.get(); @@ -1424,7 +1428,7 @@ public: return nullptr; } - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { ColumnPtr result_column; @@ -1874,20 +1878,37 @@ class ExecutableFunctionCast : public IExecutableFunctionImpl public: using WrapperType = std::function; - explicit ExecutableFunctionCast(WrapperType && wrapper_function_, const char * name_) - : wrapper_function(std::move(wrapper_function_)), name(name_) {} + struct Diagnostic + { + std::string column_from; + std::string column_to; + }; + + explicit ExecutableFunctionCast( + WrapperType && wrapper_function_, const char * name_, std::optional diagnostic_) + : wrapper_function(std::move(wrapper_function_)), name(name_), diagnostic(std::move(diagnostic_)) {} String getName() const override { return name; } protected: - ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) override + ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { /// drop second argument, pass others ColumnsWithTypeAndName new_arguments{arguments.front()}; if (arguments.size() > 2) new_arguments.insert(std::end(new_arguments), std::next(std::begin(arguments), 2), std::end(arguments)); - return wrapper_function(new_arguments, result_type, nullptr, input_rows_count); + try + { + return wrapper_function(new_arguments, result_type, nullptr, input_rows_count); + } + catch (Exception & e) + { + if (diagnostic) + e.addMessage("while converting source column " + backQuoteIfNeed(diagnostic->column_from) + + " to destination column " + backQuoteIfNeed(diagnostic->column_to)); + throw; + } } bool useDefaultImplementationForNulls() const override { return false; } @@ -1898,6 +1919,7 
@@ protected: private: WrapperType wrapper_function; const char * name; + std::optional diagnostic; }; @@ -1908,11 +1930,12 @@ class FunctionCast final : public IFunctionBaseImpl public: using WrapperType = std::function; using MonotonicityForRange = std::function; + using Diagnostic = ExecutableFunctionCast::Diagnostic; FunctionCast(const char * name_, MonotonicityForRange && monotonicity_for_range_ - , const DataTypes & argument_types_, const DataTypePtr & return_type_) - : name(name_), monotonicity_for_range(monotonicity_for_range_) - , argument_types(argument_types_), return_type(return_type_) + , const DataTypes & argument_types_, const DataTypePtr & return_type_, std::optional diagnostic_) + : name(name_), monotonicity_for_range(monotonicity_for_range_) + , argument_types(argument_types_), return_type(return_type_), diagnostic(std::move(diagnostic_)) { } @@ -1921,8 +1944,18 @@ public: ExecutableFunctionImplPtr prepare(const ColumnsWithTypeAndName & /*sample_columns*/) const override { - return std::make_unique( - prepareUnpackDictionaries(getArgumentTypes()[0], getResultType()), name); + try + { + return std::make_unique( + prepareUnpackDictionaries(getArgumentTypes()[0], getResultType()), name, diagnostic); + } + catch (Exception & e) + { + if (diagnostic) + e.addMessage("while converting source column " + backQuoteIfNeed(diagnostic->column_from) + + " to destination column " + backQuoteIfNeed(diagnostic->column_to)); + throw; + } } String getName() const override { return name; } @@ -1948,6 +1981,8 @@ private: DataTypes argument_types; DataTypePtr return_type; + std::optional diagnostic; + template WrapperType createWrapper(const DataTypePtr & from_type, const DataType * const, bool requested_result_is_nullable) const { @@ -2558,14 +2593,19 @@ class CastOverloadResolver : public IFunctionOverloadResolverImpl { public: using MonotonicityForRange = FunctionCast::MonotonicityForRange; + using Diagnostic = FunctionCast::Diagnostic; static constexpr auto name = "CAST"; static FunctionOverloadResolverImplPtr create(const Context & context); - static FunctionOverloadResolverImplPtr createImpl(bool keep_nullable) { return std::make_unique(keep_nullable); } + static FunctionOverloadResolverImplPtr createImpl(bool keep_nullable, std::optional diagnostic = {}) + { + return std::make_unique(keep_nullable, std::move(diagnostic)); + } - explicit CastOverloadResolver(bool keep_nullable_) - : keep_nullable(keep_nullable_) + + explicit CastOverloadResolver(bool keep_nullable_, std::optional diagnostic_ = {}) + : keep_nullable(keep_nullable_), diagnostic(std::move(diagnostic_)) {} String getName() const override { return name; } @@ -2584,7 +2624,7 @@ protected: data_types[i] = arguments[i].type; auto monotonicity = getMonotonicityInformation(arguments.front().type, return_type.get()); - return std::make_unique(name, std::move(monotonicity), data_types, return_type); + return std::make_unique(name, std::move(monotonicity), data_types, return_type, diagnostic); } DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const override @@ -2612,6 +2652,7 @@ protected: private: bool keep_nullable; + std::optional diagnostic; template static auto monotonicityForType(const DataType * const) diff --git a/src/Functions/FunctionsEmbeddedDictionaries.h b/src/Functions/FunctionsEmbeddedDictionaries.h index 7c1221601f6..01456365740 100644 --- a/src/Functions/FunctionsEmbeddedDictionaries.h +++ b/src/Functions/FunctionsEmbeddedDictionaries.h @@ -183,7 +183,7 @@ public: bool isDeterministic() const 
diff --git a/src/Functions/FunctionsEmbeddedDictionaries.h b/src/Functions/FunctionsEmbeddedDictionaries.h
index 7c1221601f6..01456365740 100644
--- a/src/Functions/FunctionsEmbeddedDictionaries.h
+++ b/src/Functions/FunctionsEmbeddedDictionaries.h
@@ -183,7 +183,7 @@ public:

     bool isDeterministic() const override { return false; }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
     {
         /// The dictionary key that defines the "point of view".
         std::string dict_key;
@@ -279,7 +279,7 @@ public:

     bool isDeterministic() const override { return false; }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
     {
         /// The dictionary key that defines the "point of view".
         std::string dict_key;
@@ -415,7 +415,7 @@ public:

     bool isDeterministic() const override { return false; }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
     {
         /// The dictionary key that defines the "point of view".
         std::string dict_key;
@@ -620,7 +620,7 @@ public:

     bool isDeterministic() const override { return false; }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
     {
         RegionsNames::Language language = RegionsNames::Language::ru;
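Aside: the FunctionsExternalDictionaries.h hunks that follow all touch the same dispatch idiom: an untyped dictionary pointer is tried against each concrete dictionary type in turn, and the argument block is now passed by const reference. A standalone sketch of that idiom with simplified stand-in types (dynamic_cast here plays the role of ClickHouse's typeid_cast, and the dictionary classes are invented for illustration):

#include <memory>
#include <string>

struct IDictionaryBase { virtual ~IDictionaryBase() = default; };
struct FlatDictionary : IDictionaryBase { std::string getString() const { return "flat"; } };

template <typename DictionaryType>
std::string executeDispatch(const std::shared_ptr<const IDictionaryBase> & dict_ptr)
{
    /// downcast per concrete dictionary type; in the real code a failed cast
    /// falls through so the next dictionary type can be tried
    const auto * dict = dynamic_cast<const DictionaryType *>(dict_ptr.get());
    if (!dict)
        return {};
    return dict->getString();
}

int main()
{
    std::shared_ptr<const IDictionaryBase> dict = std::make_shared<FlatDictionary>();
    return executeDispatch<FlatDictionary>(dict).empty() ? 1 : 0;
}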
diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h
index 0fae3de1fb2..92a1389d212 100644
--- a/src/Functions/FunctionsExternalDictionaries.h
+++ b/src/Functions/FunctionsExternalDictionaries.h
@@ -163,7 +163,7 @@ private:

     bool isDeterministic() const override { return false; }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
         /** Do not require existence of the dictionary if the function is called for empty columns.
          * This is needed to allow successful query analysis on a server,
@@ -204,7 +204,7 @@ private:

     template <typename DictionaryType>
     ColumnPtr executeDispatchSimple(
-        ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
+        const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
     {
         const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
         if (!dict)
@@ -227,7 +227,7 @@ private:

     template <typename DictionaryType>
     ColumnPtr executeDispatchComplex(
-        ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
+        const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
     {
         const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
         if (!dict)
@@ -324,7 +324,7 @@ private:

     bool isDeterministic() const override { return false; }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
         if (input_rows_count == 0)
             return result_type->createColumn();
@@ -359,7 +359,7 @@ private:

     template <typename DictionaryType>
     ColumnPtr executeDispatch(
-        ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
+        const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
     {
         const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
         if (!dict)
@@ -388,7 +388,7 @@ private:

     template <typename DictionaryType>
     ColumnPtr executeDispatchComplex(
-        ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
+        const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
     {
         const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
         if (!dict)
@@ -423,7 +423,7 @@ private:

     template <typename DictionaryType>
     ColumnPtr executeDispatchRange(
-        ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
+        const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
     {
         const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
         if (!dict)
@@ -502,7 +502,7 @@ private:

     bool isDeterministic() const override { return false; }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
         if (input_rows_count == 0)
             return result_type->createColumn();
@@ -621,7 +621,7 @@ private:

     template <typename DictionaryType>
     ColumnPtr executeDispatchComplex(
-        ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
+        const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
     {
         const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
         if (!dict)
@@ -839,7 +839,7 @@ private:

     bool isDeterministic() const override { return false; }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
         if (input_rows_count == 0)
             return result_type->createColumn();
@@ -873,7 +873,7 @@ private:
     }

     template <typename DictionaryType>
-    ColumnPtr executeDispatch(ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
+    ColumnPtr executeDispatch(const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
     {
         const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
         if (!dict)
@@ -926,7 +926,7 @@ private:

     template <typename DictionaryType>
     ColumnPtr executeDispatchComplex(
-        ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
+        const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
     {
         const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
         if (!dict)
@@ -967,7 +967,7 @@ private:

     template <typename DictionaryType>
     ColumnPtr executeDispatchRange(
-        ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
+        const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
     {
         const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
         if (!dict)
@@ -1094,7 +1094,7 @@ private:

     bool isDeterministic() const override { return false; }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
         if (input_rows_count == 0)
             return result_type->createColumn();
@@ -1127,7 +1127,7 @@ private:
     }

     template <typename DictionaryType>
-    ColumnPtr executeDispatch(ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
+    ColumnPtr executeDispatch(const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
     {
         const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
         if (!dict)
@@ -1150,7 +1150,7 @@ private:

     template <typename DictionaryType>
     ColumnPtr executeDispatch(
-        ColumnsWithTypeAndName & arguments, const DictionaryType * dict,
+        const ColumnsWithTypeAndName & arguments, const DictionaryType * dict,
         const std::string & attr_name, const ColumnUInt64 * id_col) const
     {
         const auto * default_col_untyped = arguments[3].column.get();
@@ -1189,7 +1189,7 @@ private:

     template <typename DictionaryType>
     ColumnPtr executeDispatch(
-        ColumnsWithTypeAndName & arguments, const DictionaryType * dict,
+        const ColumnsWithTypeAndName & arguments, const DictionaryType * dict,
         const std::string & attr_name, const ColumnConst * id_col) const
     {
         const auto * default_col_untyped = arguments[3].column.get();
@@ -1246,7 +1246,7 @@ private:

     template <typename DictionaryType>
     ColumnPtr executeDispatchComplex(
-        ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
+        const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
     {
         const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
         if (!dict)
@@ -1472,7 +1472,7 @@ private:

     bool isDeterministic() const override { return false; }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
         return impl->executeImpl(arguments, result_type, input_rows_count);
     }
@@ -1613,7 +1613,7 @@ private:

     bool isDeterministic() const override { return false; }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
         return impl->executeImpl(arguments, result_type, input_rows_count);
     }
@@ -1661,7 +1661,7 @@ private:

     bool isDeterministic() const override { return false; }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
         if (input_rows_count == 0)
             return result_type->createColumn();
@@ -1679,7 +1679,7 @@ private:
     }

     template <typename DictionaryType>
-    ColumnPtr executeDispatch(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
+    ColumnPtr executeDispatch(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
     {
         const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
         if (!dict)
@@ -1814,7 +1814,7 @@ private:

     bool isDeterministic() const override { return false; }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
         if (input_rows_count == 0)
             return result_type->createColumn();
@@ -1832,7 +1832,7 @@ private:
     }

     template <typename DictionaryType>
-    ColumnPtr executeDispatch(ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
+    ColumnPtr executeDispatch(const ColumnsWithTypeAndName & arguments, const std::shared_ptr<const IDictionaryBase> & dict_ptr) const
     {
         const auto * dict = typeid_cast<const DictionaryType *>(dict_ptr.get());
         if (!dict)
diff --git a/src/Functions/FunctionsExternalModels.cpp b/src/Functions/FunctionsExternalModels.cpp
index 9c1892012e1..ecec9383252 100644
--- a/src/Functions/FunctionsExternalModels.cpp
+++ b/src/Functions/FunctionsExternalModels.cpp
@@ -69,7 +69,7 @@ DataTypePtr FunctionModelEvaluate::getReturnTypeImpl(const ColumnsWithTypeAndNam
     return type;
 }

-ColumnPtr FunctionModelEvaluate::executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const
+ColumnPtr FunctionModelEvaluate::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const
 {
     const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get());
     if (!name_col)
@@ -85,7 +85,7 @@ ColumnPtr FunctionModelEvaluate::executeImpl(ColumnsWithTypeAndName & arguments,
     column_ptrs.reserve(arguments.size());
     for (auto arg : ext::range(1, arguments.size()))
     {
-        auto & column = arguments[arg].column;
+        const auto & column = arguments[arg].column;
         column_ptrs.push_back(column.get());
         if (auto full_column = column->convertToFullColumnIfConst())
         {
diff --git a/src/Functions/FunctionsExternalModels.h b/src/Functions/FunctionsExternalModels.h
index 336dc164248..9bb6cc5a77c 100644
--- a/src/Functions/FunctionsExternalModels.h
+++ b/src/Functions/FunctionsExternalModels.h
@@ -32,7 +32,7 @@ public:

     DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override;

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override;
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override;

 private:
     const ExternalModelsLoader & models_loader;
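Aside: in the FunctionsHashing.h hunks below, executeType picks its column class at compile time via std::conditional_t. A standalone sketch of that selection with stand-in templates (the real IsDecimalNumber, ColumnDecimal and ColumnVector live in ClickHouse; here IsDecimalNumber is modeled as a constexpr bool variable template):

#include <type_traits>
#include <vector>

template <typename T> inline constexpr bool IsDecimalNumber = false;
struct Decimal64 { long long value; };
template <> inline constexpr bool IsDecimalNumber<Decimal64> = true;

template <typename T> struct ColumnDecimal { std::vector<T> data; };
template <typename T> struct ColumnVector { std::vector<T> data; };

/// decimal inputs get a decimal column, everything else a plain vector column
template <typename FromType>
using ColVecType = std::conditional_t<IsDecimalNumber<FromType>, ColumnDecimal<FromType>, ColumnVector<FromType>>;

static_assert(std::is_same_v<ColVecType<Decimal64>, ColumnDecimal<Decimal64>>);
static_assert(std::is_same_v<ColVecType<int>, ColumnVector<int>>);

int main() { return 0; }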
diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h
index 17165e12e37..fca27fe2f14 100644
--- a/src/Functions/FunctionsHashing.h
+++ b/src/Functions/FunctionsHashing.h
@@ -555,7 +555,7 @@ public:

     bool useDefaultImplementationForConstants() const override { return true; }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
     {
         if (const ColumnString * col_from = checkAndGetColumn<ColumnString>(arguments[0].column.get()))
         {
@@ -616,7 +616,7 @@ private:
     using ToType = typename Impl::ReturnType;

     template <typename FromType>
-    ColumnPtr executeType(ColumnsWithTypeAndName & arguments) const
+    ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const
     {
         using ColVecType = std::conditional_t<IsDecimalNumber<FromType>, ColumnDecimal<FromType>, ColumnVector<FromType>>;

@@ -659,7 +659,7 @@ public:

     bool useDefaultImplementationForConstants() const override { return true; }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
     {
         const IDataType * from_type = arguments[0].type.get();
         WhichDataType which(from_type);
@@ -713,7 +713,7 @@ public:
 #endif
     }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
         return selector.selectAndExecute(arguments, result_type, input_rows_count);
     }
@@ -1065,7 +1065,7 @@ public:
         return std::make_shared<DataTypeNumber<ToType>>();
     }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
     {
         size_t rows = input_rows_count;
         auto col_to = ColumnVector<ToType>::create(rows);
@@ -1107,7 +1107,7 @@ public:
 #endif
     }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
         return selector.selectAndExecute(arguments, result_type, input_rows_count);
     }
@@ -1230,7 +1230,7 @@ public:
     bool useDefaultImplementationForConstants() const override { return true; }
     ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
     {
         const auto arg_count = arguments.size();

@@ -1243,7 +1243,7 @@ public:
     }

 private:
-    ColumnPtr executeSingleArg(ColumnsWithTypeAndName & arguments) const
+    ColumnPtr executeSingleArg(const ColumnsWithTypeAndName & arguments) const
     {
         const auto * col_untyped = arguments.front().column.get();

@@ -1273,7 +1273,7 @@ private:
             " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
     }

-    ColumnPtr executeTwoArgs(ColumnsWithTypeAndName & arguments) const
+    ColumnPtr executeTwoArgs(const ColumnsWithTypeAndName & arguments) const
     {
         const auto * level_col = arguments.back().column.get();
         if (!isColumnConst(*level_col))
diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp
index 7478c1627af..7516600ac85 100644
--- a/src/Functions/FunctionsJSON.cpp
+++ b/src/Functions/FunctionsJSON.cpp
@@ -10,7 +10,7 @@ namespace ErrorCodes
 }

-std::vector<FunctionJSONHelpers::Move> FunctionJSONHelpers::prepareMoves(const char * function_name, ColumnsWithTypeAndName & columns, size_t first_index_argument, size_t num_index_arguments)
+std::vector<FunctionJSONHelpers::Move> FunctionJSONHelpers::prepareMoves(const char * function_name, const ColumnsWithTypeAndName & columns, size_t first_index_argument, size_t num_index_arguments)
 {
     std::vector<Move> moves;
     moves.reserve(num_index_arguments);
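Aside: FunctionsJSON.h below funnels execution through a static Executor::run that creates the result column up front and reserves input_rows_count entries before filling it row by row. A standalone sketch of that shape (hypothetical names, standard containers instead of the real IColumn):

#include <memory>
#include <vector>

using MutableColumn = std::vector<int>;

struct ExecutorSketch
{
    static std::shared_ptr<const MutableColumn> run(const std::vector<int> & argument, size_t input_rows_count)
    {
        auto to = std::make_shared<MutableColumn>();
        to->reserve(input_rows_count);  /// mirrors to->reserve(input_rows_count) in Executor::run
        for (size_t row = 0; row < input_rows_count; ++row)
            to->push_back(argument.at(row));
        return to;
    }
};

int main()
{
    const std::vector<int> arg{1, 2, 3};
    return ExecutorSketch::run(arg, arg.size())->size() == 3 ? 0 : 1;
}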
diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h
index 0fcf1f57f82..aea5829eaef 100644
--- a/src/Functions/FunctionsJSON.h
+++ b/src/Functions/FunctionsJSON.h
@@ -55,7 +55,7 @@ public:
     class Executor
     {
     public:
-        static ColumnPtr run(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count)
+        static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count)
         {
             MutableColumnPtr to{result_type->createColumn()};
             to->reserve(input_rows_count);
@@ -166,7 +166,7 @@ private:
         String key;
     };

-    static std::vector<Move> prepareMoves(const char * function_name, ColumnsWithTypeAndName & columns, size_t first_index_argument, size_t num_index_arguments);
+    static std::vector<Move> prepareMoves(const char * function_name, const ColumnsWithTypeAndName & columns, size_t first_index_argument, size_t num_index_arguments);

     /// Performs moves of types MoveType::Index and MoveType::ConstIndex.
     template <typename JSONParser>
@@ -286,7 +286,7 @@ public:
         return Impl::getReturnType(Name::name, arguments);
     }

-    ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
         /// Choose JSONParser.
 #if USE_SIMDJSON
diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp
index 3e19516daaa..ab8e1cfc0b2 100644
--- a/src/Functions/FunctionsLogical.cpp
+++ b/src/Functions/FunctionsLogical.cpp
@@ -509,7 +509,7 @@ DataTypePtr FunctionAnyArityLogical<Impl, Name>::getReturnTypeImpl(const DataTyp

 template <typename Impl, typename Name>
 ColumnPtr FunctionAnyArityLogical<Impl, Name>::executeImpl(
-    ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const
+    const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const
 {
     ColumnRawPtrs args_in;
     for (const auto & arg_index : arguments)
@@ -550,7 +550,7 @@ DataTypePtr FunctionUnaryLogical<Impl, Name>::getReturnTypeImpl(const DataTypes
 }

 template