From 0952c94abd8aea4cdd631e0f2118cc96fb8f5a90 Mon Sep 17 00:00:00 2001
From: meoww-bot <14239840+meoww-bot@users.noreply.github.com>
Date: Sun, 13 Jun 2021 01:47:01 +0800
Subject: [PATCH 001/183] Update index.md

---
 .../engines/table-engines/integrations/index.md | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/docs/zh/engines/table-engines/integrations/index.md b/docs/zh/engines/table-engines/integrations/index.md
index 17e9d204aa6..0c34ae078a0 100644
--- a/docs/zh/engines/table-engines/integrations/index.md
+++ b/docs/zh/engines/table-engines/integrations/index.md
@@ -1,8 +1,21 @@
 ---
-machine_translated: true
-machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
 toc_folder_title: "\u96C6\u6210"
 toc_priority: 30
 ---
+
+# Table Engines for Integrations {#table-engines-for-integrations}
+
+ClickHouse provides various means for integrating with external systems, including table engines. Like with all other table engines, the configuration is done using `CREATE TABLE` or `ALTER TABLE` queries. From the user's point of view, the configured integration then looks like a normal table, but queries to it are proxied to the external system. This transparent querying is one of the key advantages of this approach over alternative integration methods, such as external dictionaries or table functions, which require the use of custom query methods on each use.
+
+The following integrations are supported:
+
+- [ODBC](../../../engines/table-engines/integrations/odbc.md)
+- [JDBC](../../../engines/table-engines/integrations/jdbc.md)
+- [MySQL](../../../engines/table-engines/integrations/mysql.md)
+- [MongoDB](../../../engines/table-engines/integrations/mongodb.md)
+- [HDFS](../../../engines/table-engines/integrations/hdfs.md)
+- [S3](../../../engines/table-engines/integrations/s3.md)
+- [Kafka](../../../engines/table-engines/integrations/kafka.md)
+- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md)
+- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)
+- [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md)

From 190b4435182b428b5689fdef7f8937694bd47931 Mon Sep 17 00:00:00 2001
From: George
Date: Mon, 14 Jun 2021 18:34:56 +0300
Subject: [PATCH 002/183] First draft

---
 .../reference/quantileexact.md | 95 +++++++++++++++++++
 .../reference/quantiles.md     | 95 +++++++++++++++++++
 2 files changed, 190 insertions(+)

diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md
index 06ef7ccfbd3..84cf187cf20 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md
@@ -158,6 +158,101 @@ Result:
 │ 5 │
 └───────────────────────────┘
 ```
+
+## quantileExactExclusive {#quantileexactexclusive}
+
+Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
+
+To get the exact value, all the passed values are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is the number of values that were passed. However, for a small number of values, the function is very efficient.
+
+When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantilesExactExclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactexclusive) function.
+
+**Syntax**
+
+``` sql
+quantileExactExclusive(level)(expr)
+```
+
+**Arguments**
+
+- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
+- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
+
+**Returned value**
+
+- Quantile of the specified level.
+
+Type:
+
+- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
+- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
+- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
+
+**Example**
+
+Query:
+
+``` sql
+CREATE TABLE num AS numbers(1000);
+
+SELECT quantileExactExclusive(0.6)(x) FROM (SELECT number AS x FROM num);
+```
+
+Result:
+
+``` text
+┌─quantileExactExclusive(0.6)(x)─┐
+│ 599.6 │
+└────────────────────────────────┘
+```
+
+## quantileExactInclusive {#quantileexactinclusive}
+
+Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
+
+To get the exact value, all the passed values are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is the number of values that were passed. However, for a small number of values, the function is very efficient.
+
+When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantilesExactInclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactinclusive) function.
+
+**Syntax**
+
+``` sql
+quantileExactInclusive(level)(expr)
+```
+
+**Arguments**
+
+- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
+- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
+
+**Returned value**
+
+- Quantile of the specified level.
+
+Type:
+
+- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
+- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
+- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
+
+**Example**
+
+Query:
+
+``` sql
+CREATE TABLE num AS numbers(1000);
+
+SELECT quantileExactInclusive(0.6)(x) FROM (SELECT number AS x FROM num);
+```
+
+Result:
+
+``` text
+┌─quantileExactInclusive(0.6)(x)─┐
+│ 599.4 │
+└────────────────────────────────┘
+```
+
 **See Also**
 
 - [median](../../../sql-reference/aggregate-functions/reference/median.md#median)

diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md
index abce6a9e7f0..72b53f307db 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md
@@ -7,3 +7,98 @@ toc_priority: 201
 Syntax: `quantiles(level1, level2, …)(x)`
 
 All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
+
+## quantilesExactExclusive {#quantilesexactexclusive}
+
+Exactly computes the [quantiles](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
+
+To get the exact values, all the passed values are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is the number of values that were passed. However, for a small number of values, the function is very efficient.
+
+Works more efficiently with sets of levels than [quantileExactExclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactexclusive).
+
+**Syntax**
+
+``` sql
+quantilesExactExclusive(level1, level2, ...)(expr)
+```
+
+**Arguments**
+
+- `level` — Levels of quantiles. Constant floating-point numbers from 0 to 1. We recommend using `level` values in the range of `[0.01, 0.99]`.
+- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
+
+**Returned value**
+
+- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels.
+
+Type of array values:
+
+- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
+- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
+- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
+
+**Example**
+
+Query:
+
+``` sql
+CREATE TABLE num AS numbers(1000);
+
+SELECT quantilesExactExclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x) FROM (SELECT number AS x FROM num);
+```
+
+Result:
+
+``` text
+┌─quantilesExactExclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x)─┐
+│ [249.25,499.5,749.75,899.9,949.9499999999999,989.99,998.999] │
+└─────────────────────────────────────────────────────────────────────┘
+```
+
+## quantilesExactInclusive {#quantilesexactinclusive}
+
+Exactly computes the [quantiles](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
+
+To get the exact values, all the passed values are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is the number of values that were passed. However, for a small number of values, the function is very efficient.
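To make the exclusive/inclusive distinction concrete, here is a small self-contained C++ sketch of the two interpolation schemes (type R6 and type R7). The helper function is illustrative and not part of ClickHouse; it reproduces the `599.6` and `599.4` results from the examples above:

```cpp
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdio>
#include <vector>

/// Illustrative helper (not ClickHouse code): exact quantile of the data at
/// fractional 1-based rank h, with linear interpolation between neighbours.
static double quantileAtRank(std::vector<double> xs, double h)
{
    std::sort(xs.begin(), xs.end());
    h = std::clamp(h, 1.0, static_cast<double>(xs.size()));
    size_t k = static_cast<size_t>(h);            /// lower neighbour, 1-based
    double frac = h - static_cast<double>(k);     /// interpolation weight
    if (k == xs.size())
        return xs[k - 1];
    return xs[k - 1] + frac * (xs[k] - xs[k - 1]);
}

int main()
{
    std::vector<double> xs(1000);
    for (size_t i = 0; i < xs.size(); ++i)
        xs[i] = static_cast<double>(i);           /// same data as numbers(1000)

    double n = static_cast<double>(xs.size());
    double exc = quantileAtRank(xs, (n + 1) * 0.6);      /// type R6 (exclusive)
    double inc = quantileAtRank(xs, (n - 1) * 0.6 + 1);  /// type R7 (inclusive)
    std::printf("exclusive: %g, inclusive: %g\n", exc, inc);
    assert(std::fabs(exc - 599.6) < 1e-9 && std::fabs(inc - 599.4) < 1e-9);
    return 0;
}
```

The boundary behaviour explains the different recommended ranges: for R6 the interpolated rank `(n + 1) * level` falls outside the data for extreme levels, while R7 maps `level=0` and `level=1` exactly to the minimum and maximum.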
+ +Works more efficiently with sets of levels than [quantilesExactInclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantilesexactinclusive). + +**Syntax** + +``` sql +quantilesExactInclusive(level1, level2, ...)(expr) +``` + +**Arguments** + +- `level` — Leveles of quantiles. Constant floating-point numbers from 0 to 1. We recommend using a `level` values in the range of `[0.01, 0.99]`. +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). + +**Returned value** + +- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels. + +Type of array values: + +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. + + +**Example** + +Query: + +``` sql +CREATE TABLE num AS numbers(1000); + +SELECT quantilesExactInclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x) FROM (SELECT number AS x FROM num); +``` + +Result: + +``` text +┌─quantilesExactInclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x)─┐ +│ [249.75,499.5,749.25,899.1,949.05,989.01,998.001] │ +└─────────────────────────────────────────────────────────────────────┘ +``` From 39e843d9c7a32301ee2a0ed32ca29f6137fb2a92 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 15 Jun 2021 09:45:19 +0300 Subject: [PATCH 003/183] Some code for snapshot deserialization --- src/Coordination/KeeperSnapshotManager.cpp | 14 +- src/Coordination/ZooKeeperSnapshotReader.cpp | 183 +++++++++++++++++++ src/Coordination/ZooKeeperSnapshotReader.h | 23 +++ 3 files changed, 219 insertions(+), 1 deletion(-) create mode 100644 src/Coordination/ZooKeeperSnapshotReader.cpp create mode 100644 src/Coordination/ZooKeeperSnapshotReader.h diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index 7520f9b3ba2..6dfc0d787d5 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -345,11 +345,23 @@ KeeperSnapshotManager::KeeperSnapshotManager(const std::string & snapshots_path_ for (const auto & p : fs::directory_iterator(snapshots_path)) { - if (startsWith(p.path(), "tmp_")) /// Unfinished tmp files + const auto & path = p.path(); + + if (!path.has_filename()) + continue; + + if (startsWith(path.filename(), "tmp_")) /// Unfinished tmp files { std::filesystem::remove(p); continue; } + + /// Not snapshot file + if (!startsWith(path.filename(), "snapshot_")) + { + continue; + } + size_t snapshot_up_to = getSnapshotPathUpToLogIdx(p.path()); existing_snapshots[snapshot_up_to] = p.path(); } diff --git a/src/Coordination/ZooKeeperSnapshotReader.cpp b/src/Coordination/ZooKeeperSnapshotReader.cpp new file mode 100644 index 00000000000..df758f870ee --- /dev/null +++ b/src/Coordination/ZooKeeperSnapshotReader.cpp @@ -0,0 +1,183 @@ +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +static String parentPath(const String & path) +{ + auto rslash_pos = path.rfind('/'); + if (rslash_pos > 0) + return path.substr(0, rslash_pos); + return "/"; +} + +static std::string getBaseName(const String & path) +{ + size_t basename_start = path.rfind('/'); + return std::string{&path[basename_start 
+ 1], path.length() - basename_start - 1}; +} + +int64_t getZxidFromName(const std::string & filename) +{ + std::filesystem::path path(filename); + std::string extension = path.extension(); + //std::cerr << "Extension:" << extension << std::endl; + char * end; + int64_t zxid = std::strtoul(extension.data() + 1, &end, 16); + return zxid; +} + +void deserializeMagic(ReadBuffer & in) +{ + int32_t magic_header, version; + int64_t dbid; + Coordination::read(magic_header, in); + Coordination::read(version, in); + Coordination::read(dbid, in); + //const char * data = "ZKSN"; + //std::cerr << "Expected Hedader:" << *reinterpret_cast(data) << std::endl; + //std::cerr << "MAGIC HEADER:" << magic_header << std::endl; + //std::cerr << "VERSION:" << version << std::endl; + //std::cerr << "DBID:" << dbid << std::endl; +} + +int64_t deserializeSessionAndTimeout(KeeperStorage & storage, ReadBuffer & in) +{ + int32_t count; + Coordination::read(count, in); + //std::cerr << "Total session and timeout:" << count << std::endl; + int64_t max_session_id = 0; + while (count > 0) + { + int64_t session_id; + int32_t timeout; + + Coordination::read(session_id, in); + Coordination::read(timeout, in); + //std::cerr << "Session id:" << session_id << std::endl; + //std::cerr << "Timeout:" << timeout << std::endl; + storage.addSessionID(session_id, timeout); + max_session_id = std::max(session_id, max_session_id); + count--; + } + std::cerr << "Done deserializing sessions\n"; + return max_session_id; +} + +void deserializeACLMap(KeeperStorage & storage, ReadBuffer & in) +{ + int32_t count; + Coordination::read(count, in); + //std::cerr << "ACLs Count:" << count << "\n"; + while (count > 0) + { + int64_t map_index; + Coordination::read(map_index, in); + //std::cerr << "Map index:" << map_index << "\n"; + + Coordination::ACLs acls; + int32_t acls_len; + Coordination::read(acls_len, in); + + //std::cerr << "ACLs len:" << acls_len << "\n"; + while (acls_len > 0) + { + Coordination::ACL acl; + Coordination::read(acl.permissions, in); + Coordination::read(acl.scheme, in); + Coordination::read(acl.id, in); + //std::cerr << "ACL perms:" << acl.permissions << "\n"; + //std::cerr << "ACL scheme:" << acl.scheme << "\n"; + //std::cerr << "ACL id:" << acl.id << "\n"; + acls.push_back(acl); + acls_len--; + } + storage.acl_map.addMapping(map_index, acls); + + count--; + } + std::cerr << "Done deserializing ACLs Total" << count << "\n"; +} + +int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in) +{ + int64_t max_zxid = 0; + std::string path; + Coordination::read(path, in); + //std::cerr << "Read path FIRST length:" << path.length() << std::endl; + //std::cerr << "Read path FIRST data:" << path << std::endl; + size_t count = 0; + while (path != "/") + { + KeeperStorage::Node node{}; + Coordination::read(node.data, in); + Coordination::read(node.acl_id, in); + + /// Deserialize stat + Coordination::read(node.stat.czxid, in); + Coordination::read(node.stat.mzxid, in); + /// For some reason ZXID specified in filename can be smaller + /// then actual zxid from nodes. 
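+    /// In this case we use the maximum zxid seen in the nodes themselves.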
+ max_zxid = std::max(max_zxid, node.stat.mzxid); + + Coordination::read(node.stat.ctime, in); + Coordination::read(node.stat.mtime, in); + Coordination::read(node.stat.version, in); + Coordination::read(node.stat.cversion, in); + Coordination::read(node.stat.aversion, in); + Coordination::read(node.stat.ephemeralOwner, in); + Coordination::read(node.stat.pzxid, in); + if (!path.empty()) + { + node.stat.dataLength = node.data.length(); + node.seq_num = node.stat.cversion; + storage.container.insertOrReplace(path, node); + + if (node.stat.ephemeralOwner != 0) + storage.ephemerals[node.stat.ephemeralOwner].insert(path); + + storage.acl_map.addUsage(node.acl_id); + } + Coordination::read(path, in); + count++; + if (count % 1000 == 0) + std::cerr << "Deserialized nodes:" << count << std::endl; + } + + for (const auto & itr : storage.container) + { + if (itr.key != "/") + { + auto parent_path = parentPath(itr.key); + storage.container.updateValue(parent_path, [&path = itr.key] (KeeperStorage::Node & value) { value.children.insert(getBaseName(path)); value.stat.numChildren++; }); + } + } + + return max_zxid; +} + +void deserializeKeeperStorage(KeeperStorage & storage, const std::string & path) +{ + int64_t zxid = getZxidFromName(path); + //std::cerr << "Got ZXID:" << zxid << std::endl; + + ReadBufferFromFile reader(path); + + deserializeMagic(reader); + auto max_session_id = deserializeSessionAndTimeout(storage, reader); + + storage.session_id_counter = max_session_id; + deserializeACLMap(storage, reader); + + int64_t zxid_from_nodes = deserializeStorageData(storage, reader); + storage.zxid = std::max(zxid, zxid_from_nodes); +} + +} diff --git a/src/Coordination/ZooKeeperSnapshotReader.h b/src/Coordination/ZooKeeperSnapshotReader.h new file mode 100644 index 00000000000..8006f69a6f8 --- /dev/null +++ b/src/Coordination/ZooKeeperSnapshotReader.h @@ -0,0 +1,23 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace DB +{ + +int64_t getZxidFromName(const std::string & filename); + +void deserializeMagic(ReadBuffer & in); + +int64_t deserializeSessionAndTimeout(KeeperStorage & storage, ReadBuffer & in); + +void deserializeACLMap(KeeperStorage & storage, ReadBuffer & in); + +int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in); + +void deserializeKeeperStorage(KeeperStorage & storage, const std::string & path); + +} From 40101cbf827ba9a9f17d3687a23090465cf0dfb4 Mon Sep 17 00:00:00 2001 From: Evgeniia Sudarikova Date: Tue, 15 Jun 2021 22:29:11 +0300 Subject: [PATCH 004/183] edited EN, added RU --- docs/en/getting-started/install.md | 6 ++-- .../external-dicts-dict-lifetime.md | 12 +++---- docs/ru/getting-started/install.md | 13 +++++-- .../external-dicts-dict-lifetime.md | 36 +++++++++++++++++-- 4 files changed, 53 insertions(+), 14 deletions(-) diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 4256de49e4a..3de90156a41 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -94,11 +94,11 @@ For production environments, it’s recommended to use the latest `stable`-versi To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/). Those images use official `deb` packages inside. -### Single Binary +### Single Binary {#from-single-binary} -You can install ClickHouse on Linux using single portable binary from the latest commit of the `master` branch: [https://builds.clickhouse.tech/master/amd64/clickhouse]. 
+You can install ClickHouse on Linux using a single portable binary from the latest commit of the `master` branch: [https://builds.clickhouse.tech/master/amd64/clickhouse]. -``` +``` bash curl -O 'https://builds.clickhouse.tech/master/amd64/clickhouse' && chmod a+x clickhouse sudo ./clickhouse install ``` diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md index e339461e428..1d79c9a28bf 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md @@ -57,7 +57,7 @@ In this case, ClickHouse can reload the dictionary earlier if the dictionary con When updating the dictionaries, the ClickHouse server applies different logic depending on the type of [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md): - For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated. -- For MySQL source, the time of modification is checked using a `SHOW TABLE STATUS` query (in case of MySQL 8 you need to disable meta-information caching in MySQL by `set global information_schema_stats_expiry=0`. +- For MySQL source, the time of modification is checked using a `SHOW TABLE STATUS` query (in case of MySQL 8 you need to disable meta-information caching in MySQL by `set global information_schema_stats_expiry=0`). - Dictionaries from other sources are updated every time by default. For other sources (ODBC, PostgreSQL, ClickHouse, etc), you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps: @@ -88,13 +88,13 @@ SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source wher For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronious and asynchronious updates are supported. -It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to only request data that was changed after previous update. If `update_field` is specified in as part of dictionary source configuration value of previous update time in seconds will be added to data request. Depends of source type Executable, HTTP, MySQL, PostgreSQL, ClickHouse, ODBC different logic will be applied to `update_field` before request data from external source. +It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to only request data that was changed after the previous update. If `update_field` is specified as part of the dictionary source configuration value of the previous update time in seconds will be added to the data request. Depends on source type Executable, HTTP, MySQL, PostgreSQL, ClickHouse, ODBC different logic will be applied to `update_field` before request data from an external source. -- If source is HTTP then `update_field` will be added as query parameter with last update time as parameter value. -- If source is Executable then `update_field` will be added as executable script argument with last update time as argument value. -- If source is ClickHouse, MySQL, PostgreSQL, ODBC there will be additional part of WHERE, where `update_field` is compared as greater or equal with last update time. 
+- If the source is HTTP then `update_field` will be added as a query parameter with the last update time as the parameter value. +- If the source is Executable then `update_field` will be added as an executable script argument with the last update time as the argument value. +- If the source is ClickHouse, MySQL, PostgreSQL, ODBC there will be an additional part of WHERE, where `update_field` is compared as greater or equal with the last update time. -If `update_field` option is set. Additional option `update_lag` can be set. Value of `update_lag` option is subtracted from previous update time before request updated data. +If `update_field` option is set, additional option `update_lag` can be set. Value of `update_lag` option is subtracted from previous update time before request updated data. Example of settings: diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index d0a54d9043a..c273d64f783 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -87,9 +87,18 @@ sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh Для запуска ClickHouse в Docker нужно следовать инструкции на [Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/). Внутри образов используются официальные `deb` пакеты. +### Из единого бинарника {#from-single-binary} + +Для установки ClickHouse на Linux можно использовать единый переносимый бинарник из последнего коммита ветки `master`: [https://builds.clickhouse.tech/master/amd64/clickhouse]. + +``` bash +curl -O 'https://builds.clickhouse.tech/master/amd64/clickhouse' && chmod a+x clickhouse +sudo ./clickhouse install +``` + ### Из исполняемых файлов для нестандартных окружений {#from-binaries-non-linux} -Для других операционных систем и архитектуры AArch64, сборки ClickHouse предоставляются в виде кросс-компилированного бинарника с последнего коммита ветки master (с задержкой в несколько часов). +Для других операционных систем и архитектуры AArch64, сборки ClickHouse предоставляются в виде кросс-компилированного бинарника из последнего коммита ветки `master` (с задержкой в несколько часов). - [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse` - [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse` @@ -97,7 +106,7 @@ sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh После скачивания можно воспользоваться `clickhouse client` для подключения к серверу или `clickhouse local` для обработки локальных данных. -Чтобы установить ClickHouse в рамках всей системы (с необходимыми конфигурационными файлами, настройками пользователей и т.д.), выполните `sudo ./clickhouse install`. Затем выполните команды `clickhouse start` (чтобы запустить сервер) и `clickhouse-client` (чтобы подключиться к нему). +Чтобы установить ClickHouse в рамках всей системы (с необходимыми конфигурационными файлами, настройками пользователей и т.д.), выполните `sudo ./clickhouse install`. Затем выполните команды `clickhouse start` (чтобы запустить сервер) и `clickhouse-client` (чтобы подключиться к нему). Данные сборки не рекомендуются для использования в продакшене, так как они недостаточно тщательно протестированны. Также, в них присутствуют не все возможности ClickHouse. 
diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md index 1298f05eca0..388d54c21a0 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md @@ -52,12 +52,12 @@ LIFETIME(MIN 300 MAX 360) ``` Если `0` и `0`, ClickHouse не перегружает словарь по истечению времени. -В этм случае, ClickHouse может перезагрузить данные словаря если изменился XML файл с конфигурацией словаря или если была выполнена команда `SYSTEM RELOAD DICTIONARY`. +В этом случае, ClickHouse может перезагрузить данные словаря если изменился XML файл с конфигурацией словаря или если была выполнена команда `SYSTEM RELOAD DICTIONARY`. При обновлении словарей сервер ClickHouse применяет различную логику в зависимости от типа [источника](external-dicts-dict-sources.md): - У текстового файла проверяется время модификации. Если время изменилось по отношению к запомненному ранее, то словарь обновляется. -- Для MySQL источника, время модификации проверяется запросом `SHOW TABLE STATUS` (для MySQL 8 необходимо отключить кеширование мета-информации в MySQL `set global information_schema_stats_expiry=0`. +- Для MySQL источника время модификации проверяется запросом `SHOW TABLE STATUS` (для MySQL 8 необходимо отключить кеширование мета-информации в MySQL `set global information_schema_stats_expiry=0`). - Словари из других источников по умолчанию обновляются каждый раз. Для других источников (ODBC, PostgreSQL, ClickHouse и т.д.) можно настроить запрос, который позволит обновлять словари только в случае их фактического изменения, а не каждый раз. Чтобы это сделать необходимо выполнить следующие условия/действия: @@ -86,4 +86,34 @@ SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source wher ... ``` -Для словарей `Cache`, `ComplexKeyCache`, `SSDCache` и `SSDComplexKeyCache` поддерживается как синхронное, так и асинхронное обновление. +Для словарей `Cache`, `ComplexKeyCache`, `SSDCache` и `SSDComplexKeyCache` поддерживается как синхронное, так и асинхронное обновление. + +Также словари `Flat`, `Hashed`, `ComplexKeyHashed` могут запрашивать только те данные, которые были изменены после предыдущего обновления. Если `update_field` указана как часть конфигурации источника словаря, к запросу данных будет добавлено время предыдущего обновления в секундах. В зависимости от типа источника (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, ODBC) к `update_field` будет применена соответствующая логика перед запросом данных из внешнего источника. + +- Если источник HTTP, то `update_field` будет добавлена в качестве параметра запроса, а время последнего обновления — в качестве значения параметра. +- Если источник Executable, то `update_field` будет добавлена в качестве аргумента исполняемого скрипта, время последнего обновления — в качестве значения аргумента. +- Если источник ClickHouse, MySQL, PostgreSQL или ODBC, то будет дополнительная часть запроса `WHERE`, где `update_field` будет больше или равна времени последнего обновления. + +Если установлена опция `update_field`, то может быть установлена дополнительная опция `update_lag`. Значение параметра `update_lag` вычитается из времени предыдущего обновления перед запросом обновленных данных. + +Пример настройки: + +``` xml + + ... + + ... + added_time + 15 + + ... + +``` + +или + +``` sql +... 
+SOURCE(CLICKHOUSE(... update_field 'added_time' update_lag 15)) +... +``` \ No newline at end of file From eef8f367417abdeace8acc232e4a881a9642dd91 Mon Sep 17 00:00:00 2001 From: Evgeniia Sudarikova Date: Tue, 15 Jun 2021 22:54:02 +0300 Subject: [PATCH 005/183] edited link --- docs/en/getting-started/install.md | 2 +- docs/ru/getting-started/install.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 3de90156a41..5cec83c3819 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -96,7 +96,7 @@ To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.doc ### Single Binary {#from-single-binary} -You can install ClickHouse on Linux using a single portable binary from the latest commit of the `master` branch: [https://builds.clickhouse.tech/master/amd64/clickhouse]. +You can install ClickHouse on Linux using a single portable binary from the latest commit of the `master` branch: https://builds.clickhouse.tech/master/amd64/clickhouse. ``` bash curl -O 'https://builds.clickhouse.tech/master/amd64/clickhouse' && chmod a+x clickhouse diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index c273d64f783..2924958ddf4 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -89,7 +89,7 @@ sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh ### Из единого бинарника {#from-single-binary} -Для установки ClickHouse на Linux можно использовать единый переносимый бинарник из последнего коммита ветки `master`: [https://builds.clickhouse.tech/master/amd64/clickhouse]. +Для установки ClickHouse на Linux можно использовать единый переносимый бинарник из последнего коммита ветки `master`: https://builds.clickhouse.tech/master/amd64/clickhouse. ``` bash curl -O 'https://builds.clickhouse.tech/master/amd64/clickhouse' && chmod a+x clickhouse From 80cf037f5c416ba1cfdc2e7ce5f9152d428098e9 Mon Sep 17 00:00:00 2001 From: George Date: Thu, 17 Jun 2021 03:42:08 +0300 Subject: [PATCH 006/183] More work --- .../aggregate-functions/reference/quantileexact.md | 14 +++++++++----- .../aggregate-functions/reference/quantiles.md | 8 ++++++-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md index 84cf187cf20..f8cf5c5e70d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md @@ -165,7 +165,9 @@ Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a num To get exact value, all the passed values ​​are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is a number of values that were passed. However, for a small number of values, the function is very effective. -When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantilesExactExclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactexclusive) function. 
+This function is equivalent to [PERCENTILE.EXC](https://support.microsoft.com/en-us/office/percentile-exc-function-bbaa7204-e9e1-4010-85bf-c31dc5dce4ba) Excel function, ([type R6](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample)). + +When using multiple `quantileExactExclusive` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantilesExactExclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactexclusive) function. **Syntax** @@ -175,7 +177,7 @@ quantileExactExclusive(level)(expr) **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `(0, 1)`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** @@ -208,11 +210,13 @@ Result: ## quantileExactInclusive {#quantileexactinclusive} -Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. +Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. To get exact value, all the passed values ​​are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is a number of values that were passed. However, for a small number of values, the function is very effective. -When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantilesExactInclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactexclusive) function. +This function is equivalent to [PERCENTILE.INC](https://support.microsoft.com/en-us/office/percentile-inc-function-680f9539-45eb-410b-9a5e-c1355e5fe2ed) Excel function, ([type R7](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample)). + +When using multiple `quantileExactInclusive` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantilesExactInclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactexclusive) function. **Syntax** @@ -222,7 +226,7 @@ quantileExactInclusive(level)(expr) **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0, 1]`. Default value: 0.5. 
At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index 72b53f307db..297f87b6e95 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -14,6 +14,8 @@ Exactly computes the [quantiles](https://en.wikipedia.org/wiki/Quantile) of a nu To get exact value, all the passed values ​​are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is a number of values that were passed. However, for a small number of values, the function is very effective. +This function is equivalent to [PERCENTILE.EXC](https://support.microsoft.com/en-us/office/percentile-exc-function-bbaa7204-e9e1-4010-85bf-c31dc5dce4ba) Excel function, ([type R6](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample)). + Works more efficiently with sets of levels than [quantilesExactExclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactexclusive). **Syntax** @@ -24,7 +26,7 @@ quantilesExactExclusive(level1, level2, ...)(expr) **Arguments** -- `level` — Leveles of quantiles. Constant floating-point numbers from 0 to 1. We recommend using a `level` values in the range of `[0.01, 0.99]`. +- `level` — Leveles of quantiles. Constant floating-point numbers from 0 to 1. We recommend using a `level` values in the range of `(0, 1)`. - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** @@ -61,6 +63,8 @@ Exactly computes the [quantiles](https://en.wikipedia.org/wiki/Quantile) of a nu To get exact value, all the passed values ​​are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is a number of values that were passed. However, for a small number of values, the function is very effective. +This function is equivalent to [PERCENTILE.INC](https://support.microsoft.com/en-us/office/percentile-inc-function-680f9539-45eb-410b-9a5e-c1355e5fe2ed) Excel function, ([type R7](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample)). + Works more efficiently with sets of levels than [quantilesExactInclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantilesexactinclusive). **Syntax** @@ -71,7 +75,7 @@ quantilesExactInclusive(level1, level2, ...)(expr) **Arguments** -- `level` — Leveles of quantiles. Constant floating-point numbers from 0 to 1. We recommend using a `level` values in the range of `[0.01, 0.99]`. +- `level` — Leveles of quantiles. Constant floating-point numbers from 0 to 1. We recommend using a `level` values in the range of `[0, 1]`. 
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** From 0a959f87076fd5fe8e571ec572a8d313d41dbe59 Mon Sep 17 00:00:00 2001 From: George Date: Thu, 17 Jun 2021 03:55:24 +0300 Subject: [PATCH 007/183] small fixes --- .../aggregate-functions/reference/quantileexact.md | 4 ++-- .../sql-reference/aggregate-functions/reference/quantiles.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md index f8cf5c5e70d..005d039e7c5 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md @@ -177,7 +177,7 @@ quantileExactExclusive(level)(expr) **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `(0, 1)`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `level` — Level of quantile. Optional parameter. Constant floating-point number in the range `(0, 1)`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** @@ -226,7 +226,7 @@ quantileExactInclusive(level)(expr) **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0, 1]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `level` — Level of quantile. Optional parameter. Constant floating-point number in the range `[0, 1]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index 297f87b6e95..9723e0ee29c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -26,7 +26,7 @@ quantilesExactExclusive(level1, level2, ...)(expr) **Arguments** -- `level` — Leveles of quantiles. Constant floating-point numbers from 0 to 1. We recommend using a `level` values in the range of `(0, 1)`. +- `level` — Leveles of quantiles. Constant floating-point numbers in the range `(0, 1)`. - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). 
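For reference, the interpolated ranks behind the two variants (types R6 and R7) can be written explicitly, with $x_{(1)} \le \dots \le x_{(n)}$ the sorted data and $\ell$ the level:

$$
h_{\mathrm{exc}} = (n + 1)\,\ell, \qquad h_{\mathrm{inc}} = (n - 1)\,\ell + 1, \qquad
Q = x_{(\lfloor h \rfloor)} + \left(h - \lfloor h \rfloor\right)\left(x_{(\lfloor h \rfloor + 1)} - x_{(\lfloor h \rfloor)}\right)
$$

For the exclusive variant, $h_{\mathrm{exc}}$ stays within $[1, n]$ only when $\ell \in [\tfrac{1}{n+1}, \tfrac{n}{n+1}]$, which is why its `level` is restricted to the open interval `(0, 1)`; the inclusive variant is well-defined on the whole `[0, 1]`.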
**Returned value** @@ -75,7 +75,7 @@ quantilesExactInclusive(level1, level2, ...)(expr) **Arguments** -- `level` — Leveles of quantiles. Constant floating-point numbers from 0 to 1. We recommend using a `level` values in the range of `[0, 1]`. +- `level` — Leveles of quantiles. Constant floating-point numbers in the range `[0, 1]`. - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** From d513d14b2c3367111f8784656c720cf2bc737ed1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 17 Jun 2021 16:29:11 +0300 Subject: [PATCH 008/183] Add some functions for data conversion --- src/Coordination/ZooKeeperDataReader.cpp | 516 +++++++++++++++++++ src/Coordination/ZooKeeperDataReader.h | 17 + src/Coordination/ZooKeeperSnapshotReader.cpp | 183 ------- src/Coordination/ZooKeeperSnapshotReader.h | 23 - 4 files changed, 533 insertions(+), 206 deletions(-) create mode 100644 src/Coordination/ZooKeeperDataReader.cpp create mode 100644 src/Coordination/ZooKeeperDataReader.h delete mode 100644 src/Coordination/ZooKeeperSnapshotReader.cpp delete mode 100644 src/Coordination/ZooKeeperSnapshotReader.h diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp new file mode 100644 index 00000000000..60882993c0f --- /dev/null +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -0,0 +1,516 @@ +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int CORRUPTED_DATA; +} + +static String parentPath(const String & path) +{ + auto rslash_pos = path.rfind('/'); + if (rslash_pos > 0) + return path.substr(0, rslash_pos); + return "/"; +} + +static std::string getBaseName(const String & path) +{ + size_t basename_start = path.rfind('/'); + return std::string{&path[basename_start + 1], path.length() - basename_start - 1}; +} + +int64_t getZxidFromName(const std::string & filename) +{ + std::filesystem::path path(filename); + std::string extension = path.extension(); + char * end; + int64_t zxid = std::strtoul(extension.data() + 1, &end, 16); + return zxid; +} + +void deserializeSnapshotMagic(ReadBuffer & in) +{ + int32_t magic_header, version; + int64_t dbid; + Coordination::read(magic_header, in); + Coordination::read(version, in); + Coordination::read(dbid, in); + static constexpr int32_t SNP_HEADER = 1514885966; /// "ZKSN" + if (magic_header != SNP_HEADER) + throw Exception(ErrorCodes::CORRUPTED_DATA ,"Incorrect magic header in file, expected {}, got {}", SNP_HEADER, magic_header); +} + +int64_t deserializeSessionAndTimeout(KeeperStorage & storage, ReadBuffer & in) +{ + int32_t count; + Coordination::read(count, in); + int64_t max_session_id = 0; + while (count > 0) + { + int64_t session_id; + int32_t timeout; + + Coordination::read(session_id, in); + Coordination::read(timeout, in); + storage.addSessionID(session_id, timeout); + max_session_id = std::max(session_id, max_session_id); + count--; + } + return max_session_id; +} + +void deserializeACLMap(KeeperStorage & storage, ReadBuffer & in) +{ + int32_t count; + Coordination::read(count, in); + while (count > 0) + { + int64_t map_index; + Coordination::read(map_index, in); + + Coordination::ACLs acls; + int32_t acls_len; + Coordination::read(acls_len, in); + + while (acls_len > 0) + { + Coordination::ACL acl; + 
Coordination::read(acl.permissions, in); + Coordination::read(acl.scheme, in); + Coordination::read(acl.id, in); + acls.push_back(acl); + acls_len--; + } + storage.acl_map.addMapping(map_index, acls); + + count--; + } +} + +int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in) +{ + int64_t max_zxid = 0; + std::string path; + Coordination::read(path, in); + size_t count = 0; + while (path != "/") + { + KeeperStorage::Node node{}; + Coordination::read(node.data, in); + Coordination::read(node.acl_id, in); + + /// Deserialize stat + Coordination::read(node.stat.czxid, in); + Coordination::read(node.stat.mzxid, in); + /// For some reason ZXID specified in filename can be smaller + /// then actual zxid from nodes. In this case we will use zxid from nodes. + max_zxid = std::max(max_zxid, node.stat.mzxid); + + Coordination::read(node.stat.ctime, in); + Coordination::read(node.stat.mtime, in); + Coordination::read(node.stat.version, in); + Coordination::read(node.stat.cversion, in); + Coordination::read(node.stat.aversion, in); + Coordination::read(node.stat.ephemeralOwner, in); + Coordination::read(node.stat.pzxid, in); + if (!path.empty()) + { + node.stat.dataLength = node.data.length(); + node.seq_num = node.stat.cversion; + storage.container.insertOrReplace(path, node); + + if (node.stat.ephemeralOwner != 0) + storage.ephemerals[node.stat.ephemeralOwner].insert(path); + + storage.acl_map.addUsage(node.acl_id); + } + Coordination::read(path, in); + count++; + if (count % 1000 == 0) + std::cerr << "Deserialized nodes from snapshot:" << count << std::endl; + } + + for (const auto & itr : storage.container) + { + if (itr.key != "/") + { + auto parent_path = parentPath(itr.key); + storage.container.updateValue(parent_path, [&path = itr.key] (KeeperStorage::Node & value) { value.children.insert(getBaseName(path)); value.stat.numChildren++; }); + } + } + + return max_zxid; +} + +void deserializeKeeperStorageFromSnapshot(KeeperStorage & storage, const std::string & snapshot_path) +{ + int64_t zxid = getZxidFromName(snapshot_path); + + ReadBufferFromFile reader(snapshot_path); + + deserializeSnapshotMagic(reader); + auto max_session_id = deserializeSessionAndTimeout(storage, reader); + + storage.session_id_counter = max_session_id; + deserializeACLMap(storage, reader); + + int64_t zxid_from_nodes = deserializeStorageData(storage, reader); + storage.zxid = std::max(zxid, zxid_from_nodes); +} + +void deserializeKeeperStorageFromSnapshotsDir(KeeperStorage & storage, const std::string & path) +{ + namespace fs = std::filesystem; + std::map existing_snapshots; + for (const auto & p : fs::directory_iterator(path)) + { + const auto & log_path = p.path(); + if (!log_path.has_filename() || !startsWith(log_path.filename(), "snapshot.")) + continue; + int64_t zxid = getZxidFromName(log_path); + existing_snapshots[zxid] = p.path(); + } + /// deserialize only from latest snapshot + if (!existing_snapshots.empty()) + deserializeKeeperStorageFromSnapshot(storage, existing_snapshots.rbegin()->second); +} + +void deserializeLogMagic(ReadBuffer & in) +{ + int32_t magic_header, version; + int64_t dbid; + Coordination::read(magic_header, in); + Coordination::read(version, in); + Coordination::read(dbid, in); + + static constexpr int32_t LOG_HEADER = 1514884167; /// "ZKLG" + if (magic_header != LOG_HEADER) + throw Exception(ErrorCodes::CORRUPTED_DATA ,"Incorrect magic header in file, expected {}, got {}", LOG_HEADER, magic_header); +} + + +/// For some reason zookeeper stores slightly different records 
in its log than
+/// the requests themselves. For example:
+/// class CreateTxn {
+///      ustring path;
+///      buffer data;
+///      vector<ACL> acl;
+///      boolean ephemeral;
+///      int parentCVersion;
+/// }
+/// But the Create request is:
+/// class CreateRequest {
+///      ustring path;
+///      buffer data;
+///      vector<ACL> acl;
+///      int flags;
+/// }
+///
+/// However, the type is the same OpNum...
+///
+/// Also there is a comment in ZooKeeper's code base about the log structure, but
+/// it's almost completely incorrect. The actual ZooKeeper log structure, starting from version 3.6+, is:
+///
+/// Magic Header: "ZKLG" + 4 byte version + 8 byte dbid.
+/// After that go the serialized transactions, in the following format:
+///     8 byte checksum
+///     4 byte transaction length
+///     8 byte session_id (author of the transaction)
+///     4 byte user XID
+///     8 byte ZXID
+///     8 byte transaction time
+///     4 byte transaction type (OpNum)
+///     [Transaction body depending on transaction type]
+///     12 bytes tail (starting from 3.6+): 4 byte version + 8 byte checksum of data tree
+///     1 byte -- 0x42
+///
+/// The transaction body is quite simple for all kinds of transactions except
+/// multitransactions. Their structure is the following:
+///     4 byte sub transactions count
+///     4 byte sub transaction length
+///     [Transaction body depending on transaction type]
+///     and so on
+///
+/// Gotchas:
+///
+/// 1) For some reason ZooKeeper stores ErrorTxns in the log. It's
+/// reasonable for multitransactions, but why it stores standalone errors
+/// is not clear.
+///
+/// 2) For some reason there is no 12 byte tail (version + checksum of
+/// tree) after a standalone ErrorTxn.
+///
+/// 3) The strangest thing: in one of our production logs (about 1.2GB in
+/// size) we found a multitransaction with two sub transactions: Error1
+/// and Error2, both with OpCode -1. A normal Error transaction has 4 bytes length
+/// (for the error code), but Error1 had 550 bytes length. Stranger still,
+/// these 550 bytes were obviously part of a Create transaction, even though
+/// the operation code was -1. We added debug prints to the original
+/// ZooKeeper (3.6.3) and found that it just reads 550 bytes of this "Error"
+/// transaction, takes the first 4 bytes as an error code (it was 79, a
+/// non-existing code) and skips the remaining 546 bytes. NOTE: it looks like a bug
+/// in ZooKeeper.
+///
+namespace
+{
+
+Coordination::ZooKeeperRequestPtr deserializeCreateTxn(ReadBuffer & in)
+{
+    std::shared_ptr<Coordination::ZooKeeperCreateRequest> result = std::make_shared<Coordination::ZooKeeperCreateRequest>();
+    Coordination::read(result->path, in);
+    Coordination::read(result->data, in);
+    Coordination::read(result->acls, in);
+    Coordination::read(result->is_ephemeral, in);
+    /// ZooKeeper stores the parent's cversion here; we do not need it, because
+    /// it is simply incremented when the request is executed.
+    int32_t parent_c_version;
+    Coordination::read(parent_c_version, in);
+    return result;
+}
+
+Coordination::ZooKeeperRequestPtr deserializeDeleteTxn(ReadBuffer & in)
+{
+    std::shared_ptr<Coordination::ZooKeeperRemoveRequest> result = std::make_shared<Coordination::ZooKeeperRemoveRequest>();
+    Coordination::read(result->path, in);
+    return result;
+}
+
+Coordination::ZooKeeperRequestPtr deserializeSetTxn(ReadBuffer & in)
+{
+    std::shared_ptr<Coordination::ZooKeeperSetRequest> result = std::make_shared<Coordination::ZooKeeperSetRequest>();
+    Coordination::read(result->path, in);
+    Coordination::read(result->data, in);
+    Coordination::read(result->version, in);
+    return result;
+}
+
+Coordination::ZooKeeperRequestPtr deserializeCheckVersionTxn(ReadBuffer & in)
+{
+    std::shared_ptr<Coordination::ZooKeeperCheckRequest> result = std::make_shared<Coordination::ZooKeeperCheckRequest>();
+    Coordination::read(result->path, in);
+    Coordination::read(result->version, in);
+    return result;
+}
+
+Coordination::ZooKeeperRequestPtr deserializeCreateSession(ReadBuffer & in)
+{
+    std::shared_ptr<Coordination::ZooKeeperSessionIDRequest> result = std::make_shared<Coordination::ZooKeeperSessionIDRequest>();
+    int32_t timeout;
+    Coordination::read(timeout, in);
+    result->session_timeout_ms = timeout;
+    return result;
+}
+
+Coordination::ZooKeeperRequestPtr deserializeCloseSession(ReadBuffer & in)
+{
+    std::shared_ptr<Coordination::ZooKeeperCloseRequest> result = std::make_shared<Coordination::ZooKeeperCloseRequest>();
+    std::vector<Coordination::ACL> data;
+    Coordination::read(data, in);
+    return result;
+}
+
+Coordination::ZooKeeperRequestPtr deserializeErrorTxn(ReadBuffer & in)
+{
+    int32_t error;
+    Coordination::read(error, in);
+    return nullptr;
+}
+
+Coordination::ZooKeeperRequestPtr deserializeMultiTxn(ReadBuffer & in);
+
+Coordination::ZooKeeperRequestPtr deserializeTxnImpl(ReadBuffer & in, bool subtxn)
+{
+    int32_t type;
+    Coordination::read(type, in);
+    Coordination::ZooKeeperRequestPtr result;
+    int32_t sub_txn_length = 0;
+    if (subtxn)
+        Coordination::read(sub_txn_length, in);
+
+    int64_t in_count_before = in.count();
+
+    switch (type)
+    {
+        case 1:
+            result = deserializeCreateTxn(in);
+            break;
+        case 2:
+            result = deserializeDeleteTxn(in);
+            break;
+        case 5:
+            result = deserializeSetTxn(in);
+            break;
+        case 13:
+            result = deserializeCheckVersionTxn(in);
+            break;
+        case 14:
+            result = deserializeMultiTxn(in);
+            break;
+        case -10:
+            result = deserializeCreateSession(in);
+            break;
+        case -11:
+            result = deserializeCloseSession(in);
+            break;
+        case -1:
+            result = deserializeErrorTxn(in);
+            break;
+        default:
+            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented operation {}", type);
+    }
+
+    if (subtxn)
+    {
+        int64_t bytes_read = in.count() - in_count_before;
+        if (bytes_read < sub_txn_length)
+            in.ignore(sub_txn_length - bytes_read);
+    }
+
+    return result;
+}
+
+Coordination::ZooKeeperRequestPtr deserializeMultiTxn(ReadBuffer & in)
+{
+    int32_t length;
+    Coordination::read(length, in);
+
+    std::shared_ptr<Coordination::ZooKeeperMultiRequest> result = std::make_shared<Coordination::ZooKeeperMultiRequest>();
+    while (length > 0)
+    {
+        /// Error subtransactions deserialize to nullptr; keep them so that
+        /// hasErrorsInMultiRequest() below can detect failed multitransactions.
+        result->requests.push_back(deserializeTxnImpl(in, true));
+        length--;
+    }
+    return result;
+}
+
+bool isErrorRequest(Coordination::ZooKeeperRequestPtr request)
+{
+    return request == nullptr;
+}
+
+bool hasErrorsInMultiRequest(Coordination::ZooKeeperRequestPtr request)
+{
+    for (const auto & subrequest : dynamic_cast<Coordination::ZooKeeperMultiRequest *>(request.get())->requests)
+        if (subrequest == nullptr) /// error subtransaction
+            return true;
+    return false;
+}
+
+}
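As a reading aid, the record framing that the next function consumes can be summarized in one struct. This is purely illustrative (the actual code reads each field individually with `Coordination::read`) and mirrors the format description in the comment above:

```cpp
/// Illustrative only -- the on-disk framing of one ZooKeeper log record.
struct TxnRecordLayout
{
    int64_t checksum;   /// a zero checksum means zero padding until file end
    int32_t txn_len;    /// length of the serialized transaction body
    int64_t session_id; /// author of the transaction
    int32_t xid;        /// user XID
    int64_t zxid;       /// ZooKeeper transaction id
    int64_t time;       /// transaction time
    int32_t op_num;     /// transaction type
    /// [body depending on op_num]
    /// int32_t version + int64_t tree_digest (3.6+ tail, absent after a standalone ErrorTxn)
    /// int8_t terminator; /// always 0x42
};
```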
txn_len; + Coordination::read(txn_len, in); + int64_t count_before = in.count(); + int64_t session_id; + Coordination::read(session_id, in); + int32_t xid; + Coordination::read(xid, in); + int64_t zxid; + Coordination::read(zxid, in); + int64_t time; + Coordination::read(time, in); + + Coordination::ZooKeeperRequestPtr request = deserializeTxnImpl(in, false); + + /// For Error requests ZooKeeper doesn't store version + tree_digest + if (!isErrorRequest(request)) + { + int32_t version; + int64_t tree_digest; + Coordination::read(version, in); + Coordination::read(tree_digest, in); + } + + int64_t bytes_read = in.count() - count_before; + if (bytes_read < txn_len) + in.ignore(txn_len - bytes_read); + + /// We don't need to apply error requests + if (isErrorRequest(request)) + return true; + + request->xid = xid; + + if (zxid > storage.zxid) + { + /// Separate processing of session id requests + if (request->getOpNum() == Coordination::OpNum::SessionID) + { + const Coordination::ZooKeeperSessionIDRequest & session_id_request = dynamic_cast(*request); + storage.getSessionID(session_id_request.session_timeout_ms); + } + else + { + /// Skip failed multirequests + if (request->getOpNum() == Coordination::OpNum::Multi && hasErrorsInMultiRequest(request)) + return true; + + storage.processRequest(request, session_id, zxid); + } + } + + return true; +} + +void deserializeLogAndApplyToStorage(KeeperStorage & storage, const std::string & log_path) +{ + ReadBufferFromFile reader(log_path); + deserializeLogMagic(reader); + size_t counter = 0; + while (!reader.eof() && deserializeTxn(storage, reader)) + { + counter++; + if (counter % 1000 == 0) + std::cerr << "Deserialized from log: " << counter << std::endl; + + int8_t forty_two; + Coordination::read(forty_two, reader); + if (forty_two != 0x42) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Forty two check byte ({}) is not equal 0x42", forty_two); + } +} + +void deserializeLogsAndApplyToStorage(KeeperStorage & storage, const std::string & path) +{ + namespace fs = std::filesystem; + std::map existing_logs; + for (const auto & p : fs::directory_iterator(path)) + { + const auto & log_path = p.path(); + if (!log_path.has_filename() || !startsWith(log_path.filename(), "log.")) + continue; + int64_t zxid = getZxidFromName(log_path); + existing_logs[zxid] = p.path(); + } + + for (auto [zxid, log_path] : existing_logs) + { + if (zxid > storage.zxid) + deserializeLogAndApplyToStorage(storage, log_path); + } +} + +} diff --git a/src/Coordination/ZooKeeperDataReader.h b/src/Coordination/ZooKeeperDataReader.h new file mode 100644 index 00000000000..2716c9487b3 --- /dev/null +++ b/src/Coordination/ZooKeeperDataReader.h @@ -0,0 +1,17 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +void deserializeKeeperStorageFromSnapshot(KeeperStorage & storage, const std::string & snapshot_path, Poco::Logger * log = nullptr); + +void deserializeKeeperStorageFromSnapshotsDir(KeeperStorage & storage, const std::string & path, Poco::Logger * log = nullptr); + +void deserializeLogAndApplyToStorage(KeeperStorage & storage, const std::string & log_path, Poco::Logger * log = nullptr); + +void deserializeLogsAndApplyToStorage(KeeperStorage & storage, const std::string & path, Poco::Logger * log = nullptr); + +} diff --git a/src/Coordination/ZooKeeperSnapshotReader.cpp b/src/Coordination/ZooKeeperSnapshotReader.cpp deleted file mode 100644 index df758f870ee..00000000000 --- a/src/Coordination/ZooKeeperSnapshotReader.cpp +++ /dev/null @@ -1,183 +0,0 @@ -#include 
-#include <filesystem>
-#include <cstdlib>
-#include <string>
-#include <IO/ReadHelpers.h>
-#include <Common/ZooKeeper/ZooKeeperIO.h>
-#include <IO/ReadBufferFromFile.h>
-
-
-namespace DB
-{
-
-static String parentPath(const String & path)
-{
-    auto rslash_pos = path.rfind('/');
-    if (rslash_pos > 0)
-        return path.substr(0, rslash_pos);
-    return "/";
-}
-
-static std::string getBaseName(const String & path)
-{
-    size_t basename_start = path.rfind('/');
-    return std::string{&path[basename_start + 1], path.length() - basename_start - 1};
-}
-
-int64_t getZxidFromName(const std::string & filename)
-{
-    std::filesystem::path path(filename);
-    std::string extension = path.extension();
-    //std::cerr << "Extension:" << extension << std::endl;
-    char * end;
-    int64_t zxid = std::strtoul(extension.data() + 1, &end, 16);
-    return zxid;
-}
-
-void deserializeMagic(ReadBuffer & in)
-{
-    int32_t magic_header, version;
-    int64_t dbid;
-    Coordination::read(magic_header, in);
-    Coordination::read(version, in);
-    Coordination::read(dbid, in);
-    //const char * data = "ZKSN";
-    //std::cerr << "Expected Header:" << *reinterpret_cast<const int32_t *>(data) << std::endl;
-    //std::cerr << "MAGIC HEADER:" << magic_header << std::endl;
-    //std::cerr << "VERSION:" << version << std::endl;
-    //std::cerr << "DBID:" << dbid << std::endl;
-}
-
-int64_t deserializeSessionAndTimeout(KeeperStorage & storage, ReadBuffer & in)
-{
-    int32_t count;
-    Coordination::read(count, in);
-    //std::cerr << "Total session and timeout:" << count << std::endl;
-    int64_t max_session_id = 0;
-    while (count > 0)
-    {
-        int64_t session_id;
-        int32_t timeout;
-
-        Coordination::read(session_id, in);
-        Coordination::read(timeout, in);
-        //std::cerr << "Session id:" << session_id << std::endl;
-        //std::cerr << "Timeout:" << timeout << std::endl;
-        storage.addSessionID(session_id, timeout);
-        max_session_id = std::max(session_id, max_session_id);
-        count--;
-    }
-    std::cerr << "Done deserializing sessions\n";
-    return max_session_id;
-}
-
-void deserializeACLMap(KeeperStorage & storage, ReadBuffer & in)
-{
-    int32_t count;
-    Coordination::read(count, in);
-    //std::cerr << "ACLs Count:" << count << "\n";
-    while (count > 0)
-    {
-        int64_t map_index;
-        Coordination::read(map_index, in);
-        //std::cerr << "Map index:" << map_index << "\n";
-
-        Coordination::ACLs acls;
-        int32_t acls_len;
-        Coordination::read(acls_len, in);
-
-        //std::cerr << "ACLs len:" << acls_len << "\n";
-        while (acls_len > 0)
-        {
-            Coordination::ACL acl;
-            Coordination::read(acl.permissions, in);
-            Coordination::read(acl.scheme, in);
-            Coordination::read(acl.id, in);
-            //std::cerr << "ACL perms:" << acl.permissions << "\n";
-            //std::cerr << "ACL scheme:" << acl.scheme << "\n";
-            //std::cerr << "ACL id:" << acl.id << "\n";
-            acls.push_back(acl);
-            acls_len--;
-        }
-        storage.acl_map.addMapping(map_index, acls);
-
-        count--;
-    }
-    std::cerr << "Done deserializing ACLs Total" << count << "\n";
-}
-
-int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in)
-{
-    int64_t max_zxid = 0;
-    std::string path;
-    Coordination::read(path, in);
-    //std::cerr << "Read path FIRST length:" << path.length() << std::endl;
-    //std::cerr << "Read path FIRST data:" << path << std::endl;
-    size_t count = 0;
-    while (path != "/")
-    {
-        KeeperStorage::Node node{};
-        Coordination::read(node.data, in);
-        Coordination::read(node.acl_id, in);
-
-        /// Deserialize stat
-        Coordination::read(node.stat.czxid, in);
-        Coordination::read(node.stat.mzxid, in);
-        /// For some reason ZXID specified in filename can be smaller
-        /// then actual zxid from nodes.
-        max_zxid = std::max(max_zxid, node.stat.mzxid);
-
-        Coordination::read(node.stat.ctime, in);
-        Coordination::read(node.stat.mtime, in);
-        Coordination::read(node.stat.version, in);
-        Coordination::read(node.stat.cversion, in);
-        Coordination::read(node.stat.aversion, in);
-        Coordination::read(node.stat.ephemeralOwner, in);
-        Coordination::read(node.stat.pzxid, in);
-        if (!path.empty())
-        {
-            node.stat.dataLength = node.data.length();
-            node.seq_num = node.stat.cversion;
-            storage.container.insertOrReplace(path, node);
-
-            if (node.stat.ephemeralOwner != 0)
-                storage.ephemerals[node.stat.ephemeralOwner].insert(path);
-
-            storage.acl_map.addUsage(node.acl_id);
-        }
-        Coordination::read(path, in);
-        count++;
-        if (count % 1000 == 0)
-            std::cerr << "Deserialized nodes:" << count << std::endl;
-    }
-
-    for (const auto & itr : storage.container)
-    {
-        if (itr.key != "/")
-        {
-            auto parent_path = parentPath(itr.key);
-            storage.container.updateValue(parent_path, [&path = itr.key] (KeeperStorage::Node & value) { value.children.insert(getBaseName(path)); value.stat.numChildren++; });
-        }
-    }
-
-    return max_zxid;
-}
-
-void deserializeKeeperStorage(KeeperStorage & storage, const std::string & path)
-{
-    int64_t zxid = getZxidFromName(path);
-    //std::cerr << "Got ZXID:" << zxid << std::endl;
-
-    ReadBufferFromFile reader(path);
-
-    deserializeMagic(reader);
-    auto max_session_id = deserializeSessionAndTimeout(storage, reader);
-
-    storage.session_id_counter = max_session_id;
-    deserializeACLMap(storage, reader);
-
-    int64_t zxid_from_nodes = deserializeStorageData(storage, reader);
-    storage.zxid = std::max(zxid, zxid_from_nodes);
-}
-
-}
diff --git a/src/Coordination/ZooKeeperSnapshotReader.h b/src/Coordination/ZooKeeperSnapshotReader.h
deleted file mode 100644
index 8006f69a6f8..00000000000
--- a/src/Coordination/ZooKeeperSnapshotReader.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#pragma once
-#include <string>
-#include <IO/ReadBuffer.h>
-#include <IO/ReadBufferFromFile.h>
-#include <Coordination/KeeperStorage.h>
-#include <Common/ZooKeeper/ZooKeeperCommon.h>
-
-namespace DB
-{
-
-int64_t getZxidFromName(const std::string & filename);
-
-void deserializeMagic(ReadBuffer & in);
-
-int64_t deserializeSessionAndTimeout(KeeperStorage & storage, ReadBuffer & in);
-
-void deserializeACLMap(KeeperStorage & storage, ReadBuffer & in);
-
-int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in);
-
-void deserializeKeeperStorage(KeeperStorage & storage, const std::string & path);
-
-}
From 1a6abb4db468a12e9858db160c913168c669bf8f Mon Sep 17 00:00:00 2001
From: alesapin
Date: Thu, 17 Jun 2021 19:32:50 +0300
Subject: [PATCH 009/183] Better

---
 programs/CMakeLists.txt                       | 76 ++++++++++++++++++-
 programs/config_tools.h.in                    |  1 +
 programs/keeper-converter/CMakeLists.txt      |  9 +++
 programs/keeper-converter/KeeperConverter.cpp | 61 +++++++++++++++
 .../clickhouse-keeper-converter.cpp           |  2 +
 programs/main.cpp                             |  6 ++
 src/Coordination/ZooKeeperDataReader.cpp      | 57 +++++++++-----
 src/Coordination/ZooKeeperDataReader.h        |  8 +-
 8 files changed, 193 insertions(+), 27 deletions(-)
 create mode 100644 programs/keeper-converter/CMakeLists.txt
 create mode 100644 programs/keeper-converter/KeeperConverter.cpp
 create mode 100644 programs/keeper-converter/clickhouse-keeper-converter.cpp

diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt
index 2af0331c70b..c2d56ee6a17 100644
--- a/programs/CMakeLists.txt
+++ b/programs/CMakeLists.txt
@@ -49,11 +49,15 @@ option (ENABLE_CLICKHOUSE_GIT_IMPORT "A tool to analyze Git repositories"
 
 option (ENABLE_CLICKHOUSE_KEEPER "ClickHouse alternative to ZooKeeper" ${ENABLE_CLICKHOUSE_ALL})
 
+option
 (ENABLE_CLICKHOUSE_KEEPER_CONVERTER "A tool to convert ZooKeeper logs and snapshots into a clickhouse-keeper snapshot" ${ENABLE_CLICKHOUSE_ALL})
+
 if (NOT USE_NURAFT)
     # RECONFIGURE_MESSAGE_LEVEL should not be used here,
     # since USE_NURAFT is set to OFF for FreeBSD and Darwin.
-    message (STATUS "clickhouse-keeper will not be built (lack of NuRaft)")
+    message (STATUS "clickhouse-keeper and clickhouse-keeper-converter will not be built (lack of NuRaft)")
     set(ENABLE_CLICKHOUSE_KEEPER OFF)
+    set(ENABLE_CLICKHOUSE_KEEPER_CONVERTER OFF)
 endif()
 
 if (CLICKHOUSE_SPLIT_BINARY)
@@ -149,6 +153,12 @@ else()
     message(STATUS "ClickHouse keeper mode: OFF")
 endif()
 
+if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
+    message(STATUS "ClickHouse keeper-converter mode: ON")
+else()
+    message(STATUS "ClickHouse keeper-converter mode: OFF")
+endif()
+
 if(NOT (MAKE_STATIC_LIBRARIES OR SPLIT_SHARED_LIBRARIES))
     set(CLICKHOUSE_ONE_SHARED ON)
 endif()
@@ -270,6 +280,10 @@ if (ENABLE_CLICKHOUSE_KEEPER)
     add_subdirectory (keeper)
 endif()
 
+if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
+    add_subdirectory (keeper-converter)
+endif()
+
 if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
     add_subdirectory (odbc-bridge)
 endif ()
@@ -279,9 +293,51 @@ if (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE)
 endif ()
 
 if (CLICKHOUSE_ONE_SHARED)
-    add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_GIT_IMPORT_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES} ${CLICKHOUSE_KEEPER_SOURCES})
-    target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_GIT_IMPORT_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK} ${CLICKHOUSE_KEEPER_LINK})
-    target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_GIT_IMPORT_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE} ${CLICKHOUSE_KEEPER_INCLUDE})
+    add_library(clickhouse-lib SHARED
+        ${CLICKHOUSE_SERVER_SOURCES}
+        ${CLICKHOUSE_CLIENT_SOURCES}
+        ${CLICKHOUSE_LOCAL_SOURCES}
+        ${CLICKHOUSE_BENCHMARK_SOURCES}
+        ${CLICKHOUSE_COPIER_SOURCES}
+        ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES}
+        ${CLICKHOUSE_COMPRESSOR_SOURCES}
+        ${CLICKHOUSE_FORMAT_SOURCES}
+        ${CLICKHOUSE_OBFUSCATOR_SOURCES}
+        ${CLICKHOUSE_GIT_IMPORT_SOURCES}
+        ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}
+        ${CLICKHOUSE_KEEPER_SOURCES}
+        ${CLICKHOUSE_KEEPER_CONVERTER_SOURCES})
+
+    target_link_libraries(clickhouse-lib
+        ${CLICKHOUSE_SERVER_LINK}
+        ${CLICKHOUSE_CLIENT_LINK}
+        ${CLICKHOUSE_LOCAL_LINK}
+        ${CLICKHOUSE_BENCHMARK_LINK}
+        ${CLICKHOUSE_COPIER_LINK}
+        ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK}
+        ${CLICKHOUSE_COMPRESSOR_LINK}
+        ${CLICKHOUSE_FORMAT_LINK}
+        ${CLICKHOUSE_OBFUSCATOR_LINK}
+        ${CLICKHOUSE_GIT_IMPORT_LINK}
+        ${CLICKHOUSE_ODBC_BRIDGE_LINK}
+        ${CLICKHOUSE_KEEPER_LINK}
+        ${CLICKHOUSE_KEEPER_CONVERTER_LINK})
+
+    target_include_directories(clickhouse-lib
+        ${CLICKHOUSE_SERVER_INCLUDE}
+        ${CLICKHOUSE_CLIENT_INCLUDE}
+        ${CLICKHOUSE_LOCAL_INCLUDE}
+        ${CLICKHOUSE_BENCHMARK_INCLUDE}
+        ${CLICKHOUSE_COPIER_INCLUDE}
+        ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE}
+        ${CLICKHOUSE_COMPRESSOR_INCLUDE}
+        ${CLICKHOUSE_FORMAT_INCLUDE}
+        ${CLICKHOUSE_OBFUSCATOR_INCLUDE}
+        ${CLICKHOUSE_GIT_IMPORT_INCLUDE}
+        ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE}
+        ${CLICKHOUSE_KEEPER_INCLUDE}
+        ${CLICKHOUSE_KEEPER_CONVERTER_INCLUDE})
+
     set_target_properties(clickhouse-lib PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR} VERSION ${VERSION_SO} OUTPUT_NAME clickhouse DEBUG_POSTFIX "")
     install (TARGETS clickhouse-lib LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse)
 endif()
@@ -312,6 +368,10 @@ if (CLICKHOUSE_SPLIT_BINARY)
         list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-keeper)
     endif ()
 
+    if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
+        list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-keeper-converter)
+    endif ()
+
     set_target_properties(${CLICKHOUSE_ALL_TARGETS} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
     add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_ALL_TARGETS})
 
@@ -362,6 +422,9 @@ else ()
     if (ENABLE_CLICKHOUSE_KEEPER)
         clickhouse_target_link_split_lib(clickhouse keeper)
     endif()
+    if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
+        clickhouse_target_link_split_lib(clickhouse keeper-converter)
+    endif()
     if (ENABLE_CLICKHOUSE_INSTALL)
         clickhouse_target_link_split_lib(clickhouse install)
     endif ()
@@ -422,6 +485,11 @@ else ()
         install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
         list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper)
     endif ()
+    if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
+        add_custom_target (clickhouse-keeper-converter ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper-converter DEPENDS clickhouse)
+        install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-converter" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
+        list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter)
+    endif ()
 
     install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
 
diff --git a/programs/config_tools.h.in b/programs/config_tools.h.in
index 50ba0c16a83..62fc076861c 100644
--- a/programs/config_tools.h.in
+++ b/programs/config_tools.h.in
@@ -17,3 +17,4 @@
 #cmakedefine01 ENABLE_CLICKHOUSE_ODBC_BRIDGE
 #cmakedefine01 ENABLE_CLICKHOUSE_LIBRARY_BRIDGE
 #cmakedefine01 ENABLE_CLICKHOUSE_KEEPER
+#cmakedefine01 ENABLE_CLICKHOUSE_KEEPER_CONVERTER
diff --git a/programs/keeper-converter/CMakeLists.txt b/programs/keeper-converter/CMakeLists.txt
new file mode 100644
index 00000000000..d529f94d388
--- /dev/null
+++ b/programs/keeper-converter/CMakeLists.txt
@@ -0,0 +1,9 @@
+set (CLICKHOUSE_KEEPER_CONVERTER_SOURCES KeeperConverter.cpp)
+
+set (CLICKHOUSE_KEEPER_CONVERTER_LINK
+    PRIVATE
+        boost::program_options
+        dbms
+)
+
+clickhouse_program_add(keeper-converter)
diff --git a/programs/keeper-converter/KeeperConverter.cpp b/programs/keeper-converter/KeeperConverter.cpp
new file mode 100644
index 00000000000..15dbc8bd220
--- /dev/null
+++ b/programs/keeper-converter/KeeperConverter.cpp
@@ -0,0 +1,61 @@
+#include <iostream>
+#include <optional>
+#include <boost/program_options.hpp>
+
+#include <Coordination/KeeperSnapshotManager.h>
+#include <Coordination/ZooKeeperDataReader.h>
+#include <Common/TerminalSize.h>
+#include <Poco/ConsoleChannel.h>
+#include <Poco/AutoPtr.h>
+#include <Poco/Logger.h>
+#include <Common/Exception.h>
+
+
+int mainEntryClickHouseKeeperConverter(int argc, char ** argv)
+{
+    using namespace DB;
+    namespace po = boost::program_options;
+
+    po::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth());
+    desc.add_options()
+        ("help,h", "produce help message")
+        ("zookeeper-logs-dir", po::value<std::string>(), "Path to directory with ZooKeeper logs")
directory with ZooKeeper logs") + ("zookeeper-snapshots-dir", po::value(), "Path to directory with ZooKeeper snapshots") + ("output-dir", po::value(), "Directory to place output clickhouse-keeper snapshot") + ; + po::variables_map options; + po::store(po::command_line_parser(argc, argv).options(desc).run(), options); + Poco::AutoPtr console_channel(new Poco::ConsoleChannel); + + Poco::Logger * logger = &Poco::Logger::get("KeeperConverter"); + logger->setChannel(console_channel); + + if (options.count("help")) + { + std::cout << "Usage: " << argv[0] << " --zookeeper-logs-dir /var/lib/zookeeper/data/version-2 --zookeeper-snapshots-dir /var/lib/zookeeper/data/version-2 --output-dir /var/lib/clickhouse/coordination/snapshots" << std::endl; + std::cout << desc << std::endl; + return 0; + } + + try + { + DB::KeeperStorage storage(500, ""); + + DB::deserializeKeeperStorageFromSnapshotsDir(storage, options["zookeeper-snapshots-dir"].as(), logger); + DB::deserializeLogsAndApplyToStorage(storage, options["zookeeper-logs-dir"].as(), logger); + DB::SnapshotMetadataPtr snapshot_meta = std::make_shared(storage.getZXID(), 1, std::make_shared()); + DB::KeeperStorageSnapshot snapshot(&storage, snapshot_meta); + + DB::KeeperSnapshotManager manager(options["output-dir"].as(), 1); + auto snp = manager.serializeSnapshotToBuffer(snapshot); + auto path = manager.serializeSnapshotBufferToDisk(*snp, storage.getZXID()); + std::cout << "Snapshot serialized to path:" << path << std::endl; + } + catch (...) + { + std::cerr << getCurrentExceptionMessage(true) << '\n'; + return getCurrentExceptionCode(); + } + + return 0; +} diff --git a/programs/keeper-converter/clickhouse-keeper-converter.cpp b/programs/keeper-converter/clickhouse-keeper-converter.cpp new file mode 100644 index 00000000000..3cb6f99f837 --- /dev/null +++ b/programs/keeper-converter/clickhouse-keeper-converter.cpp @@ -0,0 +1,2 @@ +int mainEntryClickHouseKeeperConverter(int argc, char ** argv); +int main(int argc_, char ** argv_) { return mainEntryClickHouseKeeperConverter(argc_, argv_); } diff --git a/programs/main.cpp b/programs/main.cpp index c5df2596422..b03d6a4a590 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -59,6 +59,9 @@ int mainEntryClickHouseGitImport(int argc, char ** argv); #if ENABLE_CLICKHOUSE_KEEPER int mainEntryClickHouseKeeper(int argc, char ** argv); #endif +#if ENABLE_CLICKHOUSE_KEEPER +int mainEntryClickHouseKeeperConverter(int argc, char ** argv); +#endif #if ENABLE_CLICKHOUSE_INSTALL int mainEntryClickHouseInstall(int argc, char ** argv); int mainEntryClickHouseStart(int argc, char ** argv); @@ -119,6 +122,9 @@ std::pair clickhouse_applications[] = #if ENABLE_CLICKHOUSE_KEEPER {"keeper", mainEntryClickHouseKeeper}, #endif +#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER + {"keeper-converter", mainEntryClickHouseKeeperConverter}, +#endif #if ENABLE_CLICKHOUSE_INSTALL {"install", mainEntryClickHouseInstall}, {"start", mainEntryClickHouseStart}, diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index 60882993c0f..a2ab85e0625 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -45,6 +45,8 @@ void deserializeSnapshotMagic(ReadBuffer & in) int64_t dbid; Coordination::read(magic_header, in); Coordination::read(version, in); + if (version != 2) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot deserialize ZooKeeper data other than version 2, got version {}", version); Coordination::read(dbid, in); static constexpr int32_t SNP_HEADER = 
1514885966; /// "ZKSN" if (magic_header != SNP_HEADER) @@ -98,7 +100,7 @@ void deserializeACLMap(KeeperStorage & storage, ReadBuffer & in) } } -int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in) +int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in, Poco::Logger * log) { int64_t max_zxid = 0; std::string path; @@ -138,7 +140,7 @@ int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in) Coordination::read(path, in); count++; if (count % 1000 == 0) - std::cerr << "Deserialized nodes from snapshot:" << count << std::endl; + LOG_INFO(log, "Deserialized nodes from snapshot: {}", count); } for (const auto & itr : storage.container) @@ -153,23 +155,31 @@ int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in) return max_zxid; } -void deserializeKeeperStorageFromSnapshot(KeeperStorage & storage, const std::string & snapshot_path) +void deserializeKeeperStorageFromSnapshot(KeeperStorage & storage, const std::string & snapshot_path, Poco::Logger * log) { + LOG_INFO(log, "Deserializing storage snapshot {}", snapshot_path); int64_t zxid = getZxidFromName(snapshot_path); ReadBufferFromFile reader(snapshot_path); deserializeSnapshotMagic(reader); + + LOG_INFO(log, "Magic deserialized, looks OK"); auto max_session_id = deserializeSessionAndTimeout(storage, reader); + LOG_INFO(log, "Sessions and timeouts deserialized"); storage.session_id_counter = max_session_id; deserializeACLMap(storage, reader); + LOG_INFO(log, "ACLs deserialized"); - int64_t zxid_from_nodes = deserializeStorageData(storage, reader); + LOG_INFO(log, "Deserializing data from snapshot"); + int64_t zxid_from_nodes = deserializeStorageData(storage, reader, log); storage.zxid = std::max(zxid, zxid_from_nodes); + + LOG_INFO(log, "Finished, snapshot ZXID {}", storage.zxid); } -void deserializeKeeperStorageFromSnapshotsDir(KeeperStorage & storage, const std::string & path) +void deserializeKeeperStorageFromSnapshotsDir(KeeperStorage & storage, const std::string & path, Poco::Logger * log) { namespace fs = std::filesystem; std::map existing_snapshots; @@ -181,9 +191,13 @@ void deserializeKeeperStorageFromSnapshotsDir(KeeperStorage & storage, const std int64_t zxid = getZxidFromName(log_path); existing_snapshots[zxid] = p.path(); } + + LOG_INFO(log, "Totally have {} snapshots, will use latest", existing_snapshots.size()); /// deserialize only from latest snapshot if (!existing_snapshots.empty()) - deserializeKeeperStorageFromSnapshot(storage, existing_snapshots.rbegin()->second); + deserializeKeeperStorageFromSnapshot(storage, existing_snapshots.rbegin()->second, log); + else + throw Exception(ErrorCodes::CORRUPTED_DATA, "No snapshots found on path {}. 
At least one snapshot must exist.", path); } void deserializeLogMagic(ReadBuffer & in) @@ -197,6 +211,9 @@ void deserializeLogMagic(ReadBuffer & in) static constexpr int32_t LOG_HEADER = 1514884167; /// "ZKLG" if (magic_header != LOG_HEADER) throw Exception(ErrorCodes::CORRUPTED_DATA ,"Incorrect magic header in file, expected {}, got {}", LOG_HEADER, magic_header); + + if (version != 2) + throw Exception(ErrorCodes::NOT_IMPLEMENTED,"Cannot deserialize ZooKeeper data other than version 2, got version {}", version); } @@ -435,15 +452,7 @@ bool deserializeTxn(KeeperStorage & storage, ReadBuffer & in) Coordination::ZooKeeperRequestPtr request = deserializeTxnImpl(in, false); - /// For Error requests ZooKeeper doesn't store version + tree_digest - if (!isErrorRequest(request)) - { - int32_t version; - int64_t tree_digest; - Coordination::read(version, in); - Coordination::read(tree_digest, in); - } - + /// Skip all other bytes int64_t bytes_read = in.count() - count_before; if (bytes_read < txn_len) in.ignore(txn_len - bytes_read); @@ -475,25 +484,31 @@ bool deserializeTxn(KeeperStorage & storage, ReadBuffer & in) return true; } -void deserializeLogAndApplyToStorage(KeeperStorage & storage, const std::string & log_path) +void deserializeLogAndApplyToStorage(KeeperStorage & storage, const std::string & log_path, Poco::Logger * log) { ReadBufferFromFile reader(log_path); + + LOG_INFO(log, "Deserializing log {}", log_path); deserializeLogMagic(reader); + LOG_INFO(log, "Header looks OK"); + size_t counter = 0; while (!reader.eof() && deserializeTxn(storage, reader)) { counter++; if (counter % 1000 == 0) - std::cerr << "Deserialized from log: " << counter << std::endl; + LOG_INFO(log, "Deserialized txns log: {}", counter); int8_t forty_two; Coordination::read(forty_two, reader); if (forty_two != 0x42) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Forty two check byte ({}) is not equal 0x42", forty_two); } + + LOG_INFO(log, "Finished {} deserialization, totally read {} records", log_path, counter); } -void deserializeLogsAndApplyToStorage(KeeperStorage & storage, const std::string & path) +void deserializeLogsAndApplyToStorage(KeeperStorage & storage, const std::string & path, Poco::Logger * log) { namespace fs = std::filesystem; std::map existing_logs; @@ -506,10 +521,14 @@ void deserializeLogsAndApplyToStorage(KeeperStorage & storage, const std::string existing_logs[zxid] = p.path(); } + LOG_INFO(log, "Totally have {} logs", existing_logs.size()); + for (auto [zxid, log_path] : existing_logs) { if (zxid > storage.zxid) - deserializeLogAndApplyToStorage(storage, log_path); + deserializeLogAndApplyToStorage(storage, log_path, log); + else + LOG_INFO(log, "Skipping log {}, it's ZXID {} is smaller than storages ZXID {}", log_path, zxid, storage.zxid); } } diff --git a/src/Coordination/ZooKeeperDataReader.h b/src/Coordination/ZooKeeperDataReader.h index 2716c9487b3..5f26457c113 100644 --- a/src/Coordination/ZooKeeperDataReader.h +++ b/src/Coordination/ZooKeeperDataReader.h @@ -6,12 +6,12 @@ namespace DB { -void deserializeKeeperStorageFromSnapshot(KeeperStorage & storage, const std::string & snapshot_path, Poco::Logger * log = nullptr); +void deserializeKeeperStorageFromSnapshot(KeeperStorage & storage, const std::string & snapshot_path, Poco::Logger * log); -void deserializeKeeperStorageFromSnapshotsDir(KeeperStorage & storage, const std::string & path, Poco::Logger * log = nullptr); +void deserializeKeeperStorageFromSnapshotsDir(KeeperStorage & storage, const std::string & path, Poco::Logger * log); 
-void deserializeLogAndApplyToStorage(KeeperStorage & storage, const std::string & log_path, Poco::Logger * log = nullptr); +void deserializeLogAndApplyToStorage(KeeperStorage & storage, const std::string & log_path, Poco::Logger * log); -void deserializeLogsAndApplyToStorage(KeeperStorage & storage, const std::string & path, Poco::Logger * log = nullptr); +void deserializeLogsAndApplyToStorage(KeeperStorage & storage, const std::string & path, Poco::Logger * log); } From a62957dba67fae2c9710f28698568fa488978e70 Mon Sep 17 00:00:00 2001 From: meoww-bot <14239840+meoww-bot@users.noreply.github.com> Date: Fri, 18 Jun 2021 00:44:14 +0800 Subject: [PATCH 010/183] Add zh translation for embedded-rocksdb.md --- .../integrations/embedded-rocksdb.md | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 docs/zh/engines/table-engines/integrations/embedded-rocksdb.md diff --git a/docs/zh/engines/table-engines/integrations/embedded-rocksdb.md b/docs/zh/engines/table-engines/integrations/embedded-rocksdb.md new file mode 100644 index 00000000000..7c04600894e --- /dev/null +++ b/docs/zh/engines/table-engines/integrations/embedded-rocksdb.md @@ -0,0 +1,42 @@ +--- +toc_priority: 9 +toc_title: EmbeddedRocksDB +--- + +# EmbeddedRocksDB 引擎 {#EmbeddedRocksDB-engine} + +这个引擎允许 ClickHouse 与 [rocksdb](http://rocksdb.org/) 进行集成。 + +## 创建一张表 {#table_engine-EmbeddedRocksDB-creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... +) ENGINE = EmbeddedRocksDB PRIMARY KEY(primary_key_name) +``` + +必要参数: + +- `primary_key_name` – any column name in the column list. +- 必须指定 `primary key`, 仅支持主键中的一个列. 主键将被序列化为二进制的`rocksdb key`. +- 主键以外的列将以相应的顺序在二进制中序列化为`rocksdb`值. +- 带有键 `equals` 或 `in` 过滤的查询将被优化为从 `rocksdb` 进行多键查询. 
+
+Example:
+
+``` sql
+CREATE TABLE test
+(
+    `key` String,
+    `v1` UInt32,
+    `v2` String,
+    `v3` Float32
+)
+ENGINE = EmbeddedRocksDB
+PRIMARY KEY key
+```
+
+[Original article](https://clickhouse.tech/docs/en/engines/table-engines/integrations/embedded-rocksdb/)
From 1747c254dc461a504f8bb3b578fa35d798592896 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Thu, 17 Jun 2021 21:36:50 +0300
Subject: [PATCH 011/183] Remove unused flag

---
 src/Coordination/ZooKeeperDataReader.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp
index a2ab85e0625..5ce1f418c27 100644
--- a/src/Coordination/ZooKeeperDataReader.cpp
+++ b/src/Coordination/ZooKeeperDataReader.cpp
@@ -402,14 +402,11 @@ Coordination::ZooKeeperRequestPtr deserializeMultiTxn(ReadBuffer & in)
     Coordination::read(length, in);
 
     std::shared_ptr<Coordination::ZooKeeperMultiRequest> result = std::make_shared<Coordination::ZooKeeperMultiRequest>();
-    bool error_found = false;
     while (length > 0)
     {
         auto subrequest = deserializeTxnImpl(in, true);
         if (subrequest)
             result->requests.push_back(subrequest);
-        else
-            error_found = true;
         length--;
     }
     return result;
From 8c97247fe1e7e719f02eff9cd98be8adb869d651 Mon Sep 17 00:00:00 2001
From: George
Date: Fri, 18 Jun 2021 00:11:52 +0300
Subject: [PATCH 012/183] unrelated fix

---
 .../aggregate-functions/reference/quantileexact.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md
index 005d039e7c5..3953bd81232 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md
@@ -111,8 +111,7 @@ Result:
 
 Similar to `quantileExact`, this computes the exact [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
 
-All the passed values are combined into an array, which is then fully sorted,
-to get the exact value. The sorting [algorithm's](https://en.cppreference.com/w/cpp/algorithm/sort) complexity is `O(N·log(N))`, where `N = std::distance(first, last)` comparisons.
+All the passed values are combined into an array, which is then fully sorted, to get the exact value. The sorting [algorithm's](https://en.cppreference.com/w/cpp/algorithm/sort) complexity is `O(N·log(N))`, where `N = std::distance(first, last)` comparisons.
 
 The return value depends on the quantile level and the number of elements in the selection, i.e. if the level is 0.5, then the function returns the higher median value for an even number of elements and the middle median value for an odd number of elements. Median is calculated similarly to the [median_high](https://docs.python.org/3/library/statistics.html#statistics.median_high) implementation which is used in python. For all other levels, the element at the index corresponding to the value of `level * size_of_array` is returned.
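The lower/higher median behaviour described in the documentation above is modelled on Python's `statistics` module, which the docs cite directly. A minimal sketch illustrating that distinction (illustrative only, not part of any patch; the sample values are made up):

```python
#!/usr/bin/env python3
# Illustrates the median behaviour documented for quantileExactLow /
# quantileExactHigh using Python's statistics module, which the ClickHouse
# documentation references as the model implementation.
import statistics

sample = [10, 20, 30, 40]  # even number of elements: two middle values

# quantileExactLow is documented to behave like median_low:
# the lower of the two middle values is returned.
assert statistics.median_low(sample) == 20

# quantileExactHigh is documented to behave like median_high:
# the higher of the two middle values is returned.
assert statistics.median_high(sample) == 30

print("median_low:", statistics.median_low(sample))
print("median_high:", statistics.median_high(sample))
```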
From 9f36eb6210f0f7cac9995c6a537f4a30b2c14243 Mon Sep 17 00:00:00 2001
From: George
Date: Fri, 18 Jun 2021 05:13:38 +0300
Subject: [PATCH 013/183] Fixes

---
 .../aggregate-functions/reference/quantileexact.md | 12 +++++++-----
 .../aggregate-functions/reference/quantiles.md     |  7 ++++---
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md
index 3953bd81232..e7890f231bb 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md
@@ -2,7 +2,9 @@
 toc_priority: 202
 ---
 
-# quantileExact {#quantileexact}
+# quantileExact Functions {#quantileexact-functions}
+
+## quantileExact {#quantileexact}
 
 Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
 
@@ -49,7 +51,7 @@ Result:
 └───────────────────────┘
 ```
 
-# quantileExactLow {#quantileexactlow}
+## quantileExactLow {#quantileexactlow}
 
 Similar to `quantileExact`, this computes the exact [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
 
@@ -107,7 +109,7 @@ Result:
 │ 4 │
 └──────────────────────────┘
 ```
-# quantileExactHigh {#quantileexacthigh}
+## quantileExactHigh {#quantileexacthigh}
 
 Similar to `quantileExact`, this computes the exact [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
 
@@ -176,7 +178,7 @@ quantileExactExclusive(level)(expr)
 
 **Arguments**
 
-- `level` — Level of quantile. Optional parameter. Constant floating-point number in the range `(0, 1)`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
+- `level` — Level of quantile. Optional. Possible values: (0, 1). Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md).
 - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
 
@@ -225,7 +227,7 @@ quantileExactInclusive(level)(expr)
 
 **Arguments**
 
-- `level` — Level of quantile. Optional parameter. Constant floating-point number in the range `[0, 1]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
+- `level` — Level of quantile. Optional. Possible values: [0, 1]. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md).
 - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
 
 **Returned value**
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md
index 9723e0ee29c..6fcc7f2d0fe 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md
@@ -2,7 +2,8 @@
 toc_priority: 201
 ---
 
-# quantiles {#quantiles}
+# quantiles Functions {#quantiles-functions}
+## quantiles {#quantiles}
 
 Syntax: `quantiles(level1, level2, …)(x)`
 
@@ -26,7 +27,7 @@ quantilesExactExclusive(level1, level2, ...)(expr)
 
 **Arguments**
 
-- `level` — Leveles of quantiles. Constant floating-point numbers in the range `(0, 1)`.
+- `level` — Levels of quantiles. Possible values: (0, 1).
 - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
 
 **Returned value**
@@ -75,7 +76,7 @@ quantilesExactInclusive(level1, level2, ...)(expr)
 
 **Arguments**
 
-- `level` — Leveles of quantiles. Constant floating-point numbers in the range `[0, 1]`.
+- `level` — Levels of quantiles. Possible values: [0, 1].
 - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
 
 **Returned value**
From 2d8f45a0981ab15a6bf202d325b9a7783451fc7a Mon Sep 17 00:00:00 2001
From: alesapin
Date: Fri, 18 Jun 2021 11:55:59 +0300
Subject: [PATCH 014/183] Add some initialization

---
 src/Coordination/ZooKeeperDataReader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp
index 5ce1f418c27..e0d0fbc85b6 100644
--- a/src/Coordination/ZooKeeperDataReader.cpp
+++ b/src/Coordination/ZooKeeperDataReader.cpp
@@ -349,7 +349,7 @@ Coordination::ZooKeeperRequestPtr deserializeTxnImpl(ReadBuffer & in, bool subtx
 {
     int32_t type;
     Coordination::read(type, in);
-    Coordination::ZooKeeperRequestPtr result;
+    Coordination::ZooKeeperRequestPtr result = nullptr;
     int32_t sub_txn_length = 0;
     if (subtxn)
         Coordination::read(sub_txn_length, in);
From 5a014cb1e1eff5e0255044bc766102f7e0c448fa Mon Sep 17 00:00:00 2001
From: alesapin
Date: Fri, 18 Jun 2021 14:02:15 +0300
Subject: [PATCH 015/183] Add sometest

---
 docker/test/integration/base/Dockerfile       | 19 ++++-
 .../__init__.py                               |  1 +
 .../configs/keeper_config.xml                 | 23 ++++++
 .../configs/logs_conf.xml                     | 12 +++
 .../test_keeper_zookeeper_converter/test.py   | 76 +++++++++++++++++++
 5 files changed, 130 insertions(+), 1 deletion(-)
 create mode 100644 tests/integration/test_keeper_zookeeper_converter/__init__.py
 create mode 100644 tests/integration/test_keeper_zookeeper_converter/configs/keeper_config.xml
 create mode 100644 tests/integration/test_keeper_zookeeper_converter/configs/logs_conf.xml
 create mode 100644 tests/integration/test_keeper_zookeeper_converter/test.py

diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile
index 1c962f1bf8f..e15697da029 100644
--- a/docker/test/integration/base/Dockerfile
+++ b/docker/test/integration/base/Dockerfile
@@ -1,6 +1,8 @@
 # docker build -t yandex/clickhouse-integration-test .
 FROM yandex/clickhouse-test-base
 
+SHELL ["/bin/bash", "-c"]
+
 RUN apt-get update \
     && env DEBIAN_FRONTEND=noninteractive apt-get -y install \
         tzdata \
@@ -20,7 +22,9 @@ RUN apt-get update \
         krb5-user \
         iproute2 \
         lsof \
-        g++
+        g++ \
+        default-jre
+
 RUN rm -rf \
     /var/lib/apt/lists/* \
     /var/cache/debconf \
@@ -30,6 +34,19 @@ RUN apt-get clean
 # Install MySQL ODBC driver
 RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit.tar.gz' --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so
 
+# Unfortunately this is required for a single test for converting data from ZooKeeper to clickhouse-keeper.
+# ZooKeeper is not started by default, but consumes some space in containers.
+# 777 perms used to allow anybody to start/stop ZooKeeper
+ENV ZOOKEEPER_VERSION='3.6.3'
+RUN curl -O "https://mirrors.estointernet.in/apache/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz"
+RUN tar -zxvf apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz && mv apache-zookeeper-${ZOOKEEPER_VERSION}-bin /opt/zookeeper && chmod -R 777 /opt/zookeeper && rm apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz
+RUN echo $'tickTime=2500 \n\
+dataDir=/zookeeper \n\
+clientPort=2181 \n\
+maxClientCnxns=80' > /opt/zookeeper/conf/zoo.cfg
+RUN mkdir /zookeeper && chmod -R 777 /zookeeper
+
 ENV TZ=Europe/Moscow
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
diff --git a/tests/integration/test_keeper_zookeeper_converter/__init__.py b/tests/integration/test_keeper_zookeeper_converter/__init__.py
new file mode 100644
index 00000000000..e5a0d9b4834
--- /dev/null
+++ b/tests/integration/test_keeper_zookeeper_converter/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python3
diff --git a/tests/integration/test_keeper_zookeeper_converter/configs/keeper_config.xml b/tests/integration/test_keeper_zookeeper_converter/configs/keeper_config.xml
new file mode 100644
index 00000000000..ceaca04762e
--- /dev/null
+++ b/tests/integration/test_keeper_zookeeper_converter/configs/keeper_config.xml
@@ -0,0 +1,23 @@
+<yandex>
+    <keeper_server>
+        <tcp_port>9181</tcp_port>
+        <server_id>1</server_id>
+        <log_storage_path>/var/lib/clickhouse/coordination/logs</log_storage_path>
+        <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
+
+        <coordination_settings>
+            <operation_timeout_ms>5000</operation_timeout_ms>
+            <session_timeout_ms>10000</session_timeout_ms>
+            <raft_logs_level>trace</raft_logs_level>
+            <snapshot_distance>75</snapshot_distance>
+        </coordination_settings>
+
+        <raft_configuration>
+            <server>
+                <id>1</id>
+                <hostname>localhost</hostname>
+                <port>44444</port>
+            </server>
+        </raft_configuration>
+    </keeper_server>
+</yandex>
diff --git a/tests/integration/test_keeper_zookeeper_converter/configs/logs_conf.xml b/tests/integration/test_keeper_zookeeper_converter/configs/logs_conf.xml
new file mode 100644
index 00000000000..318a6bca95d
--- /dev/null
+++ b/tests/integration/test_keeper_zookeeper_converter/configs/logs_conf.xml
@@ -0,0 +1,12 @@
+<yandex>
+    <shutdown_wait_unfinished>3</shutdown_wait_unfinished>
+    <logger>
+        <level>trace</level>
+        <log>/var/log/clickhouse-server/log.log</log>
+        <errorlog>/var/log/clickhouse-server/log.err.log</errorlog>
+        <size>1000M</size>
+        <count>10</count>
+        <stderr>/var/log/clickhouse-server/stderr.log</stderr>
+        <stdout>/var/log/clickhouse-server/stdout.log</stdout>
+    </logger>
+</yandex>
diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py
new file mode 100644
index 00000000000..97c16b09cd9
--- /dev/null
+++ b/tests/integration/test_keeper_zookeeper_converter/test.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+
+#!/usr/bin/env python3
+import pytest
+from helpers.cluster import ClickHouseCluster
+from kazoo.client import KazooClient, KazooState
+from kazoo.security import ACL, make_digest_acl, make_acl
+from kazoo.exceptions import AuthFailedError, InvalidACLError, NoAuthError, KazooException
+
+cluster = ClickHouseCluster(__file__)
+
+node = cluster.add_instance('node', main_configs=['configs/keeper_config.xml', 'configs/logs_conf.xml'], stay_alive=True)
+
+def start_zookeeper():
+    node.exec_in_container(['bash', '-c', '/opt/zookeeper/bin/zkServer.sh start'])
+
+def stop_zookeeper():
+    node.exec_in_container(['bash', '-c', '/opt/zookeeper/bin/zkServer.sh stop'])
+
+def clear_clickhouse_data():
+    node.exec_in_container(['bash', '-c', 'rm -fr /var/lib/clickhouse/coordination/logs/* /var/lib/clickhouse/coordination/snapshots/*'])
+
+def convert_zookeeper_data():
+    cmd = '/usr/bin/clickhouse keeper-converter --zookeeper-logs-dir /zookeeper/version-2/ --zookeeper-snapshots-dir /zookeeper/version-2/ --output-dir /var/lib/clickhouse/coordination/snapshots'
+    node.exec_in_container(['bash', '-c', cmd])
+
+def stop_clickhouse():
+    node.stop_clickhouse()
+
+def start_clickhouse():
+    node.start_clickhouse()
+
+def copy_zookeeper_data():
+    stop_zookeeper()
+    stop_clickhouse()
+    clear_clickhouse_data()
+    convert_zookeeper_data()
+    print(node.exec_in_container)
+    start_zookeeper()
+    start_clickhouse()
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+
+        yield cluster
+
+    finally:
+        cluster.shutdown()
+
+def get_fake_zk(timeout=30.0):
+    _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip('node') + ":9181", timeout=timeout)
+    _fake_zk_instance.start()
+    return _fake_zk_instance
+
+def get_genuine_zk(timeout=30.0):
+    _genuine_zk_instance = KazooClient(hosts=cluster.get_instance_ip('node') + ":2181", timeout=timeout)
+    _genuine_zk_instance.start()
+    return _genuine_zk_instance
+
+def compare_states(zk1, zk2):
+
+def test_smoke(started_cluster):
+    start_zookeeper()
+
+    genuine_connection = get_genuine_zk()
+    genuine_connection.create("/test", b"data")
+
+    assert genuine_connection.get("/test")[0] == b"data"
+
+    copy_zookeeper_data()
+
+    fake_connection = get_fake_zk()
+    assert fake_connection.get("/test")[0] == b"data"
+    assert genuine_connection.get("/test")[0] == b"data"
From 3173b285fdf5ba7417fb5474c06f12a136b33f87 Mon Sep 17 00:00:00 2001
From: meoww-bot <14239840+meoww-bot@users.noreply.github.com>
Date: Fri, 18 Jun 2021 21:34:14 +0800
Subject: [PATCH 016/183] Add zh translation for s3.md

---
 .../engines/table-engines/integrations/s3.md | 213 ++++++++++++++++++
 1 file changed, 213 insertions(+)
 create mode 100644 docs/zh/engines/table-engines/integrations/s3.md

diff --git a/docs/zh/engines/table-engines/integrations/s3.md b/docs/zh/engines/table-engines/integrations/s3.md
new file mode 100644
index 00000000000..5b934dae2c4
--- /dev/null
+++ b/docs/zh/engines/table-engines/integrations/s3.md
@@ -0,0 +1,213 @@
+---
+toc_priority: 7
+toc_title: S3
+---
+
+# S3 Table Engine {#table-engine-s3}
+
+This engine provides integration with the [Amazon S3](https://aws.amazon.com/s3/) ecosystem. It is similar to the [HDFS](../../../engines/table-engines/integrations/hdfs.md) engine, but provides S3-specific features.
+
+## Creating a Table {#creating-a-table}
+
+``` sql
+CREATE TABLE s3_engine_table (name String, value UInt32)
+ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, [compression])
+```
+
+**Engine parameters**
+
+- `path` — Bucket URL with a path to the file. Supports the following wildcards in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc'`, `'def'` are strings. For more information, see [below](#wildcards-in-path).
+- `format` — The [format](../../../interfaces/formats.md#formats) of the file.
+- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for an [AWS](https://aws.amazon.com/) account. You can use the credentials to authenticate your requests. These parameters are optional. If credentials are not specified, they are read from the configuration file. For more information, see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
+- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. The parameter is optional. By default, the compression type is detected automatically from the file extension.
+
+**Example**
+
+1. Create the `s3_engine_table` table:
+
+``` sql
+CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE=S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'gzip');
+```
+
+2. Fill the file:
+
+``` sql
+INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3);
+```
+
+3. Query the data:
+
+``` sql
+SELECT * FROM s3_engine_table LIMIT 2;
+```
+
+```text
+┌─name─┬─value─┐
+│ one  │     1 │
+│ two  │     2 │
+└──────┴───────┘
+```
+## Virtual Columns {#virtual-columns}
+
+- `_path` — Path to the file.
+- `_file` — Name of the file.
+
+For more information about virtual columns, see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns).
+
+## Implementation Details {#implementation-details}
+
+- Reads and writes can be parallel.
+- The following are not supported:
+    - `ALTER` and `SELECT...SAMPLE` operations.
+    - Indexes.
+    - Replication.
+
+## Wildcards in Path {#wildcards-in-path}
+
+The `path` argument can specify multiple files using bash-like wildcards. To be processed, a file should exist and match the whole path pattern. The listing of files is determined during `SELECT` (not at `CREATE` time).
+
+- `*` — Substitutes any number of any characters except `/`, including the empty string.
+- `?` — Substitutes any single character.
+- `{some_string,another_string,yet_another_one}` — Substitutes any of the strings `'some_string', 'another_string', 'yet_another_one'`.
+- `{N..M}` — Substitutes any number in the range from N to M, including both borders. N and M can have leading zeroes, e.g. `000..078`.
+
+Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function.
+
+**Example**
+
+1. Suppose we have several files in CSV format with the following URIs on S3:
+
+- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv’
+- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv’
+- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv’
+- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv’
+- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv’
+- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv’
+
+There are several ways to make a table consisting of all six files:
+
+The first way:
+
+``` sql
+CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV');
+```
+
+Another way:
+
+``` sql
+CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV');
+```
+
+The table consists of all the files in both directories (all files should satisfy the format and schema described in the query):
+
+``` sql
+CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV');
+```
+
+If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately, or use `?`.
+
+**Example**
+
+Create a table with the files `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
+
+``` sql
+CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV');
+```
+
+## Virtual Columns {#virtual-columns}
+
+- `_path` — Path to the file.
+- `_file` — Name of the file.
+
+**See Also**
+
+- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
+
+## S3-related Settings {#settings}
+
+The following settings can be set before query execution or placed in the configuration file.
+
+- `s3_max_single_part_upload_size` — The maximum size of an object to upload to S3 using a single-part upload. Default value: `64Mb`.
+- `s3_min_upload_part_size` — The minimum size of a part to upload during [S3 multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value: `512Mb`.
+- `s3_max_redirects` — The maximum number of S3 redirect hops allowed. Default value: `10`.
+- `s3_single_read_retries` — The maximum number of attempts during a single read. Default value: `4`.
+
+Security consideration: if a malicious user can specify arbitrary S3 URLs, the `s3_max_redirects` parameter must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; alternatively, `remote_host_filter` must be specified in the server configuration.
+
+## Endpoint-based Settings {#endpoint-settings}
+
+The following settings can be specified in the configuration file for a given endpoint (which will be matched by the exact prefix of the URL).
+
+- `endpoint` — Specifies the prefix of an endpoint. Mandatory.
+- `access_key_id` and `secret_access_key` — Credentials to use with the specified endpoint. Optional.
+- `use_environment_credentials` — If set to `true`, the S3 client will try to obtain credentials for the specified endpoint from environment variables and [Amazon EC2](https://en.wikipedia.org/wiki/Amazon_Elastic_Compute_Cloud) metadata. Optional, default value: `false`.
+- `region` — Specifies the S3 region name. Optional.
+- `use_insecure_imds_request` — If set to `true`, the S3 client will use an insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Optional, default value: `false`.
+- `header` — Adds the specified HTTP header to a request to the given endpoint. Optional, can be specified multiple times to add several values.
+- `server_side_encryption_customer_key_base64` — If specified, the required headers for accessing S3 objects with SSE-C encryption will be set. Optional.
+- `max_single_read_retries` — The maximum number of attempts during a single read. Default value: `4`. Optional.
+
+**Example:**
+
+``` xml
+<s3>
+    <endpoint-name>
+        <endpoint>https://storage.yandexcloud.net/my-test-bucket-768/</endpoint>
+        <!-- <access_key_id>ACCESS_KEY_ID</access_key_id> -->
+        <!-- <secret_access_key>SECRET_ACCESS_KEY</secret_access_key> -->
+        <!-- <region>us-west-1</region> -->
+        <!-- <use_environment_credentials>false</use_environment_credentials> -->
+        <!-- <use_insecure_imds_request>false</use_insecure_imds_request> -->
+        <!-- <header>Authorization: Bearer SOME-TOKEN</header> -->
+        <!-- <server_side_encryption_customer_key_base64>BASE64-ENCODED-KEY</server_side_encryption_customer_key_base64> -->
+        <!-- <max_single_read_retries>4</max_single_read_retries> -->
+    </endpoint-name>
+</s3>
+```
+
+## Usage {#usage-examples}
+
+Suppose we have several files in CSV format with the following URIs on S3:
+
+- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv'
+- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv'
+- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv'
+- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv'
+- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv'
+- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv'
+
+
+1. There are several ways to make a table consisting of all six files; one of them:
+
+``` sql
+CREATE TABLE table_with_range (name String, value UInt32)
+ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV');
+```
+
+2. Another way:
+
+``` sql
+CREATE TABLE table_with_question_mark (name String, value UInt32)
+ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV');
+```
+
+3. The table consists of all the files in both directories (all files should satisfy the format and schema described in the query):
+
+``` sql
+CREATE TABLE table_with_asterisk (name String, value UInt32)
+ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV');
+```
+
+!!! warning "Warning"
+    If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately, or use `?`.
+
+4. Create a table from the files `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
+
+``` sql
+CREATE TABLE big_table (name String, value UInt32)
+ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV');
+```
+
+## See Also
+
+- [S3 table function](../../../sql-reference/table-functions/s3.md)
From b5dae909dde10b24618176c296428ea6045b98ae Mon Sep 17 00:00:00 2001
From: alesapin
Date: Fri, 18 Jun 2021 21:36:19 +0300
Subject: [PATCH 017/183] Add some tests

---
 src/Coordination/KeeperSnapshotManager.cpp    |   6 +
 src/Coordination/KeeperStorage.cpp            |  18 +-
 src/Coordination/ZooKeeperDataReader.cpp      |  12 +-
 .../test_keeper_zookeeper_converter/test.py   | 174 +++++++++++++++++-
 4 files changed, 196 insertions(+), 14 deletions(-)

diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp
index 42bc810f28e..40c898efdb5 100644
--- a/src/Coordination/KeeperSnapshotManager.cpp
+++ b/src/Coordination/KeeperSnapshotManager.cpp
@@ -99,6 +99,10 @@ namespace
             node.acl_id = acl_map.convertACLs(acls);
         }
 
+        /// Some strange ACLID during deserialization from ZooKeeper
+        if (node.acl_id == std::numeric_limits<uint64_t>::max())
+            node.acl_id = 0;
+
         acl_map.addUsage(node.acl_id);
 
         readBinary(node.is_sequental, in);
@@ -217,12 +221,14 @@ SnapshotMetadataPtr KeeperStorageSnapshot::deserialize(KeeperStorage & storage,
     if (current_version >= SnapshotVersion::V1)
     {
         size_t acls_map_size;
+
         readBinary(acls_map_size, in);
         size_t current_map_size = 0;
         while (current_map_size < acls_map_size)
         {
             uint64_t acl_id;
             readBinary(acl_id, in);
+
             size_t acls_size;
             readBinary(acls_size, in);
             Coordination::ACLs acls;
diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp
index 3ae29edb77a..5418afb2501 100644
--- a/src/Coordination/KeeperStorage.cpp
+++ b/src/Coordination/KeeperStorage.cpp
@@ -263,6 +263,7 @@ struct KeeperStorageCreateRequest final : public KeeperStorageRequest
         }
         else
         {
+
             auto & session_auth_ids = storage.session_and_auth[session_id];
 
             KeeperStorage::Node created_node;
@@ -280,6 +281,7 @@ struct KeeperStorageCreateRequest final : public KeeperStorageRequest
             created_node.acl_id = acl_id;
             created_node.stat.czxid = zxid;
             created_node.stat.mzxid = zxid;
+            created_node.stat.pzxid = zxid;
             created_node.stat.ctime = std::chrono::system_clock::now().time_since_epoch() / std::chrono::milliseconds(1);
             created_node.stat.mtime = created_node.stat.ctime;
             created_node.stat.numChildren = 0;
@@ -302,12 +304,15 @@ struct KeeperStorageCreateRequest final : public KeeperStorageRequest
             }
 
             auto child_path = getBaseName(path_created);
-            container.updateValue(parent_path, [child_path] (KeeperStorage::Node & parent)
+            int64_t prev_parent_zxid;
+            container.updateValue(parent_path, [child_path, zxid, &prev_parent_zxid] (KeeperStorage::Node & parent)
             {
                 /// Increment sequential number even if node is not sequential
                 ++parent.seq_num;
                 parent.children.insert(child_path);
                 ++parent.stat.cversion;
+                prev_parent_zxid = parent.stat.pzxid;
+                parent.stat.pzxid = zxid;
                 ++parent.stat.numChildren;
             });
 
@@ -317,7 +322,7 @@ struct KeeperStorageCreateRequest final : public KeeperStorageRequest
             if (request.is_ephemeral)
                 ephemerals[session_id].emplace(path_created);
 
-            undo = [&storage, session_id, path_created, is_ephemeral = request.is_ephemeral, parent_path, child_path, acl_id]
+            undo = [&storage, prev_parent_zxid, session_id, path_created, is_ephemeral = request.is_ephemeral, parent_path, child_path, acl_id]
             {
                 storage.container.erase(path_created);
                 storage.acl_map.removeUsage(acl_id);
@@ -325,11 +330,12 @@ struct KeeperStorageCreateRequest final : public KeeperStorageRequest
                 if (is_ephemeral)
                     storage.ephemerals[session_id].erase(path_created);
 
-                storage.container.updateValue(parent_path, [child_path] (KeeperStorage::Node & undo_parent)
+                storage.container.updateValue(parent_path, [child_path, prev_parent_zxid] (KeeperStorage::Node & undo_parent)
                 {
                     --undo_parent.stat.cversion;
                     --undo_parent.stat.numChildren;
                     --undo_parent.seq_num;
+                    undo_parent.stat.pzxid = prev_parent_zxid;
                     undo_parent.children.erase(child_path);
                 });
             };
@@ -536,6 +542,7 @@ struct KeeperStorageSetRequest final : public KeeperStorageRequest
         }
         else if (request.version == -1 || request.version == it->value.stat.version)
         {
+
             auto prev_node = it->value;
 
             auto itr = container.updateValue(request.path, [zxid, request] (KeeperStorage::Node & value)
@@ -901,10 +908,15 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina
     KeeperStorage::ResponsesForSessions results;
     if (new_last_zxid)
     {
+        LOG_INFO(&Poco::Logger::get("DEBUG"), "GOT ZXID {}", *new_last_zxid);
         if (zxid >= *new_last_zxid)
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Got new ZXID {} smaller or equal than current {}. It's a bug", *new_last_zxid, zxid);
         zxid = *new_last_zxid;
     }
+    else
+    {
+        LOG_INFO(&Poco::Logger::get("DEBUG"), "NO ZXID PROVIDED");
+    }
 
     session_expiry_queue.update(session_id, session_and_timeout[session_id]);
     if (zk_request->getOpNum() == Coordination::OpNum::Close)
diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp
index e0d0fbc85b6..4a324abe93d 100644
--- a/src/Coordination/ZooKeeperDataReader.cpp
+++ b/src/Coordination/ZooKeeperDataReader.cpp
@@ -308,6 +308,9 @@ Coordination::ZooKeeperRequestPtr deserializeSetTxn(ReadBuffer & in)
     Coordination::read(result->path, in);
     Coordination::read(result->data, in);
     Coordination::read(result->version, in);
+    /// ZooKeeper stores version + 1 (the version after the set), not the version from the request
+    result->version -= 1;
+
     return result;
 }
 
@@ -405,8 +408,7 @@ Coordination::ZooKeeperRequestPtr deserializeMultiTxn(ReadBuffer & in)
     while (length > 0)
     {
         auto subrequest = deserializeTxnImpl(in, true);
-        if (subrequest)
-            result->requests.push_back(subrequest);
+        result->requests.push_back(subrequest);
         length--;
     }
     return result;
@@ -420,14 +422,14 @@ bool isErrorRequest(Coordination::ZooKeeperRequestPtr request)
 bool hasErrorsInMultiRequest(Coordination::ZooKeeperRequestPtr request)
 {
     for (const auto & subrequest : dynamic_cast<Coordination::ZooKeeperMultiRequest *>(request.get())->requests)
-        if (dynamic_cast<Coordination::ZooKeeperRequest *>(subrequest.get())->getOpNum() == Coordination::OpNum::Error)
+        if (subrequest == nullptr)
             return true;
     return false;
 }
 
 }
 
-bool deserializeTxn(KeeperStorage & storage, ReadBuffer & in)
+bool deserializeTxn(KeeperStorage & storage, ReadBuffer & in, Poco::Logger * log)
 {
     int64_t checksum;
     Coordination::read(checksum, in);
diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py
index 97c16b09cd9..5c6ed90eb35 100644
--- a/tests/integration/test_keeper_zookeeper_converter/test.py
+++ b/tests/integration/test_keeper_zookeeper_converter/test.py
@@ -1,11 +1,10 @@
-#!/usr/bin/env python3
-
 #!/usr/bin/env python3
 import pytest
 from helpers.cluster import ClickHouseCluster
 from kazoo.client import KazooClient, KazooState
 from kazoo.security import ACL, make_digest_acl, make_acl
 from kazoo.exceptions import AuthFailedError, InvalidACLError, NoAuthError, KazooException
+import os

 cluster = ClickHouseCluster(__file__)

@@ -17,6 +16,14 @@ def start_zookeeper():
 def stop_zookeeper():
     node.exec_in_container(['bash', '-c', '/opt/zookeeper/bin/zkServer.sh stop'])

+def clear_zookeeper():
+    node.exec_in_container(['bash', '-c', 'rm -fr /zookeeper/*'])
+
+def restart_and_clear_zookeeper():
+    stop_zookeeper()
+    clear_zookeeper()
+    start_zookeeper()
+
 def clear_clickhouse_data():
     node.exec_in_container(['bash', '-c', 'rm -fr /var/lib/clickhouse/coordination/logs/* /var/lib/clickhouse/coordination/snapshots/*'])

@@ -59,10 +66,39 @@ def get_genuine_zk(timeout=30.0):
     _genuine_zk_instance.start()
     return _genuine_zk_instance

-def compare_states(zk1, zk2):
+def compare_stats(stat1, stat2, path):
+    assert stat1.czxid == stat2.czxid, "path " + path + " czxids not equal for stats: " + str(stat1.czxid) + " != " + str(stat2.czxid)
+    assert stat1.mzxid == stat2.mzxid, "path " + path + " mzxids not equal for stats: " + str(stat1.mzxid) + " != " + str(stat2.mzxid)
+    assert stat1.version == stat2.version, "path " + path + " versions not equal for stats: " + str(stat1.version) + " != " + str(stat2.version)
+    assert stat1.cversion == stat2.cversion, "path " + path + " cversions not equal for stats: " + str(stat1.cversion) + " != " + str(stat2.cversion)
+    assert stat1.aversion == stat2.aversion, "path " + path + " aversions not equal for stats: " + str(stat1.aversion) + " != " + str(stat2.aversion)
+    assert stat1.ephemeralOwner == stat2.ephemeralOwner, "path " + path + " ephemeralOwners not equal for stats: " + str(stat1.ephemeralOwner) + " != " + str(stat2.ephemeralOwner)
+    assert stat1.dataLength == stat2.dataLength, "path " + path + " dataLengths not equal for stats: " + str(stat1.dataLength) + " != " + str(stat2.dataLength)
+    assert stat1.numChildren == stat2.numChildren, "path " + path + " numChildren not equal for stats: " + str(stat1.numChildren) + " != " + str(stat2.numChildren)
+    assert stat1.pzxid == stat2.pzxid, "path " + path + " pzxids not equal for stats: " + str(stat1.pzxid) + " != " + str(stat2.pzxid)
+
+def compare_states(zk1, zk2, path="/"):
+    data1, stat1 = zk1.get(path)
+    data2, stat2 = zk2.get(path)
+    print("Left Stat", stat1)
+    print("Right Stat", stat2)
+    assert data1 == data2, "Data not equal on path " + str(path)
+    # both paths have strange stats
+    if path not in ("/", "/zookeeper"):
+        compare_stats(stat1, stat2, path)
+
+    first_children = list(sorted(zk1.get_children(path)))
+    second_children = list(sorted(zk2.get_children(path)))
+    print("Got children left", first_children)
+    print("Got children right", second_children)
+    assert first_children == second_children, "Children are not equal on path " + path
+
+    for child in first_children:
+        print("Checking child", os.path.join(path, child))
+        compare_states(zk1, zk2, os.path.join(path, child))

 def test_smoke(started_cluster):
-    start_zookeeper()
+    restart_and_clear_zookeeper()

     genuine_connection = get_genuine_zk()
     genuine_connection.create("/test", b"data")
@@ -71,6 +107,132 @@ def test_smoke(started_cluster):

     copy_zookeeper_data()

+    genuine_connection = get_genuine_zk()
     fake_connection = get_fake_zk()
-    assert fake_connection.get("/test")[0] == b"data"
-    assert genuine_connection.get("/test")[0] == b"data"
+
+    compare_states(genuine_connection, fake_connection)
+
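+# NOTE: compare_states above is the core assertion helper of this file: it
+# recursively walks the trees of both servers and checks data, stat fields
+# and children for equality. get_bytes below is a tiny helper, since kazoo
+# expects node payloads as bytes rather than str.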
+def get_bytes(s): + return s.encode() + +def test_simple_crud_requests(started_cluster): + restart_and_clear_zookeeper() + + genuine_connection = get_genuine_zk() + for i in range(100): + genuine_connection.create("/test_create" + str(i), get_bytes("data" + str(i))) + + # some set queries + for i in range(10): + for j in range(i + 1): + genuine_connection.set("/test_create" + str(i), get_bytes("value" + str(j))) + + for i in range(10, 20): + genuine_connection.delete("/test_create" + str(i)) + + path = "/test_create_deep" + for i in range(10): + genuine_connection.create(path, get_bytes("data" + str(i))) + path = os.path.join(path, str(i)) + + + genuine_connection.create("/test_sequential", b"") + for i in range(10): + genuine_connection.create("/test_sequential/" + "a" * i + "-", get_bytes("dataX" + str(i)), sequence=True) + + genuine_connection.create("/test_ephemeral", b"") + for i in range(10): + genuine_connection.create("/test_ephemeral/" + str(i), get_bytes("dataX" + str(i)), ephemeral=True) + + copy_zookeeper_data() + + genuine_connection = get_genuine_zk() + fake_connection = get_fake_zk() + + compare_states(genuine_connection, fake_connection) + + # especially ensure that counters are the same + genuine_connection.create("/test_sequential/" + "a" * 10 + "-", get_bytes("dataX" + str(i)), sequence=True) + fake_connection.create("/test_sequential/" + "a" * 10 + "-", get_bytes("dataX" + str(i)), sequence=True) + + first_children = list(sorted(genuine_connection.get_children("/test_sequential"))) + second_children = list(sorted(fake_connection.get_children("/test_sequential"))) + assert first_children == second_children, "Childrens are not equal on path " + path + + +def test_multi_and_failed_requests(started_cluster): + restart_and_clear_zookeeper() + + genuine_connection = get_genuine_zk() + genuine_connection.create('/test_multitransactions') + for i in range(10): + t = genuine_connection.transaction() + t.create('/test_multitransactions/freddy' + str(i), get_bytes('data' + str(i))) + t.create('/test_multitransactions/fred' + str(i), get_bytes('value' + str(i)), ephemeral=True) + t.create('/test_multitransactions/smith' + str(i), get_bytes('entity' + str(i)), sequence=True) + t.set_data('/test_multitransactions', get_bytes("somedata" + str(i))) + t.commit() + + with pytest.raises(Exception): + genuine_connection.set('/test_multitransactions/freddy0', get_bytes('mustfail' + str(i)), version=1) + + t = genuine_connection.transaction() + + t.create('/test_bad_transaction', get_bytes('data' + str(1))) + t.check('/test_multitransactions', version=32) + t.create('/test_bad_transaction1', get_bytes('data' + str(2))) + # should fail + t.commit() + + assert genuine_connection.exists('/test_bad_transaction') is None + assert genuine_connection.exists('/test_bad_transaction1') is None + + t = genuine_connection.transaction() + t.create('/test_bad_transaction2', get_bytes('data' + str(1))) + t.delete('/test_multitransactions/freddy0', version=5) + + # should fail + t.commit() + assert genuine_connection.exists('/test_bad_transaction2') is None + assert genuine_connection.exists('/test_multitransactions/freddy0') is not None + + copy_zookeeper_data() + + genuine_connection = get_genuine_zk() + fake_connection = get_fake_zk() + + compare_states(genuine_connection, fake_connection) + + +#def test_acls(started_cluster): +# restart_and_clear_zookeeper() +# genuine_connection = get_genuine_zk() +# genuine_connection.add_auth('digest', 'user1:password1') +# genuine_connection.add_auth('digest', 
'user2:password2') +# genuine_connection.add_auth('digest', 'user3:password3') +# +# genuine_connection.create("/test_multi_all_acl", b"data", acl=[make_acl("auth", "", all=True)]) +# +# other_connection = get_genuine_zk() +# other_connection.add_auth('digest', 'user1:password1') +# other_connection.set("/test_multi_all_acl", b"X") +# assert other_connection.get("/test_multi_all_acl")[0] == b"X" +# +# yet_other_auth_connection = get_genuine_zk() +# yet_other_auth_connection.add_auth('digest', 'user2:password2') +# +# yet_other_auth_connection.set("/test_multi_all_acl", b"Y") +# +# copy_zookeeper_data() +# +# genuine_connection = get_genuine_zk() +# genuine_connection.add_auth('digest', 'user1:password1') +# genuine_connection.add_auth('digest', 'user2:password2') +# genuine_connection.add_auth('digest', 'user3:password3') +# +# fake_connection = get_fake_zk() +# fake_connection.add_auth('digest', 'user1:password1') +# fake_connection.add_auth('digest', 'user2:password2') +# fake_connection.add_auth('digest', 'user3:password3') +# +# compare_states(genuine_connection, fake_connection) From 1510a8a01301481abda7c09d6ebcffa4138b9379 Mon Sep 17 00:00:00 2001 From: meoww-bot <14239840+meoww-bot@users.noreply.github.com> Date: Sun, 20 Jun 2021 01:24:09 +0800 Subject: [PATCH 018/183] Create zh translation for rabbitmq.md --- .../table-engines/integrations/rabbitmq.md | 167 ++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 docs/zh/engines/table-engines/integrations/rabbitmq.md diff --git a/docs/zh/engines/table-engines/integrations/rabbitmq.md b/docs/zh/engines/table-engines/integrations/rabbitmq.md new file mode 100644 index 00000000000..c43218da14f --- /dev/null +++ b/docs/zh/engines/table-engines/integrations/rabbitmq.md @@ -0,0 +1,167 @@ +--- +toc_priority: 10 +toc_title: RabbitMQ +--- + +# RabbitMQ 引擎 {#rabbitmq-engine} + +该引擎允许 ClickHouse 与 [RabbitMQ](https://www.rabbitmq.com) 进行集成. + +`RabbitMQ` 可以让你: + +- 发布或订阅数据流。 +- 在数据流可用时进行处理。 + +## 创建一张表 {#table_engine-rabbitmq-creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... +) ENGINE = RabbitMQ SETTINGS + rabbitmq_host_port = 'host:port', + rabbitmq_exchange_name = 'exchange_name', + rabbitmq_format = 'data_format'[,] + [rabbitmq_exchange_type = 'exchange_type',] + [rabbitmq_routing_key_list = 'key1,key2,...',] + [rabbitmq_row_delimiter = 'delimiter_symbol',] + [rabbitmq_schema = '',] + [rabbitmq_num_consumers = N,] + [rabbitmq_num_queues = N,] + [rabbitmq_queue_base = 'queue',] + [rabbitmq_deadletter_exchange = 'dl-exchange',] + [rabbitmq_persistent = 0,] + [rabbitmq_skip_broken_messages = N,] + [rabbitmq_max_block_size = N,] + [rabbitmq_flush_interval_ms = N] +``` + +必要参数: + +- `rabbitmq_host_port` – 主机名:端口号 (比如, `localhost:5672`). +- `rabbitmq_exchange_name` – RabbitMQ exchange 名称. +- `rabbitmq_format` – 消息格式. 使用与SQL`FORMAT`函数相同的标记,如`JSONEachRow`。 更多信息,请参阅 [Formats](../../../interfaces/formats.md) 部分. + +可选参数: + +- `rabbitmq_exchange_type` – RabbitMQ exchange 的类型: `direct`, `fanout`, `topic`, `headers`, `consistent_hash`. 默认是: `fanout`. +- `rabbitmq_routing_key_list` – 一个以逗号分隔的路由键列表. +- `rabbitmq_row_delimiter` – 用于消息结束的分隔符. +- `rabbitmq_schema` – 如果格式需要模式定义,必须使用该参数。比如, [Cap’n Proto](https://capnproto.org/) 需要模式文件的路径以及根 `schema.capnp:Message` 对象的名称. +- `rabbitmq_num_consumers` – 每个表的消费者数量。默认:`1`。如果一个消费者的吞吐量不够,可以指定更多的消费者. +- `rabbitmq_num_queues` – 队列的总数。默认值: `1`. 
增加这个数字可以显著提高性能.
+- `rabbitmq_queue_base` - 指定一个队列名称的提示。这个设置的使用情况如下.
+- `rabbitmq_deadletter_exchange` - 为[dead letter exchange](https://www.rabbitmq.com/dlx.html)指定名称。你可以用这个 exchange 的名称创建另一个表,并在消息被重新发布到 dead letter exchange 的情况下收集它们。默认情况下,没有指定 dead letter exchange。
+- `rabbitmq_persistent` - 如果设置为 1 (true), 在插入查询中交付模式将被设置为 2 (将消息标记为 'persistent'). 默认是: `0`.
+- `rabbitmq_skip_broken_messages` – RabbitMQ 消息解析器对每块模式不兼容消息的容忍度。默认值:`0`. 如果 `rabbitmq_skip_broken_messages = N`,那么引擎将跳过 *N* 个无法解析的 RabbitMQ 消息(一条消息等于一行数据)。
+- `rabbitmq_max_block_size`
+- `rabbitmq_flush_interval_ms`
+
+同时,格式的设置也可以与 rabbitmq 相关的设置一起添加。
+
+示例:
+
+``` sql
+  CREATE TABLE queue (
+    key UInt64,
+    value UInt64,
+    date DateTime
+  ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672',
+                            rabbitmq_exchange_name = 'exchange1',
+                            rabbitmq_format = 'JSONEachRow',
+                            rabbitmq_num_consumers = 5,
+                            date_time_input_format = 'best_effort';
+```
+
+RabbitMQ 服务器配置应使用 ClickHouse 配置文件添加。
+
+必要配置:
+
+``` xml
+<rabbitmq>
+    <username>root</username>
+    <password>clickhouse</password>
+</rabbitmq>
+```
+
+可选配置:
+
+``` xml
+<rabbitmq>
+    <vhost>clickhouse</vhost>
+</rabbitmq>
+```
+
+## 描述 {#description}
+
+`SELECT`对于读取消息不是特别有用(除了调试),因为每个消息只能读取一次。使用[物化视图](../../../sql-reference/statements/create/view.md)创建实时线程更为实用。要做到这一点:
+
+1. 使用引擎创建一个 RabbitMQ 消费者,并将其视为一个数据流。
+2. 创建一个具有所需结构的表。
+3. 创建一个物化视图,转换来自引擎的数据并将其放入先前创建的表中。
+
+当`物化视图`加入引擎时,它开始在后台收集数据。这允许您持续接收来自 RabbitMQ 的消息,并使用 `SELECT` 将它们转换为所需格式。
+一个 RabbitMQ 表可以有多个你需要的物化视图。
+
+数据可以根据`rabbitmq_exchange_type`和指定的`rabbitmq_routing_key_list`进行通道。
+每个表不能有多于一个 exchange。一个 exchange 可以在多个表之间共享 - 因为可以使用路由让数据同时进入多个表。
+
+Exchange 类型的选项:
+
+- `direct` - 路由是基于精确匹配的键。例如表的键列表: `key1,key2,key3,key4,key5`, 消息键可以是等同他们中的任意一个.
+- `fanout` - 路由到所有的表 (exchange 名称相同的情况) 无论是什么键都是这样.
+- `topic` - 路由是基于带有点分隔键的模式. 比如: `*.logs`, `records.*.*.2020`, `*.2018,*.2019,*.2020`.
+- `headers` - 路由是基于`key=value`的匹配,设置为`x-match=all`或`x-match=any`. 例如表的键列表: `x-match=all,format=logs,type=report,year=2020`.
+- `consistent_hash` - 数据在所有绑定的表之间均匀分布 (exchange 名称相同的情况). 请注意,这种 exchange 类型必须启用 RabbitMQ 插件: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`.
+
+设置`rabbitmq_queue_base`可用于以下情况:
+
+- 来让不同的表共享队列, 这样就可以为同一个队列注册多个消费者,这使得性能更好。如果使用`rabbitmq_num_consumers`和/或`rabbitmq_num_queues`设置,在这些参数相同的情况下,实现队列的精确匹配。
+- 以便在不是所有消息都被成功消费时,能够恢复从某些持久队列的阅读。要从一个特定的队列恢复消耗 - 在`rabbitmq_queue_base`设置中设置其名称,不要指定`rabbitmq_num_consumers`和`rabbitmq_num_queues`(默认为1)。要恢复所有队列的消费,这些队列是为一个特定的表所声明的 - 只要指定相同的设置。`rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`。默认情况下,队列名称对表来说是唯一的。
+- 以重复使用队列,因为它们被声明为持久的,并且不会自动删除(可以通过任何 RabbitMQ CLI 工具删除)。
+
+为了提高性能,收到的消息被分组为大小为 [max_insert_block_size](../../../operations/server-configuration-parameters/settings.md#settings-max_insert_block_size) 的块。如果在[stream_flush_interval_ms](../../../operations/server-configuration-parameters/settings.md)毫秒内没有形成数据块,无论数据块是否完整,数据都会被刷到表中。
+
+如果`rabbitmq_num_consumers`和/或`rabbitmq_num_queues`设置与`rabbitmq_exchange_type`一起被指定,那么:
+
+- 必须启用`rabbitmq-consistent-hash-exchange` 插件.
+- 必须指定已发布信息的 `message_id`属性(对于每个信息/批次都是唯一的)。 + +对于插入查询时有消息元数据,消息元数据被添加到每个发布的消息中:`messageID`和`republished`标志(如果值为true,则表示消息发布不止一次) - 可以通过消息头访问。 + +不要在插入和物化视图中使用同一个表。 + +示例: + +``` sql + CREATE TABLE queue ( + key UInt64, + value UInt64 + ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672', + rabbitmq_exchange_name = 'exchange1', + rabbitmq_exchange_type = 'headers', + rabbitmq_routing_key_list = 'format=logs,type=report,year=2020', + rabbitmq_format = 'JSONEachRow', + rabbitmq_num_consumers = 5; + + CREATE TABLE daily (key UInt64, value UInt64) + ENGINE = MergeTree() ORDER BY key; + + CREATE MATERIALIZED VIEW consumer TO daily + AS SELECT key, value FROM queue; + + SELECT key, value FROM daily ORDER BY key; +``` + +## 虚拟列 {#virtual-columns} + +- `_exchange_name` - RabbitMQ exchange 名称. +- `_channel_id` - 接收消息的消费者所声明的频道ID. +- `_delivery_tag` - 收到消息的DeliveryTag. 以每个频道为范围. +- `_redelivered` - 消息的`redelivered`标志. +- `_message_id` - 收到的消息的ID;如果在消息发布时被设置,则为非空. +- `_timestamp` - 收到的消息的时间戳;如果在消息发布时被设置,则为非空. + +[原始文章](https://clickhouse.tech/docs/en/engines/table-engines/integrations/rabbitmq/) From c56147c98ef42b25c4206e8ef3af14d857b1a2c3 Mon Sep 17 00:00:00 2001 From: meoww-bot <14239840+meoww-bot@users.noreply.github.com> Date: Sun, 20 Jun 2021 01:35:40 +0800 Subject: [PATCH 019/183] Create zh translation for s3.md --- docs/zh/sql-reference/table-functions/s3.md | 132 ++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 docs/zh/sql-reference/table-functions/s3.md diff --git a/docs/zh/sql-reference/table-functions/s3.md b/docs/zh/sql-reference/table-functions/s3.md new file mode 100644 index 00000000000..0a446dbc460 --- /dev/null +++ b/docs/zh/sql-reference/table-functions/s3.md @@ -0,0 +1,132 @@ +--- +toc_priority: 45 +toc_title: s3 +--- + +# S3 表函数 {#s3-table-function} + +提供类似于表的接口来 select/insert [Amazon S3](https://aws.amazon.com/s3/)中的文件。这个表函数类似于[hdfs](../../sql-reference/table-functions/hdfs.md),但提供了 S3 特有的功能。 + +**语法** + +``` sql +s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) +``` + +**参数** + +- `path` — 带有文件路径的 Bucket url。在只读模式下支持以下通配符: `*`, `?`, `{abc,def}` 和 `{N..M}` 其中 `N`, `M` 是数字, `'abc'`, `'def'` 是字符串. 更多信息见[下文](#wildcards-in-path). +- `format` — 文件的[格式](../../../interfaces/formats.md#formats). +- `structure` — 表的结构. 格式像这样 `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression` — 压缩类型. 支持的值: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. 参数是可选的. 默认情况下,通过文件扩展名自动检测压缩类型. 
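+
+下面是一个假设性的组合用法示意(URL 和列名仅为示例),同时用到了路径通配符、显式表结构和压缩类型参数:
+
+``` sql
+SELECT count(*)
+FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data/*.csv.gz', 'CSV', 'name String, value UInt32', 'gzip');
+```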
+ +**返回值** + +一个具有指定结构的表,用于读取或写入指定文件中的数据。 + +**示例** + +从 S3 文件`https://storage.yandexcloud.net/my-test-bucket-768/data.csv`中选择表格的前两行: + +``` sql +SELECT * +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') +LIMIT 2; +``` + +``` text +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ +``` + +类似的情况,但来源是`gzip`压缩的文件: + +``` sql +SELECT * +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') +LIMIT 2; +``` + +``` text +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ +``` + +## 用法 {#usage-examples} + +假设我们在S3上有几个文件,URI如下: + +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv' + +计算以数字1至3结尾的文件的总行数: + +``` sql +SELECT count(*) +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32') +``` + +``` text +┌─count()─┐ +│ 18 │ +└─────────┘ +``` + +计算这两个目录中所有文件的行的总量: + +``` sql +SELECT count(*) +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32') +``` + +``` text +┌─count()─┐ +│ 24 │ +└─────────┘ +``` + +!!! warning "Warning" + 如果文件列表中包含有从零开头的数字范围,请对每个数字分别使用带括号的结构,或者使用`?`。 + +计算名为 `file-000.csv`, `file-001.csv`, … , `file-999.csv` 文件的总行数: + +``` sql +SELECT count(*) +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); +``` + +``` text +┌─count()─┐ +│ 12 │ +└─────────┘ +``` + +插入数据到 `test-data.csv.gz` 文件: + +``` sql +INSERT INTO FUNCTION s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +VALUES ('test-data', 1), ('test-data-2', 2); +``` + +从已有的表插入数据到 `test-data.csv.gz` 文件: + +``` sql +INSERT INTO FUNCTION s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +SELECT name, value FROM existing_table; +``` + +**另请参阅** + +- [S3 引擎](../../engines/table-engines/integrations/s3.md) + +[原始文章](https://clickhouse.tech/docs/en/sql-reference/table-functions/s3/) From 89e7857012c4c4703dd343fafc97a81dc10b043f Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Sun, 20 Jun 2021 01:25:11 +0300 Subject: [PATCH 020/183] Create s3Cluster.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Сделал грязный черновик описания табличной функции s3Cluster. Буду корректировать примеры, которые не работают. 
---
 .../table-functions/s3Cluster.md              | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 docs/en/sql-reference/table-functions/s3Cluster.md

diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md
new file mode 100644
index 00000000000..b49da53f01a
--- /dev/null
+++ b/docs/en/sql-reference/table-functions/s3Cluster.md
@@ -0,0 +1,47 @@
+---
+toc_priority: 55
+toc_title: s3Cluster
+---
+
+# S3Cluster Table Function {#s3Cluster-table-function}
+
+Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) in parallel from many nodes in a specified cluster. On the initiator, it creates a connection to all nodes in the cluster, expands the asterisks in the S3 file path, and dispatches each file dynamically. On the worker node, it asks the initiator for the next task and processes it. This is repeated until all tasks are finished.
+
+**Syntax**
+
+``` sql
+s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure)
+```
+
+**Arguments**
+
+- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
+- `source` — Bucket URL with path to file. Supports the following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
+- `format` — The [format](../../interfaces/formats.md#formats) of the file.
+- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
+
+**Returned value**
+
+A table with the specified structure for reading or writing data in the specified file.
+
+**Examples**
+
+Selecting the data from the cluster `cluster_simple` using source `http://minio1:9001/root/data/{clickhouse,database}/*`:
+
+``` sql
+SELECT * from s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon);
+```
+
+Count the total number of rows in all files of the cluster `cluster_simple`:
+
+``` sql
+SELECT count(*) from s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))');
+```
+
+!!! warning "Warning"
+    If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
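+
+For instance, for hypothetical files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`, a zero-padded brace range keeps the leading zeros explicit:
+
+``` sql
+SELECT count(*) FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/file-{000..999}.csv', 'minio', 'minio123', 'CSV', 'name String, value UInt32');
+```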
+ +**See Also** + +- [S3 engine](../../engines/table-engines/integrations/s3.md) +- [S3 table function](../../sql-reference/table-functions/s3.md) From 7e0e18260892aaf2779f4ef0c4f451026d77d296 Mon Sep 17 00:00:00 2001 From: meoww-bot <14239840+meoww-bot@users.noreply.github.com> Date: Sun, 20 Jun 2021 18:13:16 +0800 Subject: [PATCH 021/183] fix wrong link --- docs/zh/sql-reference/table-functions/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/sql-reference/table-functions/s3.md b/docs/zh/sql-reference/table-functions/s3.md index 0a446dbc460..c55412f4ddd 100644 --- a/docs/zh/sql-reference/table-functions/s3.md +++ b/docs/zh/sql-reference/table-functions/s3.md @@ -16,7 +16,7 @@ s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compres **参数** - `path` — 带有文件路径的 Bucket url。在只读模式下支持以下通配符: `*`, `?`, `{abc,def}` 和 `{N..M}` 其中 `N`, `M` 是数字, `'abc'`, `'def'` 是字符串. 更多信息见[下文](#wildcards-in-path). -- `format` — 文件的[格式](../../../interfaces/formats.md#formats). +- `format` — 文件的[格式](../../interfaces/formats.md#formats). - `structure` — 表的结构. 格式像这样 `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — 压缩类型. 支持的值: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. 参数是可选的. 默认情况下,通过文件扩展名自动检测压缩类型. From e1b509c8b1acd781c5626fe9f273f66cf121b796 Mon Sep 17 00:00:00 2001 From: Tiaonmmn Date: Mon, 21 Jun 2021 20:53:09 +0800 Subject: [PATCH 022/183] Update mergetree.md Update translation and add new contents. --- .../mergetree-family/mergetree.md | 449 ++++++++++++------ 1 file changed, 302 insertions(+), 147 deletions(-) diff --git a/docs/zh/engines/table-engines/mergetree-family/mergetree.md b/docs/zh/engines/table-engines/mergetree-family/mergetree.md index 353dd5f5bc8..45e080fd640 100644 --- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md @@ -6,21 +6,21 @@ Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及 主要特点: -- 存储的数据按主键排序。 +- 存储的数据按主键排序。 - 这使得你能够创建一个小型的稀疏索引来加快数据检索。 + 这使得您能够创建一个小型的稀疏索引来加快数据检索。 -- 支持数据分区,如果指定了 [分区键](custom-partitioning-key.md) 的话。 +- 如果指定了 [分区键](custom-partitioning-key.md) 的话,可以使用分区。 在相同数据集和相同结果集的情况下 ClickHouse 中某些带分区的操作会比普通操作更快。查询中指定了分区键时 ClickHouse 会自动截取分区数据。这也有效增加了查询性能。 -- 支持数据副本。 +- 支持数据副本。 `ReplicatedMergeTree` 系列的表提供了数据副本功能。更多信息,请参阅 [数据副本](replication.md) 一节。 -- 支持数据采样。 +- 支持数据采样。 - 需要的话,你可以给表设置一个采样方法。 + 需要的话,您可以给表设置一个采样方法。 !!! note "注意" [合并](../special/merge.md#merge) 引擎并不属于 `*MergeTree` 系列。 @@ -50,54 +50,58 @@ ORDER BY expr **子句** -- `ENGINE` - 引擎名和参数。 `ENGINE = MergeTree()`. `MergeTree` 引擎没有参数。 +- `ENGINE` - 引擎名和参数。 `ENGINE = MergeTree()`. `MergeTree` 引擎没有参数。 - -- `ORDER BY` — 排序键。 +- `ORDER BY` — 排序键。 可以是一组列的元组或任意的表达式。 例如: `ORDER BY (CounterID, EventDate)` 。 - - 如果没有使用 `PRIMARY KEY` 显式的指定主键,ClickHouse 会使用排序键作为主键。 - + + 如果没有使用 `PRIMARY KEY` 显式指定的主键,ClickHouse 会使用排序键作为主键。 + 如果不需要排序,可以使用 `ORDER BY tuple()`. 
参考 [选择主键](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/#selecting-the-primary-key)

- `PARTITION BY` — [分区键](custom-partitioning-key.md) ,可选项。

    要按月分区,可以使用表达式 `toYYYYMM(date_column)` ,这里的 `date_column` 是一个 [Date](../../../engines/table-engines/mergetree-family/mergetree.md) 类型的列。分区名的格式会是 `"YYYYMM"` 。

- `PRIMARY KEY` - 如果要 [选择与排序键不同的主键](#choosing-a-primary-key-that-differs-from-the-sorting-key),在这里指定,可选项。

    默认情况下主键跟排序键(由 `ORDER BY` 子句指定)相同。
    因此,大部分情况下不需要再专门指定一个 `PRIMARY KEY` 子句。

- `SAMPLE BY` - 用于抽样的表达式,可选项。

    如果要用抽样表达式,主键中必须包含这个表达式。例如:
    `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))` 。

- `TTL` - 指定行存储的持续时间并定义数据片段在硬盘和卷上的移动逻辑的规则列表,可选项。

    表达式中必须存在至少一个 `Date` 或 `DateTime` 类型的列,比如:

    `TTL date + INTERVAL 1 DAY`

    规则的类型 `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'` 指定了当满足条件(到达指定时间)时所要执行的动作:移除过期的行,还是将数据片段(如果数据片段中的所有行都满足表达式的话)移动到指定的磁盘(`TO DISK 'xxx'`) 或 卷(`TO VOLUME 'xxx'`)。默认的规则是移除(`DELETE`)。可以在列表中指定多个规则,但最多只能有一个`DELETE`的规则。

    更多细节,请查看 [表和列的 TTL](#table_engine-mergetree-ttl)

- `SETTINGS` — 控制 `MergeTree` 行为的额外参数,可选项:

    - `index_granularity` — 索引粒度。索引中相邻的『标记』间的数据行数。默认值8192 。参考[数据存储](#mergetree-data-storage)。
    - `index_granularity_bytes` — 索引粒度,以字节为单位,默认值: 10Mb。如果想要仅按数据行数限制索引粒度, 请设置为0(不建议)。
    - `min_index_granularity_bytes` - 允许的最小数据粒度,默认值:1024b。该选项用于防止误操作,添加了一个非常低索引粒度的表。参考[数据存储](#mergetree-data-storage)
    - `enable_mixed_granularity_parts` — 是否启用通过 `index_granularity_bytes` 控制索引粒度的大小。在19.11版本之前, 只有 `index_granularity` 配置能够用于限制索引粒度的大小。当从具有很大的行(几十上百兆字节)的表中查询数据时候,`index_granularity_bytes` 配置能够提升ClickHouse的性能。如果您的表里有很大的行,可以开启这项配置来提升`SELECT` 查询的性能。
    - `use_minimalistic_part_header_in_zookeeper` — ZooKeeper中数据片段存储方式。如果`use_minimalistic_part_header_in_zookeeper=1`,ZooKeeper 会存储更少的数据。更多信息参考[服务配置参数](https://clickhouse.tech/docs/zh/operations/server-configuration-parameters/settings/)这章中的 [设置描述](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) 。
    - `min_merge_bytes_to_use_direct_io` — 使用直接 I/O 来操作磁盘的合并操作时要求的最小数据量。合并数据片段时,ClickHouse 会计算要被合并的所有数据的总存储空间。如果大小超过了 `min_merge_bytes_to_use_direct_io` 设置的字节数,则 ClickHouse 将使用直接 I/O 接口(`O_DIRECT` 选项)对磁盘读写。如果设置 `min_merge_bytes_to_use_direct_io = 0`,则会禁用直接 I/O。默认值:`10 * 1024 * 1024 * 1024` 字节。
    - `merge_with_ttl_timeout` — TTL合并频率的最小间隔时间,单位:秒。默认值: 86400 (1 天)。
    - `write_final_mark` — 是否启用在数据片段尾部写入最终索引标记。默认值: 1(不要关闭)。
    - `merge_max_block_size` — 在块中进行合并操作时的最大行数限制。默认值:8192
    - `storage_policy` — 存储策略。 参见 [使用具有多个块的设备进行数据存储](#table_engine-mergetree-multiple-volumes).
    - `min_bytes_for_wide_part`,`min_rows_for_wide_part` 在数据片段中可以使用`Wide`格式进行存储的最小字节数/行数。您可以不设置、只设置一个,或全都设置。参考:[数据存储](#mergetree-data-storage)
    - `max_parts_in_total` - 所有分区中数据片段的最大数量。
    - `max_compress_block_size` - 在数据压缩写入表前,未压缩数据块的最大大小。您可以在全局设置中设置该值(参见[max_compress_block_size](https://clickhouse.tech/docs/zh/operations/settings/settings/#max-compress-block-size))。建表时指定该值会覆盖全局设置。
    - `min_compress_block_size` - 在数据压缩写入表前,未压缩数据块的最小大小。您可以在全局设置中设置该值(参见[min_compress_block_size](https://clickhouse.tech/docs/zh/operations/settings/settings/#min-compress-block-size))。建表时指定该值会覆盖全局设置。
    - `max_partitions_to_read` - 一次查询中可访问的分区最大数。您可以在全局设置中设置该值(参见[max_partitions_to_read](https://clickhouse.tech/docs/zh/operations/settings/settings/#max_partitions_to_read))。

**示例配置**

``` sql
ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192
```

@@ -107,12 +111,11 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa

在这个例子中,我们设置了按月进行分区。

-同时我们设置了一个按用户 ID 哈希的抽样表达式。这使得你可以对该表中每个 `CounterID` 和 `EventDate` 的数据伪随机分布。如果你在查询时指定了 [SAMPLE](../../../engines/table-engines/mergetree-family/mergetree.md#select-sample-clause) 子句。 ClickHouse会返回对于用户子集的一个均匀的伪随机数据采样。
+同时我们设置了一个按用户 ID 哈希的抽样表达式。这使得您可以对该表中每个 `CounterID` 和 `EventDate` 的数据伪随机分布。如果您在查询时指定了 [SAMPLE](../../../engines/table-engines/mergetree-family/mergetree.md#select-sample-clause) 子句,ClickHouse 会返回对于用户子集的一个均匀的伪随机数据采样。

`index_granularity` 可省略,因为 8192 是默认设置。
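+
+例如,下面这个查询(仅为示意,`hits` 为假设的表名)会基于上述抽样键,对大约 10% 的用户子集进行统计:
+
+``` sql
+SELECT count() FROM hits SAMPLE 0.1
+```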
- 已弃用的建表方法 !!! attention "注意" @@ -127,10 +130,10 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa **MergeTree() 参数** -- `date-column` — 类型为 [日期](../../../engines/table-engines/mergetree-family/mergetree.md) 的列名。ClickHouse 会自动依据这个列按月创建分区。分区名格式为 `"YYYYMM"` 。 -- `sampling_expression` — 采样表达式。 -- `(primary, key)` — 主键。类型 — [元组()](../../../engines/table-engines/mergetree-family/mergetree.md) -- `index_granularity` — 索引粒度。即索引中相邻『标记』间的数据行数。设为 8192 可以适用大部分场景。 +- `date-column` — 类型为 [日期](../../../engines/table-engines/mergetree-family/mergetree.md) 的列名。ClickHouse 会自动依据这个列按月创建分区。分区名格式为 `"YYYYMM"` 。 +- `sampling_expression` — 采样表达式。 +- `(primary, key)` — 主键。类型 — [元组()](../../../engines/table-engines/mergetree-family/mergetree.md) +- `index_granularity` — 索引粒度。即索引中相邻『标记』间的数据行数。设为 8192 可以适用大部分场景。 **示例** @@ -152,51 +155,55 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa 数据存储格式由 `min_bytes_for_wide_part` 和 `min_rows_for_wide_part` 表引擎参数控制。如果数据片段中的字节数或行数少于相应的设置值,数据片段会以 `Compact` 格式存储,否则会以 `Wide` 格式存储。 每个数据片段被逻辑的分割成颗粒(granules)。颗粒是 ClickHouse 中进行数据查询时的最小不可分割数据集。ClickHouse 不会对行或值进行拆分,所以每个颗粒总是包含整数个行。每个颗粒的第一行通过该行的主键值进行标记, -ClickHouse 会为每个数据片段创建一个索引文件来存储这些标记。对于每列,无论它是否包含在主键当中,ClickHouse 都会存储类似标记。这些标记让你可以在列文件中直接找到数据。 +ClickHouse 会为每个数据片段创建一个索引文件来存储这些标记。对于每列,无论它是否包含在主键当中,ClickHouse 都会存储类似标记。这些标记让您可以在列文件中直接找到数据。 -颗粒的大小通过表引擎参数 `index_granularity` 和 `index_granularity_bytes` 控制。取决于行的大小,颗粒的行数的在 `[1, index_granularity]` 范围中。如果单行的大小超过了 `index_granularity_bytes` 设置的值,那么一个颗粒的大小会超过 `index_granularity_bytes`。在这种情况下,颗粒的大小等于该行的大小。 +颗粒的大小通过表引擎参数 `index_granularity` 和 `index_granularity_bytes` 控制。颗粒的行数的在 `[1, index_granularity]` 范围中,这取决于行的大小。如果单行的大小超过了 `index_granularity_bytes` 设置的值,那么一个颗粒的大小会超过 `index_granularity_bytes`。在这种情况下,颗粒的大小等于该行的大小。 ## 主键和索引在查询中的表现 {#primary-keys-and-indexes-in-queries} 我们以 `(CounterID, Date)` 以主键。排序好的索引的图示会是下面这样: +``` text 全部数据 : [-------------------------------------------------------------------------] CounterID: [aaaaaaaaaaaaaaaaaabbbbcdeeeeeeeeeeeeefgggggggghhhhhhhhhiiiiiiiiikllllllll] Date: [1111111222222233331233211111222222333211111112122222223111112223311122333] 标记: | | | | | | | | | | | a,1 a,2 a,3 b,3 e,2 e,3 g,1 h,2 i,1 i,3 l,3 标记号: 0 1 2 3 4 5 6 7 8 9 10 +``` 如果指定查询如下: -- `CounterID in ('a', 'h')`,服务器会读取标记号在 `[0, 3)` 和 `[6, 8)` 区间中的数据。 -- `CounterID IN ('a', 'h') AND Date = 3`,服务器会读取标记号在 `[1, 3)` 和 `[7, 8)` 区间中的数据。 -- `Date = 3`,服务器会读取标记号在 `[1, 10]` 区间中的数据。 +- `CounterID in ('a', 'h')`,服务器会读取标记号在 `[0, 3)` 和 `[6, 8)` 区间中的数据。 +- `CounterID IN ('a', 'h') AND Date = 3`,服务器会读取标记号在 `[1, 3)` 和 `[7, 8)` 区间中的数据。 +- `Date = 3`,服务器会读取标记号在 `[1, 10]` 区间中的数据。 上面例子可以看出使用索引通常会比全表描述要高效。 稀疏索引会引起额外的数据读取。当读取主键单个区间范围的数据时,每个数据块中最多会多读 `index_granularity * 2` 行额外的数据。 -稀疏索引使得你可以处理极大量的行,因为大多数情况下,这些索引常驻与内存(RAM)中。 +稀疏索引使得您可以处理极大量的行,因为大多数情况下,这些索引常驻于内存。 -ClickHouse 不要求主键惟一,所以你可以插入多条具有相同主键的行。 +ClickHouse 不要求主键唯一,所以您可以插入多条具有相同主键的行。 + +您可以在`PRIMARY KEY`与`ORDER BY`条件中使用`可为空的`类型的表达式,但强烈建议不要这么做。为了启用这项功能,请打开[allow_nullable_key](https://clickhouse.tech/docs/zh/operations/settings/settings/#allow-nullable-key),[NULLS_LAST](https://clickhouse.tech/docs/zh/sql-reference/statements/select/order-by/#sorting-of-special-values)规则也适用于`ORDER BY`条件中有NULL值的情况下。 ### 主键的选择 {#zhu-jian-de-xuan-ze} -主键中列的数量并没有明确的限制。依据数据结构,你可以在主键包含多些或少些列。这样可以: +主键中列的数量并没有明确的限制。依据数据结构,您可以在主键包含多些或少些列。这样可以: -- 改善索引的性能。 +- 改善索引的性能。 如果当前主键是 `(a, b)` ,在下列情况下添加另一个 `c` 列会提升性能: - - - 查询会使用 `c` 列作为条件 - - 很长的数据范围( `index_granularity` 的数倍)里 `(a, b)` 都是相同的值,并且这样的情况很普遍。换言之,就是加入另一列后,可以让你的查询略过很长的数据范围。 -- 
改善数据压缩。 + - 查询会使用 `c` 列作为条件 + - 很长的数据范围( `index_granularity` 的数倍)里 `(a, b)` 都是相同的值,并且这样的情况很普遍。换言之,就是加入另一列后,可以让您的查询略过很长的数据范围。 + +- 改善数据压缩。 ClickHouse 以主键排序片段数据,所以,数据的一致性越高,压缩越好。 -- 在[CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) 和 [SummingMergeTree](summingmergetree.md) 引擎里进行数据合并时会提供额外的处理逻辑。 +- 在[CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) 和 [SummingMergeTree](summingmergetree.md) 引擎里进行数据合并时会提供额外的处理逻辑。 在这种情况下,指定与主键不同的 *排序键* 也是有意义的。 @@ -206,9 +213,9 @@ ClickHouse 不要求主键惟一,所以你可以插入多条具有相同主键 想要根据初始顺序进行数据查询,使用 [单线程查询](../../../operations/settings/settings.md#settings-max_threads) -### 选择与排序键不同主键 {#choosing-a-primary-key-that-differs-from-the-sorting-key} +### 选择与排序键不同的主键 {#choosing-a-primary-key-that-differs-from-the-sorting-key} -指定一个跟排序键不一样的主键是可以的,此时排序键用于在数据片段中进行排序,主键用于在索引文件中进行标记的写入。这种情况下,主键表达式元组必须是排序键表达式元组的前缀。 +Clickhouse可以做到指定一个跟排序键不一样的主键,此时排序键用于在数据片段中进行排序,主键用于在索引文件中进行标记的写入。这种情况下,主键表达式元组必须是排序键表达式元组的前缀(即主键为(a,b),排序列必须为(a,b,******))。 当使用 [SummingMergeTree](summingmergetree.md) 和 [AggregatingMergeTree](aggregatingmergetree.md) 引擎时,这个特性非常有用。通常在使用这类引擎时,表里的列分两种:*维度* 和 *度量* 。典型的查询会通过任意的 `GROUP BY` 对度量列进行聚合并通过维度列进行过滤。由于 SummingMergeTree 和 AggregatingMergeTree 会对排序键相同的行进行聚合,所以把所有的维度放进排序键是很自然的做法。但这将导致排序键中包含大量的列,并且排序键会伴随着新添加的维度不断的更新。 @@ -218,14 +225,20 @@ ClickHouse 不要求主键惟一,所以你可以插入多条具有相同主键 ### 索引和分区在查询中的应用 {#use-of-indexes-and-partitions-in-queries} -对于 `SELECT` 查询,ClickHouse 分析是否可以使用索引。如果 `WHERE/PREWHERE` 子句具有下面这些表达式(作为谓词链接一子项或整个)则可以使用索引:包含一个表示与主键/分区键中的部分字段或全部字段相等/不等的比较表达式;基于主键/分区键的字段上的 `IN` 或 固定前缀的`LIKE` 表达式;基于主键/分区键的字段上的某些函数;基于主键/分区键的表达式的逻辑表达式。 +对于 `SELECT` 查询,ClickHouse 分析是否可以使用索引。如果 `WHERE/PREWHERE` 子句具有下面这些表达式(作为完整WHERE条件的一部分或全部)则可以使用索引:进行相等/不相等的比较;对主键列或分区列进行`IN`运算、有固定前缀的`LIKE`运算(如name like 'test%')、函数运算(部分函数适用),还有对上述表达式进行逻辑运算。 + + -因此,在索引键的一个或多个区间上快速地执行查询都是可能的。下面例子中,指定标签;指定标签和日期范围;指定标签和日期;指定多个标签和日期范围等执行查询,都会非常快。 + + +因此,在索引键的一个或多个区间上快速地执行查询是可能的。下面例子中,指定标签;指定标签和日期范围;指定标签和日期;指定多个标签和日期范围等执行查询,都会非常快。 当引擎配置如下时: +``` sql ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate) SETTINGS index_granularity=8192 +``` 这种情况下,这些查询: @@ -237,7 +250,7 @@ SELECT count() FROM table WHERE ((EventDate >= toDate('2014-01-01') AND EventDat ClickHouse 会依据主键索引剪掉不符合的数据,依据按月分区的分区键剪掉那些不包含符合数据的分区。 -上文的查询显示,即使索引用于复杂表达式。因为读表操作是组织好的,所以,使用索引不会比完整扫描慢。 +上文的查询显示,即使索引用于复杂表达式,因为读表操作经过优化,所以使用索引不会比完整扫描慢。 下面这个例子中,不会使用索引。 @@ -247,17 +260,16 @@ SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%' 要检查 ClickHouse 执行一个查询时能否使用索引,可设置 [force_index_by_date](../../../operations/settings/settings.md#settings-force_index_by_date) 和 [force_primary_key](../../../operations/settings/settings.md) 。 -按月分区的分区键是只能读取包含适当范围日期的数据块。这种情况下,数据块会包含很多天(最多整月)的数据。在块中,数据按主键排序,主键第一列可能不包含日期。因此,仅使用日期而没有带主键前几个字段作为条件的查询将会导致需要读取超过这个指定日期以外的数据。 +使用按月分区的分区列允许只读取包含适当日期区间的数据块,这种情况下,数据块会包含很多天(最多整月)的数据。在块中,数据按主键排序,主键第一列可能不包含日期。因此,仅使用日期而没有用主键字段作为条件的查询将会导致需要读取超过这个指定日期以外的数据。 ### 部分单调主键的使用 -考虑这样的场景,比如一个月中的几天。它们在一个月的范围内形成一个[单调序列](https://zh.wikipedia.org/wiki/单调函数) ,但如果扩展到更大的时间范围它们就不再单调了。这就是一个部分单调序列。如果用户使用部分单调的主键创建表,ClickHouse同样会创建一个稀疏索引。当用户从这类表中查询数据时,ClickHouse 会对查询条件进行分析。如果用户希望获取两个索引标记之间的数据并且这两个标记在一个月以内,ClickHouse 可以在这种特殊情况下使用到索引,因为它可以计算出查询参数与索引标记之间的距离。 +考虑这样的场景,比如一个月中的天数。它们在一个月的范围内形成一个[单调序列](https://zh.wikipedia.org/wiki/单调函数) ,但如果扩展到更大的时间范围它们就不再单调了。这就是一个部分单调序列。如果用户使用部分单调的主键创建表,ClickHouse同样会创建一个稀疏索引。当用户从这类表中查询数据时,ClickHouse 会对查询条件进行分析。如果用户希望获取两个索引标记之间的数据并且这两个标记在一个月以内,ClickHouse 可以在这种特殊情况下使用到索引,因为它可以计算出查询参数与索引标记之间的距离。 如果查询参数范围内的主键不是单调序列,那么 ClickHouse 
无法使用索引。在这种情况下,ClickHouse 会进行全表扫描。 ClickHouse 在任何主键代表一个部分单调序列的情况下都会使用这个逻辑。 - ### 跳数索引 {#tiao-shu-suo-yin-fen-duan-hui-zong-suo-yin-shi-yan-xing-de} 此索引在 `CREATE` 语句的列部分里定义。 @@ -267,11 +279,7 @@ INDEX index_name expr TYPE type(...) GRANULARITY granularity_value ``` `*MergeTree` 系列的表可以指定跳数索引。 - -这些索引是由数据块按粒度分割后的每部分在指定表达式上汇总信息 `granularity_value` 组成(粒度大小用表引擎里 `index_granularity` 的指定)。 -这些汇总信息有助于用 `where` 语句跳过大片不满足的数据,从而减少 `SELECT` 查询从磁盘读取的数据量, - -这些索引会在数据块上聚合指定表达式的信息,这些信息以 granularity_value 指定的粒度组成 (粒度的大小通过在表引擎中定义 index_granularity 定义)。这些汇总信息有助于跳过大片不满足 `where` 条件的数据,从而减少 `SELECT` 查询从磁盘读取的数据量。 +跳数索引是指数据片段按照粒度(建表时指定的`index_granularity`)分割成小块后,将上述SQL的granularity_value数量的小块组合成一个大的块,对这些大块写入索引信息,这样有助于使用`where`筛选时跳过大量不必要的数据,减少`SELECT`需要读取的数据量。 **示例** @@ -295,34 +303,32 @@ SELECT count() FROM table WHERE s < 'z' SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 ``` -#### 索引的可用类型 {#table_engine-mergetree-data_skipping-indexes} +#### 可用的索引类型 {#table_engine-mergetree-data_skipping-indexes} -- `minmax` +- `minmax` 存储指定表达式的极值(如果表达式是 `tuple` ,则存储 `tuple` 中每个元素的极值),这些信息用于跳过数据块,类似主键。 -- `set(max_rows)` - 存储指定表达式的不重复值(不超过 `max_rows` 个,`max_rows=0` 则表示『无限制』)。这些信息可用于检查 数据块是否满足 `WHERE` 条件。 +- `set(max_rows)` + 存储指定表达式的不重复值(不超过 `max_rows` 个,`max_rows=0` 则表示『无限制』)。这些信息可用于检查数据块是否满足 `WHERE` 条件。 -- `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` +- `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` 存储一个包含数据块中所有 n元短语(ngram) 的 [布隆过滤器](https://en.wikipedia.org/wiki/Bloom_filter) 。只可用在字符串上。 可用于优化 `equals` , `like` 和 `in` 表达式的性能。 - `n` – 短语长度。 - `size_of_bloom_filter_in_bytes` – 布隆过滤器大小,单位字节。(因为压缩得好,可以指定比较大的值,如 256 或 512)。 - `number_of_hash_functions` – 布隆过滤器中使用的哈希函数的个数。 - `random_seed` – 哈希函数的随机种子。 + - `n` – 短语长度。 + - `size_of_bloom_filter_in_bytes` – 布隆过滤器大小,字节为单位。(因为压缩得好,可以指定比较大的值,如 256 或 512)。 + - `number_of_hash_functions` – 布隆过滤器中使用的哈希函数的个数。 + - `random_seed` – 哈希函数的随机种子。 -- `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` - 跟 `ngrambf_v1` 类似,不同于 ngrams 存储字符串指定长度的所有片段。它只存储被非字母数字字符分割的片段。 +- `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` + 跟 `ngrambf_v1` 类似,但是存储的是token而不是ngrams。Token是由非字母数字的符号分割的序列。 -- `bloom_filter(bloom_filter([false_positive])` – 为指定的列存储布隆过滤器 +- `bloom_filter(bloom_filter([false_positive])` – 为指定的列存储布隆过滤器 + + 可选参数`false_positive`用来指定从布隆过滤器收到错误响应的几率。取值范围是 (0,1),默认值:0.025 - 可选的参数 false_positive 用来指定从布隆过滤器收到错误响应的几率。取值范围是 (0,1),默认值:0.025 - 支持的数据类型:`Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`。 - + 以下函数会用到这个索引: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md) - - ``` sql INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4 @@ -332,56 +338,56 @@ INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY #### 函数支持 {#functions-support} -WHERE 子句中的条件包含对列的函数调用,如果列是索引的一部分,ClickHouse 会在执行函数时尝试使用索引。不同的函数对索引的支持是不同的。 +WHERE 子句中的条件可以包含对某列数据进行运算的函数表达式,如果列是索引的一部分,ClickHouse会在执行函数时尝试使用索引。不同的函数对索引的支持是不同的。 `set` 索引会对所有函数生效,其他索引对函数的生效情况见下表 -| 函数 (操作符) / 索引 | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter | 
-|------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------| -| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notEquals(!=, \<\>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | -| [endsWith](../../../sql-reference/functions/string-functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | -| [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | -| [in](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notIn](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [less (\<)](../../../sql-reference/functions/comparison-functions.md#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [greater (\>)](../../../sql-reference/functions/comparison-functions.md#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [lessOrEquals (\<=)](../../../sql-reference/functions/comparison-functions.md#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [empty](../../../sql-reference/functions/array-functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | -| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | +| 函数 (操作符) / 索引 | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter | +| ------------------------------------------------------------ | ----------- | ------ | ---------- | ---------- | ------------ | +| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notEquals(!=, \<\>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | +| [endsWith](../../../sql-reference/functions/string-functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | +| [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | +| [in](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notIn](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [less (\<)](../../../sql-reference/functions/comparison-functions.md#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [greater (\>)](../../../sql-reference/functions/comparison-functions.md#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [lessOrEquals (\<=)](../../../sql-reference/functions/comparison-functions.md#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | 
+| [empty](../../../sql-reference/functions/array-functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | +| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | 常量参数小于 ngram 大小的函数不能使用 `ngrambf_v1` 进行查询优化。 !!! note "注意" -布隆过滤器可能会包含不符合条件的匹配,所以 `ngrambf_v1`, `tokenbf_v1` 和 `bloom_filter` 索引不能用于负向的函数,例如: +布隆过滤器可能会包含不符合条件的匹配,所以 `ngrambf_v1`, `tokenbf_v1` 和 `bloom_filter` 索引不能用于结果返回为假的函数,例如: -- 可以用来优化的场景 - - `s LIKE '%test%'` - - `NOT s NOT LIKE '%test%'` - - `s = 1` - - `NOT s != 1` - - `startsWith(s, 'test')` -- 不能用来优化的场景 - - `NOT s LIKE '%test%'` - - `s NOT LIKE '%test%'` - - `NOT s = 1` - - `s != 1` - - `NOT startsWith(s, 'test')` +- 可以用来优化的场景 + - `s LIKE '%test%'` + - `NOT s NOT LIKE '%test%'` + - `s = 1` + - `NOT s != 1` + - `startsWith(s, 'test')` +- 不能用来优化的场景 + - `NOT s LIKE '%test%'` + - `s NOT LIKE '%test%'` + - `NOT s = 1` + - `s != 1` + - `NOT startsWith(s, 'test')` ## 并发数据访问 {#concurrent-data-access} -应对表的并发访问,我们使用多版本机制。换言之,当同时读和更新表时,数据从当前查询到的一组片段中读取。没有冗长的的锁。插入不会阻碍读取。 +对于表的并发访问,我们使用多版本机制。换言之,当一张表同时被读和更新时,数据从当前查询到的一组片段中读取。没有冗长的的锁。插入不会阻碍读取。 对表的读操作是自动并行的。 ## 列和表的 TTL {#table_engine-mergetree-ttl} -TTL 可以设置值的生命周期,它既可以为整张表设置,也可以为每个列字段单独设置。表级别的 TTL 还会指定数据在磁盘和卷上自动转移的逻辑。 +TTL用于设置值的生命周期,它既可以为整张表设置,也可以为每个列字段单独设置。表级别的 TTL 还会指定数据在磁盘和卷上自动转移的逻辑。 TTL 表达式的计算结果必须是 [日期](../../../engines/table-engines/mergetree-family/mergetree.md) 或 [日期时间](../../../engines/table-engines/mergetree-family/mergetree.md) 类型的字段。 @@ -405,7 +411,7 @@ TTL date_time + INTERVAL 15 HOUR `TTL`子句不能被用于主键字段。 -示例: +**示例:** 创建表时指定 `TTL` @@ -443,16 +449,23 @@ ALTER TABLE example_table 表可以设置一个用于移除过期行的表达式,以及多个用于在磁盘或卷上自动转移数据片段的表达式。当表中的行过期时,ClickHouse 会删除所有对应的行。对于数据片段的转移特性,必须所有的行都满足转移条件。 ``` sql -TTL expr [DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'], ... +TTL expr + [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ... + [WHERE conditions] + [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] + ``` TTL 规则的类型紧跟在每个 TTL 表达式后面,它会影响满足表达式时(到达指定时间时)应当执行的操作: -- `DELETE` - 删除过期的行(默认操作); -- `TO DISK 'aaa'` - 将数据片段移动到磁盘 `aaa`; -- `TO VOLUME 'bbb'` - 将数据片段移动到卷 `bbb`. +- `DELETE` - 删除过期的行(默认操作); +- `TO DISK 'aaa'` - 将数据片段移动到磁盘 `aaa`; +- `TO VOLUME 'bbb'` - 将数据片段移动到卷 `bbb`. 
+- `GROUP BY` - 聚合过期的行 -示例: +使用`WHERE`从句,您可以指定哪些过期的行会被删除或聚合(不适用于移动)。`GROUP BY`表达式必须是表主键的前缀。如果某列不是`GROUP BY`表达式的一部分,也没有在SET从句显示引用,结果行中相应列的值是随机的(就好像使用了`any`函数)。 + +**示例**: 创建时指定 TTL @@ -477,19 +490,49 @@ ALTER TABLE example_table MODIFY TTL d + INTERVAL 1 DAY; ``` +创建一张表,设置一个月后数据过期,这些过期的行中日期为星期一的删除: + +``` sql +CREATE TABLE table_with_where +( + d DateTime, + a Int +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(d) +ORDER BY d +TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1; +``` + +创建一张表,设置过期的列会被聚合。列`x`包含每组行中的最大值,`y`为最小值,`d`为可能任意值。 + +``` sql +CREATE TABLE table_for_aggregation +( + d DateTime, + k1 Int, + k2 Int, + x Int, + y Int +) +ENGINE = MergeTree +ORDER BY (k1, k2) +TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); +``` + **删除数据** ClickHouse 在数据片段合并时会删除掉过期的数据。 -当ClickHouse发现数据过期时, 它将会执行一个计划外的合并。要控制这类合并的频率, 你可以设置 `merge_with_ttl_timeout`。如果该值被设置的太低, 它将引发大量计划外的合并,这可能会消耗大量资源。 +当ClickHouse发现数据过期时, 它将会执行一个计划外的合并。要控制这类合并的频率, 您可以设置 `merge_with_ttl_timeout`。如果该值被设置的太低, 它将引发大量计划外的合并,这可能会消耗大量资源。 -如果在合并的过程中执行 `SELECT` 查询, 则可能会得到过期的数据。为了避免这种情况,可以在 `SELECT` 之前使用 [OPTIMIZE](../../../engines/table-engines/mergetree-family/mergetree.md#misc_operations-optimize) 查询。 +如果在合并的过程中执行 `SELECT` 查询, 则可能会得到过期的数据。为了避免这种情况,可以在 `SELECT` 之前使用 [OPTIMIZE](../../../engines/table-engines/mergetree-family/mergetree.md#misc_operations-optimize) 。 -## 使用具有多个块的设备进行数据存储 {#table_engine-mergetree-multiple-volumes} +## 使用多个块设备进行数据存储 {#table_engine-mergetree-multiple-volumes} ### 介绍 {#introduction} -MergeTree 系列表引擎可以将数据存储在多块设备上。这对某些可以潜在被划分为“冷”“热”的表来说是很有用的。近期数据被定期的查询但只需要很小的空间。相反,详尽的历史数据很少被用到。如果有多块磁盘可用,那么“热”的数据可以放置在快速的磁盘上(比如 NVMe 固态硬盘或内存),“冷”的数据可以放在相对较慢的磁盘上(比如机械硬盘)。 +MergeTree 系列表引擎可以将数据存储在多个块设备上。这对某些可以潜在被划分为“冷”“热”的表来说是很有用的。最新数据被定期的查询但只需要很小的空间。相反,详尽的历史数据很少被用到。如果有多块磁盘可用,那么“热”的数据可以放置在快速的磁盘上(比如 NVMe 固态硬盘或内存),“冷”的数据可以放在相对较慢的磁盘上(比如机械硬盘)。 数据片段是 `MergeTree` 引擎表的最小可移动单元。属于同一个数据片段的数据被存储在同一块磁盘上。数据片段会在后台自动的在磁盘间移动,也可以通过 [ALTER](../../../sql-reference/statements/alter.md#alter_move-partition) 查询来移动。 @@ -497,12 +540,14 @@ MergeTree 系列表引擎可以将数据存储在多块设备上。这对某些 - 磁盘 — 挂载到文件系统的块设备 - 默认磁盘 — 在服务器设置中通过 [path](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-path) 参数指定的数据存储 -- 卷 — 磁盘的等效有序集合 (类似于 [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)) +- 卷 — 相同磁盘的顺序列表 (类似于 [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)) - 存储策略 — 卷的集合及他们之间的数据移动规则 + 以上名称的信息在Clickhouse中系统表[system.storage_policies](https://clickhouse.tech/docs/zh/operations/system-tables/storage_policies/#system_tables-storage_policies)和[system.disks](https://clickhouse.tech/docs/zh/operations/system-tables/disks/#system_tables-disks)体现。为了应用存储策略,可以在建表时使用`storage_policy`设置。 + ### 配置 {#table_engine-mergetree-multiple-volumes_configure} -磁盘、卷和存储策略应当在主文件 `config.xml` 或 `config.d` 目录中的独立文件中的 `` 标签内定义。 +磁盘、卷和存储策略应当在主配置文件 `config.xml` 或 `config.d` 目录中的独立文件中的 `` 标签内定义。 配置结构: @@ -530,9 +575,9 @@ MergeTree 系列表引擎可以将数据存储在多块设备上。这对某些 标签: -- `` — 磁盘名,名称必须与其他磁盘不同. -- `path` — 服务器将用来存储数据 (`data` 和 `shadow` 目录) 的路径, 应当以 ‘/’ 结尾. -- `keep_free_space_bytes` — 需要保留的剩余磁盘空间. +- `` — 磁盘名,名称必须与其他磁盘不同. +- `path` — 服务器将用来存储数据 (`data` 和 `shadow` 目录) 的路径, 应当以 ‘/’ 结尾. +- `keep_free_space_bytes` — 需要保留的剩余磁盘空间. 
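+
+下面给出一个与上述标签对应的假设性磁盘配置示意(磁盘名和路径仅为示例):
+
+``` xml
+<storage_configuration>
+    <disks>
+        <fast_ssd>
+            <path>/mnt/fast_ssd/clickhouse/</path>
+        </fast_ssd>
+        <disk1>
+            <path>/mnt/hdd1/clickhouse/</path>
+            <keep_free_space_bytes>10485760</keep_free_space_bytes>
+        </disk1>
+    </disks>
+</storage_configuration>
+```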
磁盘定义的顺序无关紧要。

@@ -567,11 +612,12 @@ MergeTree 系列表引擎可以将数据存储在多块设备上。这对某些

标签:

- `policy_name_N` — 策略名称,不能重复。
- `volume_name_N` — 卷名称,不能重复。
- `disk` — 卷中的磁盘。
- `max_data_part_size_bytes` — 卷中的磁盘可以存储的数据片段的最大大小。
- `move_factor` — 当可用空间少于这个因子时,数据将自动的向下一个卷(如果有的话)移动 (默认值为 0.1)。
+- `prefer_not_to_merge` - 禁止在这个卷中进行数据合并。该选项启用时,对该卷的数据不能进行合并。这个选项主要用于慢速磁盘。

配置示例:

``` xml
<storage_configuration>
    ...
    <policies>
        <hdd_in_order>
            <volumes>
                <single>
                    <disk>disk1</disk>
                    <disk>disk2</disk>
                </single>
            </volumes>
        </hdd_in_order>

        <moving_from_ssd_to_hdd>
            <volumes>
                <hot>
                    <disk>fast_ssd</disk>
                    <max_data_part_size_bytes>1073741824</max_data_part_size_bytes>
                </hot>
                <cold>
                    <disk>disk1</disk>
                </cold>
            </volumes>
            <move_factor>0.2</move_factor>
        </moving_from_ssd_to_hdd>
+
+        <small_jbod_with_external_no_merges>
+            <volumes>
+                <main>
+                    <disk>jbod1</disk>
+                </main>
+                <external>
+                    <disk>external</disk>
+                    <prefer_not_to_merge>true</prefer_not_to_merge>
+                </external>
+            </volumes>
+        </small_jbod_with_external_no_merges>
    </policies>
    ...
</storage_configuration>
```

在给出的例子中, `hdd_in_order` 策略实现了 [循环制](https://zh.wikipedia.org/wiki/循环制) 方法。因此这个策略只定义了一个卷(`single`),数据片段会以循环的顺序全部存储到它的磁盘上。当有多个类似的磁盘挂载到系统上,但没有配置 RAID 时,这种策略非常有用。请注意,每个独立的磁盘驱动器都并不可靠,您可能需要用3份或更多的复制份数来补偿它。

如果在系统中有不同类型的磁盘可用,可以使用 `moving_from_ssd_to_hdd`。`hot` 卷由 SSD 磁盘(`fast_ssd`)组成,这个卷上可以存储的数据片段的最大大小为 1GB。所有大于 1GB 的数据片段都会被直接存储到 `cold` 卷上,`cold` 卷包含一个名为 `disk1` 的 HDD 磁盘。
同样,一旦 `fast_ssd` 被填充超过 80%,数据会通过后台进程向 `disk1` 进行转移。

存储策略中卷的枚举顺序是很重要的。因为当一个卷被充满时,数据会向下一个卷转移。磁盘的枚举顺序同样重要,因为数据是依次存储在磁盘上的。

在创建表时,可以应用存储策略:

``` sql
CREATE TABLE table_with_non_default_policy (
    EventDate Date,
    OrderID UInt64,
    BannerID UInt64,
    SearchPhrase String
) ENGINE = MergeTree
ORDER BY (OrderID, BannerID)
PARTITION BY toYYYYMM(EventDate)
SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
```

-`default` 存储策略意味着只使用一个卷,这个卷只包含一个在 `<path>` 中定义的磁盘。表创建后,它的存储策略就不能改变了。
+`default` 存储策略意味着只使用一个卷,这个卷只包含一个在 `<path>` 中定义的磁盘。您可以使用 [ALTER TABLE ... MODIFY SETTING] 来修改存储策略,新的存储策略应该包含所有以前的磁盘和卷,并使用相同的名称。

可以通过 [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size) 设置调整执行后台任务的线程数。

@@ -634,24 +692,121 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'

对于 `MergeTree` 表,数据通过以下不同的方式写入到磁盘当中:

-- 作为插入(`INSERT`查询)的结果
-- 在后台合并和[数据变异](../../../sql-reference/statements/alter.md#alter-mutations)期间
-- 当从另一个副本下载时
-- 作为 [ALTER TABLE … FREEZE PARTITION](../../../sql-reference/statements/alter.md#alter_freeze-partition) 冻结分区的结果
+- 插入(`INSERT`查询)
+- 后台合并和[数据变异](../../../sql-reference/statements/alter.md#alter-mutations)
+- 从另一个副本下载
+- [ALTER TABLE … FREEZE PARTITION](../../../sql-reference/statements/alter.md#alter_freeze-partition) 冻结分区

除了数据变异和冻结分区以外的情况下,数据按照以下逻辑存储到卷或磁盘上:

1. 首个卷(按定义顺序)拥有足够的磁盘空间存储数据片段(`unreserved_space > current_part_size`)并且允许存储给定数据片段的大小(`max_data_part_size_bytes > current_part_size`)
2. 在这个数据卷内,紧挨着先前存储数据的那块磁盘之后的磁盘,拥有比数据片段大的剩余空间。(`unreserved_space - keep_free_space_bytes > current_part_size`)

更进一步,数据变异和分区冻结使用的是 [硬链接](https://en.wikipedia.org/wiki/Hard_link)。不同磁盘之间的硬链接是不支持的,所以在这种情况下数据片段都会被存储到原来的那一块磁盘上。

在后台,数据片段基于剩余空间(`move_factor`参数)根据卷在配置文件中定义的顺序进行转移。数据永远不会从最后一个移出也不会从第一个移入。可以通过系统表 [system.part_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (字段 `type = MOVE_PART`) 和 [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (字段 `path` 和 `disk`) 来监控后台的移动情况。具体细节可以通过服务器日志查看。

用户可以通过 [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter.md#alter_move-partition) 强制移动一个数据片段或分区到另外一个卷,所有后台移动的限制都会被考虑在内。这个查询会自行启动,无需等待后台操作完成。如果没有足够的可用空间或任何必须条件没有被满足,用户会收到报错信息。

数据移动不会妨碍到数据复制。也就是说,同一张表的不同副本可以指定不同的存储策略。

在后台合并和数据变异之后,旧的数据片段会在一定时间后被移除 (`old_parts_lifetime`)。在这期间,他们不能被移动到其他的卷或磁盘。也就是说,直到数据片段被完全移除,它们仍然会被磁盘占用空间计算在内。

+## 使用S3进行数据存储 {#using-s3-data-storage}
+
+`MergeTree`系列表引擎允许使用[S3](https://aws.amazon.com/s3/)存储数据,需要修改磁盘类型为`S3`。
+
+示例配置:
+
+``` xml
+<storage_configuration>
+    ...
+    <disks>
+        <s3>
+            <type>s3</type>
+            <endpoint>https://storage.yandexcloud.net/my-bucket/root-path/</endpoint>
+            <access_key_id>your_access_key_id</access_key_id>
+            <secret_access_key>your_secret_access_key</secret_access_key>
+            <server_side_encryption_customer_key_base64>your_base64_encoded_customer_key</server_side_encryption_customer_key_base64>
+            <proxy>
+                <uri>http://proxy1</uri>
+                <uri>http://proxy2</uri>
+            </proxy>
+            <connect_timeout_ms>10000</connect_timeout_ms>
+            <request_timeout_ms>5000</request_timeout_ms>
+            <retry_attempts>10</retry_attempts>
+            <single_read_retries>4</single_read_retries>
+            <min_bytes_for_seek>1000</min_bytes_for_seek>
+            <metadata_path>/var/lib/clickhouse/disks/s3/</metadata_path>
+            <cache_enabled>true</cache_enabled>
+            <cache_path>/var/lib/clickhouse/disks/s3/cache/</cache_path>
+            <skip_access_check>false</skip_access_check>
+        </s3>
+    </disks>
+    ...
+</storage_configuration>
+```
+
+必须的参数:
+
+- `endpoint` - S3的结点URL,以`path`或`virtual hosted`[格式](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html)书写。
+- `access_key_id` - S3的Access Key ID。
+- `secret_access_key` - S3的Secret Access Key。
+
+可选参数:
+
+- `region` - S3的区域名称
+- `use_environment_credentials` - 从环境变量AWS_ACCESS_KEY_ID、AWS_SECRET_ACCESS_KEY和AWS_SESSION_TOKEN中读取认证参数。默认值为`false`。
+- `use_insecure_imds_request` - 如果设置为`true`,S3客户端在认证时会使用不安全的IMDS请求。默认值为`false`。
+- `proxy` - 访问S3结点URL时代理设置。每一个`uri`项的值都应该是合法的代理URL。
+- `connect_timeout_ms` - Socket连接超时时间,默认值为`10000`,即10秒。
+- `request_timeout_ms` - 请求超时时间,默认值为`5000`,即5秒。
+- `retry_attempts` - 请求失败后的重试次数,默认值为10。
+- `single_read_retries` - 读过程中连接丢失后重试次数,默认值为4。
+- `min_bytes_for_seek` - 使用查找操作,而不是顺序读操作的最小字节数,默认值为1000。
+- `metadata_path` - 本地存放S3元数据文件的路径,默认值为`/var/lib/clickhouse/disks/<disk_name>/`
+- `cache_enabled` - 是否允许缓存标记和索引文件。默认值为`true`。
+- `cache_path` - 本地缓存标记和索引文件的路径。默认值为`/var/lib/clickhouse/disks/<disk_name>/cache/`。
+- `skip_access_check` - 如果为`true`,Clickhouse启动时不检查磁盘是否可用。默认为`false`。
+- `server_side_encryption_customer_key_base64` - 如果指定该项的值,请求时会加上为了访问SSE-C加密数据而必须的头信息。
+
+S3磁盘也可以设置冷热存储:
+```xml
+<storage_configuration>
+    ...
+    <disks>
+        <s3>
+            <type>s3</type>
+            <endpoint>https://storage.yandexcloud.net/my-bucket/root-path/</endpoint>
+            <access_key_id>your_access_key_id</access_key_id>
+            <secret_access_key>your_secret_access_key</secret_access_key>
+        </s3>
+    </disks>
+    <policies>
+        <s3_main>
+            <volumes>
+                <main>
+                    <disk>s3</disk>
+                </main>
+            </volumes>
+        </s3_main>
+        <s3_cold>
+            <volumes>
+                <main>
+                    <disk>default</disk>
+                </main>
+                <external>
+                    <disk>s3</disk>
+                </external>
+            </volumes>
+            <move_factor>0.2</move_factor>
+        </s3_cold>
+    </policies>
+    ...
+</storage_configuration>
+``` + +指定了`cold`选项后,本地磁盘剩余空间如果小于`move_factor * disk_size`,或有TTL设置时,数据就会定时迁移至S3了。 [原始文章](https://clickhouse.tech/docs/en/operations/table_engines/mergetree/) From 76cee4e3cf68ae68755e333c919c787d4c2181a4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 21 Jun 2021 16:58:39 +0300 Subject: [PATCH 023/183] Debugging --- src/Common/ZooKeeper/ZooKeeperIO.cpp | 16 +++++ src/Common/ZooKeeper/ZooKeeperIO.h | 9 +++ src/Coordination/KeeperStorage.cpp | 15 +++-- src/Coordination/ZooKeeperDataReader.cpp | 2 +- tests/integration/helpers/cluster.py | 2 +- .../test_keeper_zookeeper_converter/test.py | 64 +++++++++---------- 6 files changed, 68 insertions(+), 40 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeperIO.cpp b/src/Common/ZooKeeper/ZooKeeperIO.cpp index 55448c9a109..0e0a034c633 100644 --- a/src/Common/ZooKeeper/ZooKeeperIO.cpp +++ b/src/Common/ZooKeeper/ZooKeeperIO.cpp @@ -9,6 +9,14 @@ void write(size_t x, WriteBuffer & out) writeBinary(x, out); } +#ifdef __APPLE__ +void write(uint64_t x, WriteBuffer & out) +{ + x = __builtin_bswap64(x); + writeBinary(x, out); +} +#endif + void write(int64_t x, WriteBuffer & out) { x = __builtin_bswap64(x); @@ -63,6 +71,14 @@ void write(const Error & x, WriteBuffer & out) write(static_cast(x), out); } +#ifdef __APPLE__ +void read(uint64_t & x, ReadBuffer & in) +{ + readBinary(x, in); + x = __builtin_bswap64(x); +} +#endif + void read(size_t & x, ReadBuffer & in) { readBinary(x, in); diff --git a/src/Common/ZooKeeper/ZooKeeperIO.h b/src/Common/ZooKeeper/ZooKeeperIO.h index fd47e324664..1fcb96315a5 100644 --- a/src/Common/ZooKeeper/ZooKeeperIO.h +++ b/src/Common/ZooKeeper/ZooKeeperIO.h @@ -14,6 +14,12 @@ namespace Coordination using namespace DB; void write(size_t x, WriteBuffer & out); + +/// uint64_t != size_t on darwin +#ifdef __APPLE__ +void write(uint64_t x, WriteBuffer & out); +#endif + void write(int64_t x, WriteBuffer & out); void write(int32_t x, WriteBuffer & out); void write(OpNum x, WriteBuffer & out); @@ -39,6 +45,9 @@ void write(const std::vector & arr, WriteBuffer & out) } void read(size_t & x, ReadBuffer & in); +#ifdef __APPLE__ +void read(uint64_t & x, ReadBuffer & in); +#endif void read(int64_t & x, ReadBuffer & in); void read(int32_t & x, ReadBuffer & in); void read(OpNum & x, ReadBuffer & in); diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 5418afb2501..d59af287bab 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -57,7 +57,7 @@ static String generateDigest(const String & userdata) { std::vector user_password; boost::split(user_password, userdata, [](char c) { return c == ':'; }); - return user_password[0] + ":" + base64Encode(getSHA1(user_password[1])); + return user_password[0] + ":" + base64Encode(getSHA1(userdata)); } static bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, const std::vector & session_auths) @@ -71,14 +71,19 @@ static bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, c for (const auto & node_acl : node_acls) { + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "NODE ACL PERMISSIONS {} SESSION PERMS {}", node_acl.permissions, permission); if (node_acl.permissions & permission) { if (node_acl.scheme == "world" && node_acl.id == "anyone") return true; for (const auto & session_auth : session_auths) + { + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "NODE ACL SCHEME {} SESSION SCHEME {}", node_acl.scheme, session_auth.scheme); + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "NODE ACL AUTHID {} SESSION AUTHID {}", 
node_acl.id, session_auth.id); if (node_acl.scheme == session_auth.scheme && node_acl.id == session_auth.id) return true; + } } } @@ -353,16 +358,19 @@ struct KeeperStorageGetRequest final : public KeeperStorageRequest bool checkAuth(KeeperStorage & storage, int64_t session_id) const override { + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "CHECKING ACL FOR PATH {} IN GET", zk_request->getPath()); auto & container = storage.container; auto it = container.find(zk_request->getPath()); if (it == container.end()) return true; const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id); + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "NODE ACLID {} ACL SIZE {}",it->value.acl_id, node_acls.size()); if (node_acls.empty()) return true; const auto & session_auths = storage.session_and_auth[session_id]; + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "SESSION AUTHS SIZE {}", session_auths.size()); return checkACL(Coordination::ACL::Read, node_acls, session_auths); } @@ -908,15 +916,10 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina KeeperStorage::ResponsesForSessions results; if (new_last_zxid) { - LOG_INFO(&Poco::Logger::get("DEBUG"), "GOT ZXID {}", *new_last_zxid); if (zxid >= *new_last_zxid) throw Exception(ErrorCodes::LOGICAL_ERROR, "Got new ZXID {} smaller or equal than current {}. It's a bug", *new_last_zxid, zxid); zxid = *new_last_zxid; } - else - { - LOG_INFO(&Poco::Logger::get("DEBUG"), "NO ZXID PROVIDED"); - } session_expiry_queue.update(session_id, session_and_timeout[session_id]); if (zk_request->getOpNum() == Coordination::OpNum::Close) diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index 4a324abe93d..42440250ed8 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -429,7 +429,7 @@ bool hasErrorsInMultiRequest(Coordination::ZooKeeperRequestPtr request) } -bool deserializeTxn(KeeperStorage & storage, ReadBuffer & in, Poco::Logger * log) +bool deserializeTxn(KeeperStorage & storage, ReadBuffer & in, Poco::Logger * /*log*/) { int64_t checksum; Coordination::read(checksum, in); diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 59e598ce6ba..54e129fed11 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -213,7 +213,7 @@ class ClickHouseCluster: if self.name: instances_dir_name += '_' + self.name - if 'INTEGRATION_TESTS_RUN_ID' in os.environ: + if 'INTEGRATION_TESTS_RUN_ID' in os.environ and os.environ['INTEGRATION_TESTS_RUN_ID']: instances_dir_name += '_' + shlex.quote(os.environ['INTEGRATION_TESTS_RUN_ID']) self.instances_dir = p.join(self.base_dir, instances_dir_name) diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py index 5c6ed90eb35..61f4248f2be 100644 --- a/tests/integration/test_keeper_zookeeper_converter/test.py +++ b/tests/integration/test_keeper_zookeeper_converter/test.py @@ -204,35 +204,35 @@ def test_multi_and_failed_requests(started_cluster): compare_states(genuine_connection, fake_connection) -#def test_acls(started_cluster): -# restart_and_clear_zookeeper() -# genuine_connection = get_genuine_zk() -# genuine_connection.add_auth('digest', 'user1:password1') -# genuine_connection.add_auth('digest', 'user2:password2') -# genuine_connection.add_auth('digest', 'user3:password3') -# -# genuine_connection.create("/test_multi_all_acl", b"data", acl=[make_acl("auth", "", all=True)]) -# -# 
other_connection = get_genuine_zk() -# other_connection.add_auth('digest', 'user1:password1') -# other_connection.set("/test_multi_all_acl", b"X") -# assert other_connection.get("/test_multi_all_acl")[0] == b"X" -# -# yet_other_auth_connection = get_genuine_zk() -# yet_other_auth_connection.add_auth('digest', 'user2:password2') -# -# yet_other_auth_connection.set("/test_multi_all_acl", b"Y") -# -# copy_zookeeper_data() -# -# genuine_connection = get_genuine_zk() -# genuine_connection.add_auth('digest', 'user1:password1') -# genuine_connection.add_auth('digest', 'user2:password2') -# genuine_connection.add_auth('digest', 'user3:password3') -# -# fake_connection = get_fake_zk() -# fake_connection.add_auth('digest', 'user1:password1') -# fake_connection.add_auth('digest', 'user2:password2') -# fake_connection.add_auth('digest', 'user3:password3') -# -# compare_states(genuine_connection, fake_connection) +def test_acls(started_cluster): + restart_and_clear_zookeeper() + genuine_connection = get_genuine_zk() + genuine_connection.add_auth('digest', 'user1:password1') + genuine_connection.add_auth('digest', 'user2:password2') + genuine_connection.add_auth('digest', 'user3:password3') + + genuine_connection.create("/test_multi_all_acl", b"data", acl=[make_acl("auth", "", all=True)]) + + other_connection = get_genuine_zk() + other_connection.add_auth('digest', 'user1:password1') + other_connection.set("/test_multi_all_acl", b"X") + assert other_connection.get("/test_multi_all_acl")[0] == b"X" + + yet_other_auth_connection = get_genuine_zk() + yet_other_auth_connection.add_auth('digest', 'user2:password2') + + yet_other_auth_connection.set("/test_multi_all_acl", b"Y") + + copy_zookeeper_data() + + genuine_connection = get_genuine_zk() + genuine_connection.add_auth('digest', 'user1:password1') + genuine_connection.add_auth('digest', 'user2:password2') + genuine_connection.add_auth('digest', 'user3:password3') + + fake_connection = get_fake_zk() + fake_connection.add_auth('digest', 'user1:password1') + fake_connection.add_auth('digest', 'user2:password2') + fake_connection.add_auth('digest', 'user3:password3') + + compare_states(genuine_connection, fake_connection) From 731edc9a6d13f63c2ba8fa95de761edcbb64cc4a Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 21 Jun 2021 18:45:45 +0300 Subject: [PATCH 024/183] Fixes in ACLs --- src/Common/ZooKeeper/ZooKeeperCommon.h | 3 +++ src/Coordination/KeeperStorage.cpp | 18 +++++++----------- src/Coordination/KeeperStorage.h | 2 +- src/Coordination/ZooKeeperDataReader.cpp | 3 ++- .../test_keeper_zookeeper_converter/test.py | 5 +++++ 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index ced154133b5..c50c271c1ec 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -183,6 +183,9 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest bool isReadRequest() const override { return false; } size_t bytesSize() const override { return CreateRequest::bytesSize() + sizeof(xid) + sizeof(has_watch); } + + /// During recovery from log we don't rehash ACLs + bool need_to_hash_acls = true; }; struct ZooKeeperCreateResponse final : CreateResponse, ZooKeeperResponse diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index d59af287bab..dd0a7dffabb 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -71,7 +71,6 @@ static bool checkACL(int32_t 
permission, const Coordination::ACLs & node_acls, c
 
     for (const auto & node_acl : node_acls)
     {
-        LOG_DEBUG(&Poco::Logger::get("DEBUG"), "NODE ACL PERMISSIONS {} SESSION PERMS {}", node_acl.permissions, permission);
         if (node_acl.permissions & permission)
         {
             if (node_acl.scheme == "world" && node_acl.id == "anyone")
@@ -79,8 +78,6 @@ static bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, c
 
             for (const auto & session_auth : session_auths)
             {
-                LOG_DEBUG(&Poco::Logger::get("DEBUG"), "NODE ACL SCHEME {} SESSION SCHEME {}", node_acl.scheme, session_auth.scheme);
-                LOG_DEBUG(&Poco::Logger::get("DEBUG"), "NODE ACL AUTHID {} SESSION AUTHID {}", node_acl.id, session_auth.id);
                 if (node_acl.scheme == session_auth.scheme && node_acl.id == session_auth.id)
                     return true;
             }
@@ -93,7 +90,8 @@ static bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, c
 static bool fixupACL(
     const std::vector<Coordination::ACL> & request_acls,
     const std::vector<KeeperStorage::AuthID> & current_ids,
-    std::vector<Coordination::ACL> & result_acls)
+    std::vector<Coordination::ACL> & result_acls,
+    bool hash_acls)
 {
     if (request_acls.empty())
         return true;
@@ -126,7 +124,8 @@ static bool fixupACL(
                 return false;
 
             valid_found = true;
-            new_acl.id = generateDigest(new_acl.id);
+            if (hash_acls)
+                new_acl.id = generateDigest(new_acl.id);
             result_acls.push_back(new_acl);
         }
     }
@@ -274,7 +273,7 @@ struct KeeperStorageCreateRequest final : public KeeperStorageRequest
         KeeperStorage::Node created_node;
 
         Coordination::ACLs node_acls;
-        if (!fixupACL(request.acls, session_auth_ids, node_acls))
+        if (!fixupACL(request.acls, session_auth_ids, node_acls, request.need_to_hash_acls))
         {
             response.error = Coordination::Error::ZINVALIDACL;
             return {response_ptr, {}};
@@ -358,19 +357,16 @@ struct KeeperStorageGetRequest final : public KeeperStorageRequest
 
     bool checkAuth(KeeperStorage & storage, int64_t session_id) const override
     {
-        LOG_DEBUG(&Poco::Logger::get("DEBUG"), "CHECKING ACL FOR PATH {} IN GET", zk_request->getPath());
         auto & container = storage.container;
         auto it = container.find(zk_request->getPath());
         if (it == container.end())
             return true;
 
         const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id);
-        LOG_DEBUG(&Poco::Logger::get("DEBUG"), "NODE ACLID {} ACL SIZE {}",it->value.acl_id, node_acls.size());
         if (node_acls.empty())
             return true;
 
         const auto & session_auths = storage.session_and_auth[session_id];
-        LOG_DEBUG(&Poco::Logger::get("DEBUG"), "SESSION AUTHS SIZE {}", session_auths.size());
 
         return checkACL(Coordination::ACL::Read, node_acls, session_auths);
     }
@@ -911,7 +907,7 @@ KeeperWrapperFactory::KeeperWrapperFactory()
 }
 
 
-KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id, std::optional<int64_t> new_last_zxid)
+KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordination::ZooKeeperRequestPtr & zk_request, int64_t session_id, std::optional<int64_t> new_last_zxid, bool check_acl)
 {
     KeeperStorage::ResponsesForSessions results;
     if (new_last_zxid)
@@ -969,7 +965,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina
     KeeperStorageRequestPtr storage_request = KeeperWrapperFactory::instance().get(zk_request);
     Coordination::ZooKeeperResponsePtr response;
 
-    if (!storage_request->checkAuth(*this, session_id))
+    if (check_acl && !storage_request->checkAuth(*this, session_id))
     {
         response = zk_request->makeResponse();
         /// Original ZooKeeper always throws no auth, even when user provided some credentials
diff --git a/src/Coordination/KeeperStorage.h 
b/src/Coordination/KeeperStorage.h
index 7c90a9bd661..e3cb0f59fdc 100644
--- a/src/Coordination/KeeperStorage.h
+++ b/src/Coordination/KeeperStorage.h
@@ -116,7 +116,7 @@ public:
         session_expiry_queue.update(session_id, session_timeout_ms);
     }
 
-    ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, std::optional<int64_t> new_last_zxid);
+    ResponsesForSessions processRequest(const Coordination::ZooKeeperRequestPtr & request, int64_t session_id, std::optional<int64_t> new_last_zxid, bool check_acl = true);
 
     void finalize();
 
diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp
index 42440250ed8..10d04ba77f9 100644
--- a/src/Coordination/ZooKeeperDataReader.cpp
+++ b/src/Coordination/ZooKeeperDataReader.cpp
@@ -289,6 +289,7 @@ Coordination::ZooKeeperRequestPtr deserializeCreateTxn(ReadBuffer & in)
     Coordination::read(result->data, in);
     Coordination::read(result->acls, in);
     Coordination::read(result->is_ephemeral, in);
+    result->need_to_hash_acls = false;
     /// How we should use it? It should just increment on request execution
     int32_t parent_c_version;
     Coordination::read(parent_c_version, in);
@@ -476,7 +477,7 @@ bool deserializeTxn(KeeperStorage & storage, ReadBuffer & in, Poco::Logger * /*l
         if (request->getOpNum() == Coordination::OpNum::Multi && hasErrorsInMultiRequest(request))
             return true;
 
-        storage.processRequest(request, session_id, zxid);
+        storage.processRequest(request, session_id, zxid, /* check_acl = */ false);
     }
 }
 
diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py
index 61f4248f2be..fa2178974e9 100644
--- a/tests/integration/test_keeper_zookeeper_converter/test.py
+++ b/tests/integration/test_keeper_zookeeper_converter/test.py
@@ -223,6 +223,11 @@ def test_acls(started_cluster):
 
     yet_other_auth_connection.set("/test_multi_all_acl", b"Y")
 
+    no_auth_connection = get_genuine_zk()
+
+    with pytest.raises(Exception):
+        no_auth_connection.set("/test_multi_all_acl", b"Z")
+
     copy_zookeeper_data()
 
     genuine_connection = get_genuine_zk()

From 00f9dfc12a7f49c854883fa074e8b0770623c4c6 Mon Sep 17 00:00:00 2001
From: MyroTk
Date: Mon, 21 Jun 2021 17:49:28 +0200
Subject: [PATCH 025/183] Syntax update - changing 'is' to '=='.
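Before the diff of this patch, a brief aside on why the change matters, as a minimal sketch (not part of the patch): in Python, `is` tests object identity, not equality, so comparisons like `arithmetic_func is 'negate'` only worked when CPython happened to intern both string objects, which is an implementation detail.

```python
# Identity vs. equality for strings - why `is` comparisons are unreliable here:
a = 'round' + 'Down'   # built at runtime, usually a distinct object
b = 'roundDown'
print(a == b)          # True: value equality, what the tests actually need
print(a is b)          # typically False: same value, different objects
```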
--- .../extended_precision_data_types/tests/arithmetic.py | 2 +- .../extended_precision_data_types/tests/rounding.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/testflows/extended_precision_data_types/tests/arithmetic.py b/tests/testflows/extended_precision_data_types/tests/arithmetic.py index 49d7ee1fcb3..c57f3d7d8e1 100644 --- a/tests/testflows/extended_precision_data_types/tests/arithmetic.py +++ b/tests/testflows/extended_precision_data_types/tests/arithmetic.py @@ -141,7 +141,7 @@ def inline_check_dec(self, arithmetic_func, expected_result, node=None): if node is None: node = self.context.node - if arithmetic_func is 'negate' or arithmetic_func is 'abs': + if arithmetic_func in ['negate','abs']: with When(f"I check {arithmetic_func} with toDecimal256"): output = node.query(f"SELECT {arithmetic_func}(toDecimal256(1,0))").output diff --git a/tests/testflows/extended_precision_data_types/tests/rounding.py b/tests/testflows/extended_precision_data_types/tests/rounding.py index f01d6898b32..e32f4e941d3 100644 --- a/tests/testflows/extended_precision_data_types/tests/rounding.py +++ b/tests/testflows/extended_precision_data_types/tests/rounding.py @@ -25,7 +25,7 @@ def round_int_inline(self, func, expected_result, supported, int_type, min, max, if node is None: node = self.context.node - if func is 'roundDown': + if func == 'roundDown': with When(f"I check roundDown with {int_type}"): node.query(f"SELECT roundDown(to{int_type}(1), [0,2]), roundDown(to{int_type}(\'{max}\'), [0,2]), roundDown(to{int_type}(\'{min}\'), [0,2])", @@ -62,7 +62,7 @@ def round_int_table(self, func, expected_result, supported, int_type, min, max, with Given("I have a table"): table(name = table_name, data_type = int_type) - if func is 'roundDown': + if func == 'roundDown': for value in [1,max,min]: @@ -101,7 +101,7 @@ def round_dec_inline(self, func, expected_result, supported, node=None): if node is None: node = self.context.node - if func is 'roundDown': + if func == 'roundDown': with When(f"I check roundDown with Decimal256"): node.query(f"""SELECT roundDown(toDecimal256(1,0), [toDecimal256(0,0),toDecimal256(2,0)]), @@ -142,7 +142,7 @@ def round_dec_table(self, func, expected_result, supported, node=None): with Given("I have a table"): table(name = table_name, data_type = 'Decimal256(0)') - if func is 'roundDown': + if func == 'roundDown': for value in [1, max, min]: From 6e3b1841de3d978108850f0a1153df6912e9a56c Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Mon, 21 Jun 2021 20:51:11 +0300 Subject: [PATCH 026/183] Update s3Cluster.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Поправил примеры. 
---
 docs/en/sql-reference/table-functions/s3Cluster.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md
index b49da53f01a..4bde49b8cc0 100644
--- a/docs/en/sql-reference/table-functions/s3Cluster.md
+++ b/docs/en/sql-reference/table-functions/s3Cluster.md
@@ -26,13 +26,13 @@ A table with the specified structure for reading or writing data in the specifie
 
 **Examples**
 
-Selecting the data from the cluster `cluster_simple` using source `http://minio1:9001/root/data/{clickhouse,database}/*`:
+Selecting the data from all files in the cluster `cluster_simple`:
 
 ``` sql
 SELECT * from s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon);
 ```
 
-Count the total amount of rows in all files of the cluster `cluster_simple`:
+Count the total number of rows in all files in the cluster `cluster_simple`:
 
 ``` sql
 SELECT count(*) from s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))');
 ```

From 71e9689ba6a65a3461c1a386534c5bad05344241 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Mon, 21 Jun 2021 22:59:19 +0300
Subject: [PATCH 027/183] Fix PVS warning

---
 src/Coordination/ZooKeeperDataReader.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp
index 10d04ba77f9..cf28627961f 100644
--- a/src/Coordination/ZooKeeperDataReader.cpp
+++ b/src/Coordination/ZooKeeperDataReader.cpp
@@ -422,6 +422,9 @@ bool isErrorRequest(Coordination::ZooKeeperRequestPtr request)
 
 bool hasErrorsInMultiRequest(Coordination::ZooKeeperRequestPtr request)
 {
+    if (request == nullptr)
+        return true;
+
     for (const auto & subrequest : dynamic_cast<Coordination::ZooKeeperMultiRequest *>(request.get())->requests)
         if (subrequest == nullptr)
             return true;

From bf0304bc87cf46645e3d79fb433631a6721e9a08 Mon Sep 17 00:00:00 2001
From: yuchuansun
Date: Tue, 22 Jun 2021 14:43:28 +0800
Subject: [PATCH 028/183] doc: update cluster.md in chinese

---
 docs/zh/operations/system-tables/clusters.md | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/docs/zh/operations/system-tables/clusters.md b/docs/zh/operations/system-tables/clusters.md
index 1e5935c276e..71ecc4245d3 100644
--- a/docs/zh/operations/system-tables/clusters.md
+++ b/docs/zh/operations/system-tables/clusters.md
@@ -1,9 +1,4 @@
----
-machine_translated: true
-machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
----
-
-# 系统。集群 {#system-clusters}
+# 系统-集群 {#system-clusters}
 
 包含有关配置文件中可用的集群及其中的服务器的信息。
 
From 730554589f0b876dbec48178408b28b423c3d235 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Tue, 22 Jun 2021 11:14:54 +0300
Subject: [PATCH 029/183] fix superdigest

---
 tests/integration/test_keeper_auth/configs/keeper_config.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_keeper_auth/configs/keeper_config.xml b/tests/integration/test_keeper_auth/configs/keeper_config.xml
index bb3c9a5d94a..bee3ccb0aba 100644
--- a/tests/integration/test_keeper_auth/configs/keeper_config.xml
+++ b/tests/integration/test_keeper_auth/configs/keeper_config.xml
@@ -4,7 +4,7 @@
         <server_id>1</server_id>
         <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
         <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
-        <superdigest>
super:0DPiKuNIrrVmD8IUCuw1hQxNqZc=</superdigest>
+        <superdigest>super:xQJmxLMiHGwaqBvst5y6rkB6HQs=</superdigest>
 
         <operation_timeout_ms>5000</operation_timeout_ms>
 
From bf0a4864ac74b498eec0d522f778ad8464c4116c Mon Sep 17 00:00:00 2001
From: alesapin
Date: Tue, 22 Jun 2021 13:49:35 +0300
Subject: [PATCH 030/183] Add support for set/get ACL commands

---
 src/Common/ZooKeeper/ZooKeeperCommon.cpp    |  51 +++++++++
 src/Common/ZooKeeper/ZooKeeperCommon.h      |  42 +++++++
 src/Common/ZooKeeper/ZooKeeperConstants.cpp |   6 +
 src/Common/ZooKeeper/ZooKeeperConstants.h   |   2 +
 src/Coordination/KeeperStorage.cpp          | 107 ++++++++++++++++++
 src/Coordination/ZooKeeperDataReader.cpp    |  17 +++
 tests/integration/test_keeper_auth/test.py  |  46 +++++++-
 .../test_keeper_zookeeper_converter/test.py |  15 +++
 8 files changed, 285 insertions(+), 1 deletion(-)

diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp
index 50bdc6c77ba..1560d7a25da 100644
--- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp
@@ -239,6 +239,53 @@ void ZooKeeperListResponse::writeImpl(WriteBuffer & out) const
     Coordination::write(stat, out);
 }
 
+
+void ZooKeeperSetACLRequest::writeImpl(WriteBuffer & out) const
+{
+    Coordination::write(path, out);
+    Coordination::write(acls, out);
+    Coordination::write(version, out);
+}
+
+void ZooKeeperSetACLRequest::readImpl(ReadBuffer & in)
+{
+    Coordination::read(path, in);
+    Coordination::read(acls, in);
+    Coordination::read(version, in);
+}
+
+void ZooKeeperSetACLResponse::writeImpl(WriteBuffer & out) const
+{
+    Coordination::write(stat, out);
+}
+
+void ZooKeeperSetACLResponse::readImpl(ReadBuffer & in)
+{
+    Coordination::read(stat, in);
+}
+
+void ZooKeeperGetACLRequest::readImpl(ReadBuffer & in)
+{
+    Coordination::read(path, in);
+}
+
+void ZooKeeperGetACLRequest::writeImpl(WriteBuffer & out) const
+{
+    Coordination::write(path, out);
+}
+
+void ZooKeeperGetACLResponse::writeImpl(WriteBuffer & out) const
+{
+    Coordination::write(acl, out);
+    Coordination::write(stat, out);
+}
+
+void ZooKeeperGetACLResponse::readImpl(ReadBuffer & in)
+{
+    Coordination::read(acl, in);
+    Coordination::read(stat, in);
+}
+
 void ZooKeeperCheckRequest::writeImpl(WriteBuffer & out) const
 {
     Coordination::write(path, out);
@@ -454,6 +501,8 @@ ZooKeeperResponsePtr ZooKeeperListRequest::makeResponse() const { return std::ma
 ZooKeeperResponsePtr ZooKeeperCheckRequest::makeResponse() const { return std::make_shared<ZooKeeperCheckResponse>(); }
 ZooKeeperResponsePtr ZooKeeperMultiRequest::makeResponse() const { return std::make_shared<ZooKeeperMultiResponse>(requests); }
 ZooKeeperResponsePtr ZooKeeperCloseRequest::makeResponse() const { return std::make_shared<ZooKeeperCloseResponse>(); }
+ZooKeeperResponsePtr ZooKeeperSetACLRequest::makeResponse() const { return std::make_shared<ZooKeeperSetACLResponse>(); }
+ZooKeeperResponsePtr ZooKeeperGetACLRequest::makeResponse() const { return std::make_shared<ZooKeeperGetACLResponse>(); }
 
 void ZooKeeperSessionIDRequest::writeImpl(WriteBuffer & out) const
 {
@@ -545,6 +594,8 @@ ZooKeeperRequestFactory::ZooKeeperRequestFactory()
     registerZooKeeperRequest<OpNum::Check, ZooKeeperCheckRequest>(*this);
     registerZooKeeperRequest<OpNum::Multi, ZooKeeperMultiRequest>(*this);
     registerZooKeeperRequest<OpNum::SessionID, ZooKeeperSessionIDRequest>(*this);
+    registerZooKeeperRequest<OpNum::SetACL, ZooKeeperSetACLRequest>(*this);
+    registerZooKeeperRequest<OpNum::GetACL, ZooKeeperGetACLRequest>(*this);
 }
 
 }
diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h
index c50c271c1ec..a816c1eb8bb 100644
--- a/src/Common/ZooKeeper/ZooKeeperCommon.h
+++ b/src/Common/ZooKeeper/ZooKeeperCommon.h
@@ -353,6 +353,48 @@ struct ZooKeeperErrorResponse final : ErrorResponse, ZooKeeperResponse
     size_t bytesSize() const override { return ErrorResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
 };
 
+struct 
ZooKeeperSetACLRequest final : SetACLRequest, ZooKeeperRequest
+{
+    OpNum getOpNum() const override { return OpNum::SetACL; }
+    void writeImpl(WriteBuffer & out) const override;
+    void readImpl(ReadBuffer & in) override;
+    ZooKeeperResponsePtr makeResponse() const override;
+    bool isReadRequest() const override { return false; }
+
+    size_t bytesSize() const override { return SetACLRequest::bytesSize() + sizeof(xid); }
+
+    bool need_to_hash_acls = true;
+};
+
+struct ZooKeeperSetACLResponse final : SetACLResponse, ZooKeeperResponse
+{
+    void readImpl(ReadBuffer & in) override;
+    void writeImpl(WriteBuffer & out) const override;
+    OpNum getOpNum() const override { return OpNum::SetACL; }
+
+    size_t bytesSize() const override { return SetACLResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
+};
+
+struct ZooKeeperGetACLRequest final : GetACLRequest, ZooKeeperRequest
+{
+    OpNum getOpNum() const override { return OpNum::GetACL; }
+    void writeImpl(WriteBuffer & out) const override;
+    void readImpl(ReadBuffer & in) override;
+    ZooKeeperResponsePtr makeResponse() const override;
+    bool isReadRequest() const override { return true; }
+
+    size_t bytesSize() const override { return GetACLRequest::bytesSize() + sizeof(xid); }
+};
+
+struct ZooKeeperGetACLResponse final : GetACLResponse, ZooKeeperResponse
+{
+    void readImpl(ReadBuffer & in) override;
+    void writeImpl(WriteBuffer & out) const override;
+    OpNum getOpNum() const override { return OpNum::GetACL; }
+
+    size_t bytesSize() const override { return GetACLResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
+};
+
 struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest
 {
     OpNum getOpNum() const override { return OpNum::Multi; }
diff --git a/src/Common/ZooKeeper/ZooKeeperConstants.cpp b/src/Common/ZooKeeper/ZooKeeperConstants.cpp
index d2dde4c4cdd..3f480fb6b2b 100644
--- a/src/Common/ZooKeeper/ZooKeeperConstants.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperConstants.cpp
@@ -22,6 +22,8 @@ static const std::unordered_set<int32_t> VALID_OPERATIONS =
     static_cast<int32_t>(OpNum::Multi),
     static_cast<int32_t>(OpNum::Auth),
    static_cast<int32_t>(OpNum::SessionID),
+    static_cast<int32_t>(OpNum::SetACL),
+    static_cast<int32_t>(OpNum::GetACL),
 };
 
 std::string toString(OpNum op_num)
@@ -58,6 +60,10 @@ std::string toString(OpNum op_num)
             return "Auth";
         case OpNum::SessionID:
             return "SessionID";
+        case OpNum::SetACL:
+            return "SetACL";
+        case OpNum::GetACL:
+            return "GetACL";
     }
     int32_t raw_op = static_cast<int32_t>(op_num);
     throw Exception("Operation " + std::to_string(raw_op) + " is unknown", Error::ZUNIMPLEMENTED);
diff --git a/src/Common/ZooKeeper/ZooKeeperConstants.h b/src/Common/ZooKeeper/ZooKeeperConstants.h
index f91204693a0..ed7afd83628 100644
--- a/src/Common/ZooKeeper/ZooKeeperConstants.h
+++ b/src/Common/ZooKeeper/ZooKeeperConstants.h
@@ -23,6 +23,8 @@ enum class OpNum : int32_t
     Exists = 3,
     Get = 4,
     Set = 5,
+    GetACL = 6,
+    SetACL = 7,
     SimpleList = 8,
     Sync = 9,
     Heartbeat = 11,
diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp
index dd0a7dffabb..97c78e04f05 100644
--- a/src/Coordination/KeeperStorage.cpp
+++ b/src/Coordination/KeeperStorage.cpp
@@ -678,6 +678,111 @@ struct KeeperStorageCheckRequest final : public KeeperStorageRequest
     }
 };
 
+
+struct KeeperStorageSetACLRequest final : public KeeperStorageRequest
+{
+    bool checkAuth(KeeperStorage & storage, int64_t session_id) const override
+    {
+        auto & container = storage.container;
+        auto it = container.find(zk_request->getPath());
+        if (it == container.end())
+            return true;
+
+        const auto & node_acls = 
storage.acl_map.convertNumber(it->value.acl_id);
+        if (node_acls.empty())
+            return true;
+
+        const auto & session_auths = storage.session_and_auth[session_id];
+        return checkACL(Coordination::ACL::Admin, node_acls, session_auths);
+    }
+
+    using KeeperStorageRequest::KeeperStorageRequest;
+
+    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage & storage, int64_t /*zxid*/, int64_t session_id) const override
+    {
+        auto & container = storage.container;
+
+        Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
+        Coordination::ZooKeeperSetACLResponse & response = dynamic_cast<Coordination::ZooKeeperSetACLResponse &>(*response_ptr);
+        Coordination::ZooKeeperSetACLRequest & request = dynamic_cast<Coordination::ZooKeeperSetACLRequest &>(*zk_request);
+        auto it = container.find(request.path);
+        if (it == container.end())
+        {
+            response.error = Coordination::Error::ZNONODE;
+        }
+        else if (request.version != -1 && request.version != it->value.stat.aversion)
+        {
+            response.error = Coordination::Error::ZBADVERSION;
+        }
+        else
+        {
+            auto & session_auth_ids = storage.session_and_auth[session_id];
+            Coordination::ACLs node_acls;
+
+            if (!fixupACL(request.acls, session_auth_ids, node_acls, request.need_to_hash_acls))
+            {
+                response.error = Coordination::Error::ZINVALIDACL;
+                return {response_ptr, {}};
+            }
+
+            uint64_t acl_id = storage.acl_map.convertACLs(node_acls);
+            storage.acl_map.addUsage(acl_id);
+
+            storage.container.updateValue(request.path, [acl_id] (KeeperStorage::Node & node)
+            {
+                node.acl_id = acl_id;
+                ++node.stat.aversion;
+            });
+
+            response.stat = it->value.stat;
+            response.error = Coordination::Error::ZOK;
+        }
+
+        /// It cannot be used inside a multi transaction?
+        return { response_ptr, {} };
+    }
+};
+
+struct KeeperStorageGetACLRequest final : public KeeperStorageRequest
+{
+    bool checkAuth(KeeperStorage & storage, int64_t session_id) const override
+    {
+        auto & container = storage.container;
+        auto it = container.find(zk_request->getPath());
+        if (it == container.end())
+            return true;
+
+        const auto & node_acls = storage.acl_map.convertNumber(it->value.acl_id);
+        if (node_acls.empty())
+            return true;
+
+        const auto & session_auths = storage.session_and_auth[session_id];
+        /// LOL, GetACL requires more permissions than SetACL...
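An aside on the two conventions these ACL hunks rely on, as a minimal sketch (not part of the patch): the `digest` id is `user:base64(SHA1("user:password"))`, which is what `generateDigest()` above computes, and the standard ZooKeeper permission bits explain the `31`/`29` values asserted in the tests further down.

```python
import base64, hashlib

def zk_digest(userdata: str) -> str:
    # "user:password" -> "user:base64(sha1('user:password'))",
    # the same transformation generateDigest() performs in KeeperStorage.cpp
    user = userdata.split(':', 1)[0]
    sha = hashlib.sha1(userdata.encode()).digest()
    return user + ':' + base64.b64encode(sha).decode()

# Matches the literal asserted in test_get_set_acl below
assert zk_digest('username1:secret1') == 'username1:eGncMdBgOfGS/TCojt51xWsWv/Y='

# ZooKeeper permission bits: READ=1, WRITE=2, CREATE=4, DELETE=8, ADMIN=16
ALL = 1 | 2 | 4 | 8 | 16      # 31, asserted for make_acl(..., all=True)
NO_WRITE = ALL & ~2           # 29, asserted for the read/create/delete/admin ACL
assert (ALL, NO_WRITE) == (31, 29)
```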
+        return checkACL(Coordination::ACL::Admin | Coordination::ACL::Read, node_acls, session_auths);
+    }
+    using KeeperStorageRequest::KeeperStorageRequest;
+
+    std::pair<Coordination::ZooKeeperResponsePtr, Undo> process(KeeperStorage & storage, int64_t /*zxid*/, int64_t /*session_id*/) const override
+    {
+        Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse();
+        Coordination::ZooKeeperGetACLResponse & response = dynamic_cast<Coordination::ZooKeeperGetACLResponse &>(*response_ptr);
+        Coordination::ZooKeeperGetACLRequest & request = dynamic_cast<Coordination::ZooKeeperGetACLRequest &>(*zk_request);
+        auto & container = storage.container;
+        auto it = container.find(request.path);
+        if (it == container.end())
+        {
+            response.error = Coordination::Error::ZNONODE;
+        }
+        else
+        {
+            response.stat = it->value.stat;
+            response.acl = storage.acl_map.convertNumber(it->value.acl_id);
+        }
+
+        return {response_ptr, {}};
+    }
+};
+
 struct KeeperStorageMultiRequest final : public KeeperStorageRequest
 {
     bool checkAuth(KeeperStorage & storage, int64_t session_id) const override
@@ -904,6 +1009,8 @@ KeeperWrapperFactory::KeeperWrapperFactory()
     registerKeeperRequestWrapper<Coordination::OpNum::SimpleList, KeeperStorageListRequest>(*this);
     registerKeeperRequestWrapper<Coordination::OpNum::Check, KeeperStorageCheckRequest>(*this);
     registerKeeperRequestWrapper<Coordination::OpNum::Multi, KeeperStorageMultiRequest>(*this);
+    registerKeeperRequestWrapper<Coordination::OpNum::SetACL, KeeperStorageSetACLRequest>(*this);
+    registerKeeperRequestWrapper<Coordination::OpNum::GetACL, KeeperStorageGetACLRequest>(*this);
 }
 
 
diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp
index cf28627961f..51965b499a2 100644
--- a/src/Coordination/ZooKeeperDataReader.cpp
+++ b/src/Coordination/ZooKeeperDataReader.cpp
@@ -347,6 +347,20 @@ Coordination::ZooKeeperRequestPtr deserializeErrorTxn(ReadBuffer & in)
     return nullptr;
 }
 
+Coordination::ZooKeeperRequestPtr deserializeSetACLTxn(ReadBuffer & in)
+{
+    std::shared_ptr<Coordination::ZooKeeperSetACLRequest> result = std::make_shared<Coordination::ZooKeeperSetACLRequest>();
+
+    Coordination::read(result->path, in);
+    Coordination::read(result->acls, in);
+    Coordination::read(result->version, in);
+    /// The log stores version + 1 (the node's resulting version, not the one expected by the request)
+    result->version -= 1;
+    result->need_to_hash_acls = false;
+
+    return result;
+}
+
 Coordination::ZooKeeperRequestPtr deserializeMultiTxn(ReadBuffer & in);
 
 Coordination::ZooKeeperRequestPtr deserializeTxnImpl(ReadBuffer & in, bool subtxn)
@@ -371,6 +385,9 @@ Coordination::ZooKeeperRequestPtr deserializeTxnImpl(ReadBuffer & in, bool subtx
         case 5:
             result = deserializeSetTxn(in);
             break;
+        case 7:
+            result = deserializeSetACLTxn(in);
+            break;
         case 13:
             result = deserializeCheckVersionTxn(in);
             break;
diff --git a/tests/integration/test_keeper_auth/test.py b/tests/integration/test_keeper_auth/test.py
index 5f60d5b8bdb..721ccd6fddb 100644
--- a/tests/integration/test_keeper_auth/test.py
+++ b/tests/integration/test_keeper_auth/test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+
 import pytest
 from helpers.cluster import ClickHouseCluster
 from kazoo.client import KazooClient, KazooState
@@ -300,3 +300,47 @@ def test_auth_snapshot(started_cluster):
 
     with pytest.raises(NoAuthError):
         connection2.get("/test_snapshot_acl1")
+
+
+@pytest.mark.parametrize(
+    ('get_zk'),
+    [
+        get_genuine_zk,
+        get_fake_zk
+    ]
+)
+def test_get_set_acl(started_cluster, get_zk):
+    auth_connection = get_zk()
+    auth_connection.add_auth('digest', 'username1:secret1')
+    auth_connection.add_auth('digest', 'username2:secret2')
+
+    auth_connection.create("/test_set_get_acl", b"data", acl=[make_acl("auth", "", all=True)])
+
+    acls, stat = auth_connection.get_acls("/test_set_get_acl")
+
+    assert stat.aversion == 0
+    assert len(acls) == 2
+    for acl in acls:
+        assert acl.acl_list == ['ALL']
+        assert acl.id.scheme == 'digest'
+        assert acl.perms == 31
+        assert acl.id.id in 
('username1:eGncMdBgOfGS/TCojt51xWsWv/Y=', 'username2:qgSSumukVlhftkVycylbHNvxhFU=') + + + other_auth_connection = get_zk() + other_auth_connection.add_auth('digest', 'username1:secret1') + other_auth_connection.add_auth('digest', 'username3:secret3') + other_auth_connection.set_acls("/test_set_get_acl", acls=[make_acl("auth", "", read=True, write=False, create=True, delete=True, admin=True)]) + + acls, stat = other_auth_connection.get_acls("/test_set_get_acl") + + assert stat.aversion == 1 + assert len(acls) == 2 + for acl in acls: + assert acl.acl_list == ['READ', 'CREATE', 'DELETE', 'ADMIN'] + assert acl.id.scheme == 'digest' + assert acl.perms == 29 + assert acl.id.id in ('username1:eGncMdBgOfGS/TCojt51xWsWv/Y=', 'username3:CvWITOxxTwk+u6S5PoGlQ4hNoWI=') + + with pytest.raises(KazooException): + other_auth_connection.set_acls("/test_set_get_acl", acls=[make_acl("auth", "", all=True)], version=0) diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py index fa2178974e9..816faebe63d 100644 --- a/tests/integration/test_keeper_zookeeper_converter/test.py +++ b/tests/integration/test_keeper_zookeeper_converter/test.py @@ -223,6 +223,11 @@ def test_acls(started_cluster): yet_other_auth_connection.set("/test_multi_all_acl", b"Y") + genuine_connection.add_auth('digest', 'user3:password3') + + # just to check that we are able to deserialize it + genuine_connection.set_acls("/test_multi_all_acl", acls=[make_acl("auth", "", read=True, write=False, create=True, delete=True, admin=True)]) + no_auth_connection = get_genuine_zk() with pytest.raises(Exception): @@ -241,3 +246,13 @@ def test_acls(started_cluster): fake_connection.add_auth('digest', 'user3:password3') compare_states(genuine_connection, fake_connection) + + for connection in [genuine_connection, fake_connection]: + acls, stat = connection.get_acls("/test_multi_all_acl") + assert stat.aversion == 1 + assert len(acls) == 3 + for acl in acls: + assert acl.acl_list == ['READ', 'CREATE', 'DELETE', 'ADMIN'] + assert acl.id.scheme == 'digest' + assert acl.perms == 29 + assert acl.id.id in ('user1:XDkd2dsEuhc9ImU3q8pa8UOdtpI=', 'user2:lo/iTtNMP+gEZlpUNaCqLYO3i5U=', 'user3:wr5Y0kEs9nFX3bKrTMKxrlcFeWo=') From 788e61f80e32987ca2685474b93963fc0ae30138 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 22 Jun 2021 14:43:26 +0300 Subject: [PATCH 031/183] Snapshots test --- .../test_keeper_zookeeper_converter/test.py | 50 ++++++++++++++----- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py index 816faebe63d..eac2b4c45c5 100644 --- a/tests/integration/test_keeper_zookeeper_converter/test.py +++ b/tests/integration/test_keeper_zookeeper_converter/test.py @@ -37,12 +37,16 @@ def stop_clickhouse(): def start_clickhouse(): node.start_clickhouse() -def copy_zookeeper_data(): +def copy_zookeeper_data(make_zk_snapshots): stop_zookeeper() + + if make_zk_snapshots: # force zookeeper to create snapshot + start_zookeeper() + stop_zookeeper() + stop_clickhouse() clear_clickhouse_data() convert_zookeeper_data() - print(node.exec_in_container) start_zookeeper() start_clickhouse() @@ -97,7 +101,13 @@ def compare_states(zk1, zk2, path="/"): print("Checking child", os.path.join(path, children)) compare_states(zk1, zk2, os.path.join(path, children)) -def test_smoke(started_cluster): +@pytest.mark.parametrize( + ('create_snapshots'), + [ + True, False + ] +) +def 
test_smoke(started_cluster, create_snapshots): restart_and_clear_zookeeper() genuine_connection = get_genuine_zk() @@ -105,7 +115,7 @@ def test_smoke(started_cluster): assert genuine_connection.get("/test")[0] == b"data" - copy_zookeeper_data() + copy_zookeeper_data(create_snapshots) genuine_connection = get_genuine_zk() fake_connection = get_fake_zk() @@ -115,7 +125,13 @@ def test_smoke(started_cluster): def get_bytes(s): return s.encode() -def test_simple_crud_requests(started_cluster): +@pytest.mark.parametrize( + ('create_snapshots'), + [ + True, False + ] +) +def test_simple_crud_requests(started_cluster, create_snapshots): restart_and_clear_zookeeper() genuine_connection = get_genuine_zk() @@ -144,7 +160,7 @@ def test_simple_crud_requests(started_cluster): for i in range(10): genuine_connection.create("/test_ephemeral/" + str(i), get_bytes("dataX" + str(i)), ephemeral=True) - copy_zookeeper_data() + copy_zookeeper_data(create_snapshots) genuine_connection = get_genuine_zk() fake_connection = get_fake_zk() @@ -159,8 +175,13 @@ def test_simple_crud_requests(started_cluster): second_children = list(sorted(fake_connection.get_children("/test_sequential"))) assert first_children == second_children, "Childrens are not equal on path " + path - -def test_multi_and_failed_requests(started_cluster): +@pytest.mark.parametrize( + ('create_snapshots'), + [ + True, False + ] +) +def test_multi_and_failed_requests(started_cluster, create_snapshots): restart_and_clear_zookeeper() genuine_connection = get_genuine_zk() @@ -196,15 +217,20 @@ def test_multi_and_failed_requests(started_cluster): assert genuine_connection.exists('/test_bad_transaction2') is None assert genuine_connection.exists('/test_multitransactions/freddy0') is not None - copy_zookeeper_data() + copy_zookeeper_data(create_snapshots) genuine_connection = get_genuine_zk() fake_connection = get_fake_zk() compare_states(genuine_connection, fake_connection) - -def test_acls(started_cluster): +@pytest.mark.parametrize( + ('create_snapshots'), + [ + True, False + ] +) +def test_acls(started_cluster, create_snapshots): restart_and_clear_zookeeper() genuine_connection = get_genuine_zk() genuine_connection.add_auth('digest', 'user1:password1') @@ -233,7 +259,7 @@ def test_acls(started_cluster): with pytest.raises(Exception): no_auth_connection.set("/test_multi_all_acl", b"Z") - copy_zookeeper_data() + copy_zookeeper_data(create_snapshots) genuine_connection = get_genuine_zk() genuine_connection.add_auth('digest', 'user1:password1') From 9e42833947c06783bbdf4371d19f11b7de8fcb75 Mon Sep 17 00:00:00 2001 From: George Date: Tue, 22 Jun 2021 17:14:00 +0300 Subject: [PATCH 032/183] Added translation --- .../reference/quantiles.md | 1 - .../reference/quantileexact.md | 98 +++++++++++++++++ .../reference/quantiles.md | 104 +++++++++++++++++- 3 files changed, 199 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index 6fcc7f2d0fe..d8320067dd1 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -89,7 +89,6 @@ Type of array values: - [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. - [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. 
-
 **Example**
 
 Query:
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md
index 82ebae1c14e..f5c33bdd79b 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md
@@ -161,6 +161,104 @@ SELECT quantileExactHigh(number) FROM numbers(10)
 └───────────────────────────┘
 ```
 
+## quantileExactExclusive {#quantileexactexclusive}
+
+Точно вычисляет [квантиль](https://ru.wikipedia.org/wiki/Квантиль) числовой последовательности.
+
+Чтобы получить точный результат, все переданные значения собираются в массив, который затем частично сортируется. Таким образом, функция потребляет объем памяти `O(n)`, где `n` — количество переданных значений. Для небольшого числа значений эта функция эффективна.
+
+Эта функция эквивалентна Excel функции [PERCENTILE.EXC](https://support.microsoft.com/en-us/office/percentile-exc-function-bbaa7204-e9e1-4010-85bf-c31dc5dce4ba), [тип R6](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample).
+
+Внутренние состояния функций `quantileExactExclusive` не объединяются, если они используются в одном запросе. Если вам необходимо вычислить квантили нескольких уровней, используйте функцию [quantilesExactExclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactexclusive), это повысит эффективность запроса.
+
+**Синтаксис**
+
+``` sql
+quantileExactExclusive(level)(expr)
+```
+
+**Аргументы**
+
+- `level` — уровень квантиля. Необязательный параметр. Возможные значения: (0, 1). Значения по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). [Float](../../../float.md).
+- `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md).
+
+**Возвращаемое значение**
+
+- Квантиль заданного уровня.
+
+Тип:
+
+- [Float64](../../../sql-reference/data-types/float.md) для входных данных числового типа.
+- [Date](../../../sql-reference/data-types/date.md), если входные значения имеют тип `Date`.
+- [DateTime](../../../sql-reference/data-types/datetime.md), если входные значения имеют тип `DateTime`.
+
+**Пример**
+
+Запрос:
+
+``` sql
+CREATE TABLE num AS numbers(1000);
+
+SELECT quantileExactExclusive(0.6)(x) FROM (SELECT number AS x FROM num);
+```
+
+Результат:
+
+``` text
+┌─quantileExactExclusive(0.6)(x)─┐
+│ 599.6 │
+└────────────────────────────────┘
+```
+
+## quantileExactInclusive {#quantileexactinclusive}
+
+Точно вычисляет [квантиль](https://ru.wikipedia.org/wiki/Квантиль) числовой последовательности.
+
+Чтобы получить точный результат, все переданные значения собираются в массив, который затем частично сортируется. Таким образом, функция потребляет объем памяти `O(n)`, где `n` — количество переданных значений. Для небольшого числа значений эта функция эффективна.
+
+Эта функция эквивалентна Excel функции [PERCENTILE.INC](https://support.microsoft.com/en-us/office/percentile-inc-function-680f9539-45eb-410b-9a5e-c1355e5fe2ed), [тип R7](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample).
+
+Внутренние состояния функций `quantileExactInclusive` не объединяются, если они используются в одном запросе. 
Если вам необходимо вычислить квантили нескольких уровней, используйте функцию [quantilesExactInclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactinclusive), это повысит эффективность запроса.
+
+**Синтаксис**
+
+``` sql
+quantileExactInclusive(level)(expr)
+```
+
+**Аргументы**
+
+- `level` — уровень квантиля. Необязательный параметр. Возможные значения: [0, 1]. Значения по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). [Float](../../../float.md).
+- `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md).
+
+**Возвращаемые значения**
+
+- Квантиль заданного уровня.
+
+Тип:
+
+- [Float64](../../../sql-reference/data-types/float.md) для входных данных числового типа.
+- [Date](../../../sql-reference/data-types/date.md), если входные значения имеют тип `Date`.
+- [DateTime](../../../sql-reference/data-types/datetime.md), если входные значения имеют тип `DateTime`.
+
+**Пример**
+
+Запрос:
+
+``` sql
+CREATE TABLE num AS numbers(1000);
+
+SELECT quantileExactInclusive(0.6)(x) FROM (SELECT number AS x FROM num);
+```
+
+Результат:
+
+``` text
+┌─quantileExactInclusive(0.6)(x)─┐
+│ 599.4 │
+└────────────────────────────────┘
+```
+
 **Смотрите также**
 
 - [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md b/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md
index 671cbc1fc4d..1ed705c5bac 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md
@@ -2,9 +2,107 @@ toc_priority: 201
 ---
 
-# quantiles {#quantiles}
+# quantiles Functions {#quantiles-functions}
+## quantiles {#quantiles}
 
-Syntax: `quantiles(level1, level2, …)(x)`
+Синтаксис: `quantiles(level1, level2, …)(x)`
 
-All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
+Все функции для вычисления квантилей имеют соответствующие функции для вычисления нескольких квантилей: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. Эти функции вычисляют все квантили указанных уровней в один проход и возвращают массив с вычисленными значениями.
+## quantilesExactExclusive {#quantilesexactexclusive}
+
+Точно вычисляет [квантили](https://ru.wikipedia.org/wiki/Квантиль) числовой последовательности.
+
+Чтобы получить точный результат, все переданные значения собираются в массив, который затем частично сортируется. Таким образом, функция потребляет объем памяти `O(n)`, где `n` — количество переданных значений. Для небольшого числа значений эта функция эффективна.
+
+Эта функция эквивалентна Excel функции [PERCENTILE.EXC](https://support.microsoft.com/en-us/office/percentile-exc-function-bbaa7204-e9e1-4010-85bf-c31dc5dce4ba), [тип R6](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample). 
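For orientation, a sketch (not part of the patch) of the R6 ("exclusive") and R7 ("inclusive") estimators the translated docs reference; it reproduces the 599.6/599.4 results from the examples, assuming 0 < level < 1 as the docs require.

```python
def quantile_r6(xs, p):
    # "exclusive", Excel PERCENTILE.EXC: 1-based rank h = (n + 1) * p
    xs = sorted(xs)
    n = len(xs)
    h = (n + 1) * p
    lo = int(h)
    return xs[lo - 1] + (h - lo) * (xs[lo] - xs[lo - 1])

def quantile_r7(xs, p):
    # "inclusive", Excel PERCENTILE.INC: 1-based rank h = (n - 1) * p + 1
    xs = sorted(xs)
    n = len(xs)
    h = (n - 1) * p + 1
    lo = int(h)
    return xs[lo - 1] + (h - lo) * (xs[lo] - xs[lo - 1])

data = range(1000)  # same data as `CREATE TABLE num AS numbers(1000)`
assert abs(quantile_r6(data, 0.6) - 599.6) < 1e-9  # quantileExactExclusive(0.6)
assert abs(quantile_r7(data, 0.6) - 599.4) < 1e-9  # quantileExactInclusive(0.6)
```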
+
+Работает более эффективно с наборами уровней, чем [quantileExactExclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactexclusive).
+
+**Синтаксис**
+
+``` sql
+quantilesExactExclusive(level1, level2, ...)(expr)
+```
+
+**Аргументы**
+
+- `level` — уровень квантилей. Возможные значения: (0, 1).
+- `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md).
+
+**Возвращаемые значения**
+
+- [Массив](../../../sql-reference/data-types/array.md) квантилей указанных уровней.
+
+Тип значений массива:
+
+- [Float64](../../../sql-reference/data-types/float.md) для входных данных числового типа.
+- [Date](../../../sql-reference/data-types/date.md), если входные значения имеют тип `Date`.
+- [DateTime](../../../sql-reference/data-types/datetime.md), если входные значения имеют тип `DateTime`.
+
+**Пример**
+
+Запрос:
+
+``` sql
+CREATE TABLE num AS numbers(1000);
+
+SELECT quantilesExactExclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x) FROM (SELECT number AS x FROM num);
+```
+
+Результат:
+
+``` text
+┌─quantilesExactExclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x)─┐
+│ [249.25,499.5,749.75,899.9,949.9499999999999,989.99,998.999] │
+└─────────────────────────────────────────────────────────────────────┘
+```
+
+## quantilesExactInclusive {#quantilesexactinclusive}
+
+Точно вычисляет [квантили](https://ru.wikipedia.org/wiki/Квантиль) числовой последовательности.
+
+Чтобы получить точный результат, все переданные значения собираются в массив, который затем частично сортируется. Таким образом, функция потребляет объем памяти `O(n)`, где `n` — количество переданных значений. Для небольшого числа значений эта функция эффективна.
+
+Эта функция эквивалентна Excel функции [PERCENTILE.INC](https://support.microsoft.com/en-us/office/percentile-inc-function-680f9539-45eb-410b-9a5e-c1355e5fe2ed), [тип R7](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample).
+
+Работает более эффективно с наборами уровней, чем [quantileExactInclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactinclusive).
+
+**Синтаксис**
+
+``` sql
+quantilesExactInclusive(level1, level2, ...)(expr)
+```
+
+**Аргументы**
+
+- `level` — уровень квантилей. Возможные значения: [0, 1].
+- `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md).
+
+**Возвращаемые значения**
+
+- [Массив](../../../sql-reference/data-types/array.md) квантилей указанных уровней.
+
+Тип значений массива:
+
+- [Float64](../../../sql-reference/data-types/float.md) для входных данных числового типа.
+- [Date](../../../sql-reference/data-types/date.md), если входные значения имеют тип `Date`.
+- [DateTime](../../../sql-reference/data-types/datetime.md), если входные значения имеют тип `DateTime`. 
+ +**Пример** + +Запрос: + +``` sql +CREATE TABLE num AS numbers(1000); + +SELECT quantilesExactInclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x) FROM (SELECT number AS x FROM num); +``` + +Результат: + +``` text +┌─quantilesExactInclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x)─┐ +│ [249.75,499.5,749.25,899.1,949.05,989.01,998.001] │ +└─────────────────────────────────────────────────────────────────────┘ +``` From 54d1bef0876c8886b3679bc7d4625a6019b3840b Mon Sep 17 00:00:00 2001 From: George Date: Tue, 22 Jun 2021 17:17:31 +0300 Subject: [PATCH 033/183] Small update --- .../aggregate-functions/reference/quantiles.md | 1 + .../aggregate-functions/reference/quantileexact.md | 8 +++++--- .../aggregate-functions/reference/quantiles.md | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index d8320067dd1..06bee5ed038 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -3,6 +3,7 @@ toc_priority: 201 --- # quantiles Functions {#quantiles-functions} + ## quantiles {#quantiles} Syntax: `quantiles(level1, level2, …)(x)` diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md index f5c33bdd79b..eada2a16a8f 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md @@ -2,7 +2,9 @@ toc_priority: 202 --- -# quantileExact {#quantileexact} +# Функции quantileExact {#quantileexact-functions} + +## quantileExact {#quantileexact} Точно вычисляет [квантиль](https://ru.wikipedia.org/wiki/Квантиль) числовой последовательности. @@ -50,7 +52,7 @@ SELECT quantileExact(number) FROM numbers(10) └───────────────────────┘ ``` -# quantileExactLow {#quantileexactlow} +## quantileExactLow {#quantileexactlow} Как и `quantileExact`, эта функция вычисляет точный [квантиль](https://en.wikipedia.org/wiki/Quantile) числовой последовательности данных. @@ -109,7 +111,7 @@ SELECT quantileExactLow(number) FROM numbers(10) │ 4 │ └──────────────────────────┘ ``` -# quantileExactHigh {#quantileexacthigh} +## quantileExactHigh {#quantileexacthigh} Как и `quantileExact`, эта функция вычисляет точный [квантиль](https://en.wikipedia.org/wiki/Quantile) числовой последовательности данных. 
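As a cross-check (an aside, not part of the patch): the low/high median behaviour these `quantileExactLow`/`quantileExactHigh` sections describe matches Python's `statistics.median_low`/`median_high`, including the value 4 shown in the `numbers(10)` example context above (and 5 for the high variant).

```python
import statistics

data = list(range(10))               # 0..9, as in SELECT ... FROM numbers(10)
print(statistics.median_low(data))   # 4 -> quantileExactLow(number)
print(statistics.median_high(data))  # 5 -> quantileExactHigh(number)
```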
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md b/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md
index 1ed705c5bac..36fc436c56c 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md
@@ -2,7 +2,8 @@ toc_priority: 201
 ---
 
-# quantiles Functions {#quantiles-functions}
+# Функции для нескольких квантилей {#quantiles-functions}
+
 ## quantiles {#quantiles}
 
 Синтаксис: `quantiles(level1, level2, …)(x)`

From 4af3e38b52a7d9029e6fbf16d3114bcd80c36fb2 Mon Sep 17 00:00:00 2001
From: George
Date: Tue, 22 Jun 2021 17:25:48 +0300
Subject: [PATCH 034/183] Fixed links

---
 .../aggregate-functions/reference/quantileexact.md           | 4 ++--
 .../sql-reference/aggregate-functions/reference/quantiles.md | 4 ++--
 .../aggregate-functions/reference/quantileexact.md           | 4 ++--
 .../sql-reference/aggregate-functions/reference/quantiles.md | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md
index e7890f231bb..bb1906f3a8c 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md
@@ -178,7 +178,7 @@ quantileExactExclusive(level)(expr)
 
 **Arguments**
 
-- `level` — Level of quantile. Optional. Possible values: (0, 1). Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../float.md).
+- `level` — Level of quantile. Optional. Possible values: (0, 1). Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md).
 - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
 
 **Returned value**
@@ -227,7 +227,7 @@ quantileExactInclusive(level)(expr)
 
 **Arguments**
 
-- `level` — Level of quantile. Optional. Possible values: [0, 1]. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../float.md).
+- `level` — Level of quantile. Optional. Possible values: [0, 1]. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md).
 - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
 
 **Returned value**
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md
index 06bee5ed038..60ad80abae1 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md
@@ -28,7 +28,7 @@ quantilesExactExclusive(level1, level2, ...)(expr)
 
 **Arguments**
 
-- `level` — Leveles of quantiles. Possible values: (0, 1).
+- `level` — Levels of quantiles. Possible values: (0, 1). [Float](../../../sql-reference/data-types/float.md). 
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
 
 **Returned value**
@@ -77,7 +77,7 @@ quantilesExactInclusive(level1, level2, ...)(expr)
 
 **Arguments**
 
-- `level` — Leveles of quantiles. Possible values: [0, 1].
+- `level` — Levels of quantiles. Possible values: [0, 1]. [Float](../../../sql-reference/data-types/float.md).
 - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
 
 **Returned value**
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md
index eada2a16a8f..7f5c0d50213 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md
@@ -181,7 +181,7 @@ quantileExactExclusive(level)(expr)
 
 **Аргументы**
 
-- `level` — уровень квантиля. Необязательный параметр. Возможные значения: (0, 1). Значения по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). [Float](../../../float.md).
+- `level` — уровень квантиля. Необязательный параметр. Возможные значения: (0, 1). Значения по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). [Float](../../../sql-reference/data-types/float.md).
 - `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md).
 
 **Возвращаемое значение**
@@ -230,7 +230,7 @@ quantileExactInclusive(level)(expr)
 
 **Аргументы**
 
-- `level` — уровень квантиля. Необязательный параметр. Возможные значения: [0, 1]. Значения по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). [Float](../../../float.md).
+- `level` — уровень квантиля. Необязательный параметр. Возможные значения: [0, 1]. Значения по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). [Float](../../../sql-reference/data-types/float.md).
- `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). **Возвращаемые значения** @@ -77,7 +77,7 @@ quantilesExactInclusive(level1, level2, ...)(expr) **Аргументы** -- `level` — уровень квантилей. Возможные значения: [0, 1]. +- `level` — уровень квантилей. Возможные значения: [0, 1]. [Float](../../../sql-reference/data-types/float.md). - `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). **Возвращаемые значения** From 8c367eecc68929e65246d009593df9c854473451 Mon Sep 17 00:00:00 2001 From: George Date: Tue, 22 Jun 2021 17:47:40 +0300 Subject: [PATCH 035/183] fix --- .../aggregate-functions/reference/quantileexact.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md index 7f5c0d50213..2a9ac46298b 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md @@ -233,7 +233,7 @@ quantileExactInclusive(level)(expr) - `level` — уровень квантиля. Необязательный параметр. Возможные значения: [0, 1]. Значения по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). [Float](../../../sql-reference/data-types/float.md). - `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). -**Возвращаемые значения** +**Возвращаемое значение** - Квантиль заданного уровня. From 0a937e7377d71689b77ce3d28c95394012c90fe7 Mon Sep 17 00:00:00 2001 From: Ildus Kurbangaliev Date: Tue, 22 Jun 2021 17:02:42 +0200 Subject: [PATCH 036/183] Support Map type in mapAdd and mapSubtract --- .../functions/tuple-map-functions.md | 17 +- src/Functions/array/mapOp.cpp | 397 ++++++++++++------ .../01318_map_add_map_subtract.sql | 4 +- ...map_add_map_subtract_on_map_type.reference | 55 +++ ...01318_map_add_map_subtract_on_map_type.sql | 46 ++ 5 files changed, 376 insertions(+), 143 deletions(-) create mode 100644 tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.reference create mode 100644 tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.sql diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 8b0710c0182..ff2f11322a4 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -70,23 +70,23 @@ Result: Collect all the keys and sum corresponding values. -**Syntax** +**Syntax** ``` sql -mapAdd(Tuple(Array, Array), Tuple(Array, Array) [, ...]) +mapAdd(arg1, arg2 [, ...]) ``` -**Arguments** +**Arguments** -Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. 
All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. +Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. **Returned value** -- Returns one [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values. +- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) o [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values. **Example** -Query: +Query with a tuple map: ``` sql SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTypeName(res) as type; @@ -100,6 +100,11 @@ Result: └───────────────┴────────────────────────────────────┘ ``` +Query with `Map` type: + +``` sql +``` + ## mapSubtract {#function-mapsubtract} Collect all the keys and subtract corresponding values. 
diff --git a/src/Functions/array/mapOp.cpp b/src/Functions/array/mapOp.cpp index 1a19ee41d2f..da394c47f80 100644 --- a/src/Functions/array/mapOp.cpp +++ b/src/Functions/array/mapOp.cpp @@ -1,13 +1,18 @@ #include -#include +#include +#include #include +#include +#include +#include #include #include #include #include #include #include -#include +#include "Columns/ColumnMap.h" +#include "DataTypes/DataTypeMap.h" namespace DB @@ -24,8 +29,8 @@ namespace struct TupArg { - const IColumn & key_column; - const IColumn & val_column; + const ColumnPtr & key_column; + const ColumnPtr & val_column; const IColumn::Offsets & key_offsets; const IColumn::Offsets & val_offsets; bool is_const; @@ -52,17 +57,39 @@ private: bool isVariadic() const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + void checkTypes( + DataTypePtr & key_type, DataTypePtr & promoted_val_type, const DataTypePtr & check_key_type, DataTypePtr & check_val_type) const + { + if (!(check_key_type->equals(*key_type))) + throw Exception( + "Expected same " + key_type->getName() + " type for all keys in " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + WhichDataType which_val(promoted_val_type); + WhichDataType which_ch_val(check_val_type); + + if (which_ch_val.isFloat() != which_val.isFloat()) + throw Exception( + "All value types in " + getName() + " should be ether or float or integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (!(check_val_type->equals(*promoted_val_type))) + { + throw Exception( + "All value types in " + getName() + " should be promotable to " + promoted_val_type->getName() + ", got " + + check_val_type->getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + } + + DataTypePtr getReturnTypeForTuples(const DataTypes & arguments) const { - bool is_float = false; DataTypePtr key_type, val_type, res; - if (arguments.size() < 2) - throw Exception{getName() + " accepts at least two map tuples", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; - - for (const auto & tup_arg : arguments) + for (const auto & arg : arguments) { - const DataTypeTuple * tup = checkAndGetDataType(tup_arg.get()); + const DataTypeArray * k; + const DataTypeArray * v; + + const DataTypeTuple * tup = checkAndGetDataType(arg.get()); if (!tup) throw Exception{getName() + " accepts at least two map tuples", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; @@ -71,8 +98,8 @@ private: throw Exception( "Each tuple in " + getName() + " arguments should consist of two arrays", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - const DataTypeArray * k = checkAndGetDataType(elems[0].get()); - const DataTypeArray * v = checkAndGetDataType(elems[1].get()); + k = checkAndGetDataType(elems[0].get()); + v = checkAndGetDataType(elems[1].get()); if (!k || !v) throw Exception( @@ -80,62 +107,100 @@ private: auto result_type = v->getNestedType(); if (!result_type->canBePromoted()) - throw Exception{"Values to be summed are expected to be Numeric, Float or Decimal.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception{ + "Values to be summed are expected to be Numeric, Float or Decimal.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - WhichDataType which_val(result_type); - - auto promoted_type = result_type->promoteNumericType(); + auto promoted_val_type = result_type->promoteNumericType(); if (!key_type) { key_type = k->getNestedType(); - val_type = promoted_type; - is_float = which_val.isFloat(); + val_type = promoted_val_type; + res = 
std::make_shared( + DataTypes{std::make_shared(k->getNestedType()), std::make_shared(promoted_val_type)}); } else - { - if (!(k->getNestedType()->equals(*key_type))) - throw Exception( - "All key types in " + getName() + " should be same: " + key_type->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - if (is_float != which_val.isFloat()) - throw Exception( - "All value types in " + getName() + " should be or float or integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - if (!(promoted_type->equals(*val_type))) - { - throw Exception( - "All value types in " + getName() + " should be promotable to " + val_type->getName() + ", got " - + promoted_type->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - } - - if (!res) - { - res = std::make_shared( - DataTypes{std::make_shared(k->getNestedType()), std::make_shared(promoted_type)}); - } + checkTypes(key_type, val_type, k->getNestedType(), promoted_val_type); } return res; } - template - ColumnPtr execute2(size_t row_count, TupleMaps & args, const DataTypeTuple & res_type) const + DataTypePtr getReturnTypeForMaps(const DataTypes & arguments) const { - MutableColumnPtr res_tuple = res_type.createColumn(); + DataTypePtr key_type, val_type, res; - auto * to_tuple = assert_cast(res_tuple.get()); - auto & to_keys_arr = assert_cast(to_tuple->getColumn(0)); - auto & to_keys_data = to_keys_arr.getData(); - auto & to_keys_offset = to_keys_arr.getOffsets(); + for (const auto & arg : arguments) + { + const auto * map = checkAndGetDataType(arg.get()); + if (!map) + throw Exception{getName() + " accepts at least two maps", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; - auto & to_vals_arr = assert_cast(to_tuple->getColumn(1)); - auto & to_vals_data = to_vals_arr.getData(); + const auto & v = map->getValueType(); + + if (!v->canBePromoted()) + throw Exception{ + "Values to be summed are expected to be Numeric, Float or Decimal.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + auto promoted_val_type = v->promoteNumericType(); + if (!key_type) + { + key_type = map->getKeyType(); + val_type = promoted_val_type; + res = std::make_shared(DataTypes({key_type, promoted_val_type})); + } + else + checkTypes(key_type, val_type, map->getKeyType(), promoted_val_type); + } + + return res; + } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() < 2) + throw Exception{getName() + " accepts at least two maps", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + + if (arguments[0]->getTypeId() == TypeIndex::Tuple) + return getReturnTypeForTuples(arguments); + else if (arguments[0]->getTypeId() == TypeIndex::Map) + return getReturnTypeForMaps(arguments); + else + throw Exception{getName() + " only accepts maps", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + } + + template + ColumnPtr execute2(size_t row_count, TupleMaps & args, const DataTypePtr res_type) const + { + MutableColumnPtr res_column = res_type->createColumn(); + IColumn *to_keys_data, *to_vals_data; + ColumnArray::Offsets * to_keys_offset; + ColumnArray::Offsets * to_vals_offset = nullptr; + + // prepare output destinations + if (res_type->getTypeId() == TypeIndex::Tuple) + { + auto * to_tuple = assert_cast(res_column.get()); + auto & to_keys_arr = assert_cast(to_tuple->getColumn(0)); + to_keys_data = &to_keys_arr.getData(); + to_keys_offset = &to_keys_arr.getOffsets(); + + auto & to_vals_arr = assert_cast(to_tuple->getColumn(1)); + to_vals_data = &to_vals_arr.getData(); + to_vals_offset = &to_vals_arr.getOffsets(); + } + else + { + assert(res_type->getTypeId() 
== TypeIndex::Map); + + auto * to_map = assert_cast(res_column.get()); + auto & to_wrapper_arr = to_map->getNestedColumn(); + to_keys_offset = &to_wrapper_arr.getOffsets(); + + auto & to_map_tuple = to_map->getNestedData(); + to_keys_data = &to_map_tuple.getColumn(0); + to_vals_data = &to_map_tuple.getColumn(1); + } - size_t res_offset = 0; std::map summing_map; for (size_t i = 0; i < row_count; i++) @@ -147,7 +212,7 @@ private: if (!arg.is_const) { - offset = i > 0 ? arg.key_offsets[i - 1] : 0; + offset = arg.key_offsets[i - 1]; len = arg.key_offsets[i] - offset; if (arg.val_offsets[i] != arg.key_offsets[i]) @@ -155,20 +220,30 @@ private: "Key and value array should have same amount of elements", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } + Field temp_val; for (size_t j = 0; j < len; j++) { KeyType key; - if constexpr (is_str_key) + if constexpr (std::is_same::value) { - // have to use Field to get strings - key = arg.key_column[offset + j].get(); + if (const auto * col_fixed = checkAndGetColumn(arg.key_column.get())) + key = col_fixed->getDataAt(offset + j).toString(); + else if (const auto * col_str = checkAndGetColumn(arg.key_column.get())) + key = col_str->getDataAt(offset + j).toString(); + else + // should not happen + throw Exception( + "Expected String or FixedString, got " + std::string(getTypeName(arg.key_column->getDataType())) + + " in " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } else { - key = assert_cast &>(arg.key_column).getData()[offset + j]; + key = assert_cast *>(arg.key_column.get())->getData()[offset + j]; } - ValType value = arg.val_column[offset + j].get(); + arg.val_column->get(offset + j, temp_val); + ValType value = temp_val.get(); if constexpr (op_type == OpTypes::ADD) { @@ -190,132 +265,184 @@ private: for (const auto & elem : summing_map) { - res_offset++; - to_keys_data.insert(elem.first); - to_vals_data.insert(elem.second); + to_keys_data->insert(elem.first); + to_vals_data->insert(elem.second); } - to_keys_offset.push_back(res_offset); + to_keys_offset->push_back(to_keys_data->size()); summing_map.clear(); } - // same offsets as in keys - to_vals_arr.getOffsets().insert(to_keys_offset.begin(), to_keys_offset.end()); + if (to_vals_offset) + { + // same offsets as in keys + to_vals_offset->insert(to_keys_offset->begin(), to_keys_offset->end()); + } - return res_tuple; + return res_column; } - template - ColumnPtr execute1(size_t row_count, const DataTypeTuple & res_type, TupleMaps & args) const + template + ColumnPtr execute1(size_t row_count, const DataTypePtr res_type, const DataTypePtr res_value_type, TupleMaps & args) const { - const auto & promoted_type = (assert_cast(res_type.getElements()[1].get()))->getNestedType(); -#define MATCH_EXECUTE(is_str) \ - switch (promoted_type->getTypeId()) \ - { \ - case TypeIndex::Int64: return execute2(row_count, args, res_type); \ - case TypeIndex::UInt64: return execute2(row_count, args, res_type); \ - case TypeIndex::Float64: return execute2(row_count, args, res_type); \ - default: \ - throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; \ - } - - if constexpr (is_str_key) + switch (res_value_type->getTypeId()) { - MATCH_EXECUTE(true) + case TypeIndex::Int64: + return execute2(row_count, args, res_type); + case TypeIndex::Int128: + return execute2(row_count, args, res_type); + case TypeIndex::Int256: + return execute2(row_count, args, res_type); + case TypeIndex::UInt64: + return execute2(row_count, args, res_type); + case TypeIndex::UInt128: + 
return execute2(row_count, args, res_type); + case TypeIndex::UInt256: + return execute2(row_count, args, res_type); + case TypeIndex::Float64: + return execute2(row_count, args, res_type); + default: + throw Exception{ + "Illegal column type " + res_value_type->getName() + " for values in arguments of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; } - else - { - MATCH_EXECUTE(false) - } -#undef MATCH_EXECUTE } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override { + DataTypePtr key_type; + size_t row_count; const DataTypeTuple * tup_type = checkAndGetDataType((arguments[0]).type.get()); - const DataTypeArray * key_array_type = checkAndGetDataType(tup_type->getElements()[0].get()); - const DataTypeArray * val_array_type = checkAndGetDataType(tup_type->getElements()[1].get()); - - /* determine output type */ - const DataTypeTuple & res_type - = DataTypeTuple(DataTypes{std::make_shared(key_array_type->getNestedType()), - std::make_shared(val_array_type->getNestedType()->promoteNumericType())}); - + DataTypePtr res_type; + DataTypePtr res_value_type; TupleMaps args{}; args.reserve(arguments.size()); //prepare columns, extract data columns for direct access and put them to the vector - for (const auto & col : arguments) + if (tup_type) { - const ColumnTuple * tup; - bool is_const = isColumnConst(*col.column); - if (is_const) + const DataTypeArray * key_array_type = checkAndGetDataType(tup_type->getElements()[0].get()); + const DataTypeArray * val_array_type = checkAndGetDataType(tup_type->getElements()[1].get()); + + /* determine output type */ + res_value_type = val_array_type->getNestedType()->promoteNumericType(); + res_type = std::make_shared(DataTypes{ + std::make_shared(key_array_type->getNestedType()), std::make_shared(res_value_type)}); + + for (const auto & col : arguments) { - const auto * c = assert_cast(col.column.get()); - tup = assert_cast(c->getDataColumnPtr().get()); + const ColumnTuple * tup; + bool is_const = isColumnConst(*col.column); + if (is_const) + { + const auto * c = assert_cast(col.column.get()); + tup = assert_cast(c->getDataColumnPtr().get()); + } + else + tup = assert_cast(col.column.get()); + + const auto & arr1 = assert_cast(tup->getColumn(0)); + const auto & arr2 = assert_cast(tup->getColumn(1)); + + const auto & key_offsets = arr1.getOffsets(); + const auto & key_column = arr1.getDataPtr(); + + const auto & val_offsets = arr2.getOffsets(); + const auto & val_column = arr2.getDataPtr(); + + args.push_back({key_column, val_column, key_offsets, val_offsets, is_const}); + } + + key_type = key_array_type->getNestedType(); + } + else + { + const DataTypeMap * map_type = checkAndGetDataType((arguments[0]).type.get()); + if (map_type) + { + key_type = map_type->getKeyType(); + res_value_type = map_type->getValueType()->promoteNumericType(); + res_type = std::make_shared(DataTypes{map_type->getKeyType(), res_value_type}); + + for (const auto & col : arguments) + { + const ColumnMap * map; + bool is_const = isColumnConst(*col.column); + if (is_const) + { + const auto * c = assert_cast(col.column.get()); + map = assert_cast(c->getDataColumnPtr().get()); + } + else + map = assert_cast(col.column.get()); + + const auto & map_arr = map->getNestedColumn(); + const auto & key_offsets = map_arr.getOffsets(); + const auto & val_offsets = key_offsets; + + const auto & map_tup = map->getNestedData(); + const auto & key_column = map_tup.getColumnPtr(0); + const auto & val_column = map_tup.getColumnPtr(1); + + 
args.push_back({key_column, val_column, key_offsets, val_offsets, is_const}); + } } else - tup = assert_cast(col.column.get()); + throw Exception{ + "Illegal column type " + key_type->getName() + " in arguments of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + } - const auto & arr1 = assert_cast(tup->getColumn(0)); - const auto & arr2 = assert_cast(tup->getColumn(1)); - - const auto & key_offsets = arr1.getOffsets(); - const auto & key_column = arr1.getData(); - - const auto & val_offsets = arr2.getOffsets(); - const auto & val_column = arr2.getData(); - - // we can check const columns before any processing - if (is_const) + // we can check const columns before any processing + for (auto & arg : args) + { + if (arg.is_const) { - if (val_offsets[0] != key_offsets[0]) + if (arg.val_offsets[0] != arg.key_offsets[0]) throw Exception( "Key and value array should have same amount of elements", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } - - args.push_back({key_column, val_column, key_offsets, val_offsets, is_const}); } - size_t row_count = arguments[0].column->size(); - auto key_type_id = key_array_type->getNestedType()->getTypeId(); - - switch (key_type_id) + row_count = arguments[0].column->size(); + switch (key_type->getTypeId()) { case TypeIndex::Enum8: case TypeIndex::Int8: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::Enum16: case TypeIndex::Int16: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::Int32: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::Int64: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::Int128: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::Int256: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::UInt8: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::Date: case TypeIndex::UInt16: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::DateTime: case TypeIndex::UInt32: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::UInt64: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::UInt128: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::UInt256: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::UUID: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::FixedString: case TypeIndex::String: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); default: - throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; + throw Exception{ + "Illegal column type " + key_type->getName() + " for keys in arguments of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; } } }; diff --git 
a/tests/queries/0_stateless/01318_map_add_map_subtract.sql b/tests/queries/0_stateless/01318_map_add_map_subtract.sql index 40c08e0a147..6ead7a2db46 100644 --- a/tests/queries/0_stateless/01318_map_add_map_subtract.sql +++ b/tests/queries/0_stateless/01318_map_add_map_subtract.sql @@ -2,7 +2,7 @@ drop table if exists map_test; create table map_test engine=TinyLog() as (select ([1, number], [toInt32(2),2]) as map from numbers(1, 10)); -- mapAdd -select mapAdd([1], [1]); -- { serverError 42 } +select mapAdd([1], [1]); -- { serverError 43 } select mapAdd(([1], [1])); -- { serverError 42 } select mapAdd(([1], [1]), map) from map_test; -- { serverError 43 } select mapAdd(([toUInt64(1)], [1]), map) from map_test; -- { serverError 43 } @@ -27,7 +27,7 @@ select mapAdd(([toInt64(1), 2], [toInt64(1), 1]), ([toInt64(1), 2], [toInt64(1), select mapAdd(([1, 2], [toFloat32(1.1), 1]), ([1, 2], [2.2, 1])) as res, toTypeName(res); select mapAdd(([1, 2], [toFloat64(1.1), 1]), ([1, 2], [2.2, 1])) as res, toTypeName(res); -select mapAdd(([toFloat32(1), 2], [toFloat64(1.1), 1]), ([toFloat32(1), 2], [2.2, 1])) as res, toTypeName(res); -- { serverError 44 } +select mapAdd(([toFloat32(1), 2], [toFloat64(1.1), 1]), ([toFloat32(1), 2], [2.2, 1])) as res, toTypeName(res); -- { serverError 43 } select mapAdd(([1, 2], [toFloat64(1.1), 1]), ([1, 2], [1, 1])) as res, toTypeName(res); -- { serverError 43 } select mapAdd((['a', 'b'], [1, 1]), ([key], [1])) from values('key String', ('b'), ('c'), ('d')); select mapAdd((cast(['a', 'b'], 'Array(FixedString(1))'), [1, 1]), ([key], [1])) as res, toTypeName(res) from values('key FixedString(1)', ('b'), ('c'), ('d')); diff --git a/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.reference b/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.reference new file mode 100644 index 00000000000..96bafc2c79c --- /dev/null +++ b/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.reference @@ -0,0 +1,55 @@ +{1:5} +{1:3,2:2} +{1:3,3:2} +{1:3,4:2} +{1:3,5:2} +{1:3,6:2} +{1:3,7:2} +{1:3,8:2} +{1:3,9:2} +{1:3,10:2} +{1:5,2:2} +{1:3,2:4} +{1:3,2:2,3:2} +{1:3,2:2,4:2} +{1:3,2:2,5:2} +{1:3,2:2,6:2} +{1:3,2:2,7:2} +{1:3,2:2,8:2} +{1:3,2:2,9:2} +{1:3,2:2,10:2} +{1:2,2:2} Map(UInt8,UInt64) +{1:2,2:2} Map(UInt16,UInt64) +{1:2,2:2} Map(UInt32,UInt64) +{1:2,2:2} Map(UInt64,UInt64) +{1:2,2:2} Map(UInt128,UInt128) +{1:2,2:2} Map(UInt256,UInt256) +{1:2,2:2} Map(Int16,UInt64) +{1:2,2:2} Map(Int16,Int64) +{1:2,2:2} Map(Int32,Int64) +{1:2,2:2} Map(Int64,Int64) +{1:2,2:2} Map(Int128,Int128) +{1:2,2:2} Map(Int256,Int256) +{1:3.300000023841858,2:2} Map(UInt8,Float64) +{1:3.3000000000000003,2:2} Map(UInt8,Float64) +{'a':1,'b':2} +{'a':1,'b':1,'c':1} +{'a':1,'b':1,'d':1} +{'a':1,'b':2} Map(String,UInt64) +{'a':1,'b':1,'c':1} Map(String,UInt64) +{'a':1,'b':1,'d':1} Map(String,UInt64) +{'a':1,'b':2} +{'a':1,'b':1,'c':1} +{'a':1,'b':1,'d':1} +{'a':2} Map(Enum16(\'a\' = 1, \'b\' = 2),Int64) +{'b':2} Map(Enum16(\'a\' = 1, \'b\' = 2),Int64) +{'a':2} Map(Enum8(\'a\' = 1, \'b\' = 2),Int64) +{'b':2} Map(Enum8(\'a\' = 1, \'b\' = 2),Int64) +{'00000000-89ab-cdef-0123-456789abcdef':2} Map(UUID,Int64) +{'11111111-89ab-cdef-0123-456789abcdef':4} Map(UUID,Int64) +{1:0,2:0} Map(UInt8,UInt64) +{1:18446744073709551615,2:18446744073709551615} Map(UInt8,UInt64) +{1:-1,2:-1} Map(UInt8,Int64) +{1:-1.0999999761581423,2:0} Map(UInt8,Float64) +{1:-1,2:-1} Map(UInt8,Int64) +{1:-2,2:-2,3:1} Map(UInt8,Int64) diff --git a/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.sql 
b/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.sql new file mode 100644 index 00000000000..9f0f1cb0489 --- /dev/null +++ b/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.sql @@ -0,0 +1,46 @@ +drop table if exists mapop_test; +set allow_experimental_map_type = 1; +create table mapop_test engine=TinyLog() as (select map(1, toInt32(2), number, 2) as m from numbers(1, 10)); + +-- mapAdd +select mapAdd(map(1, 1)); -- { serverError 42 } +select mapAdd(map(1, 1), m) from mapop_test; -- { serverError 43 } + +select mapAdd(map(toUInt64(1), toInt32(1)), m) from mapop_test; +select mapAdd(cast(m, 'Map(UInt8, UInt8)'), map(1, 1), map(2,2)) from mapop_test; + +-- cleanup +drop table mapop_test; + +-- check types +select mapAdd(map(toUInt8(1), 1, 2, 1), map(toUInt8(1), 1, 2, 1)) as res, toTypeName(res); +select mapAdd(map(toUInt16(1), toUInt16(1), 2, 1), map(toUInt16(1), toUInt16(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toUInt32(1), toUInt32(1), 2, 1), map(toUInt32(1), toUInt32(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toUInt64(1), toUInt64(1), 2, 1), map(toUInt64(1), toUInt64(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toUInt128(1), toUInt128(1), 2, 1), map(toUInt128(1), toUInt128(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toUInt256(1), toUInt256(1), 2, 1), map(toUInt256(1), toUInt256(1), 2, 1)) as res, toTypeName(res); + +select mapAdd(map(toInt8(1), 1, 2, 1), map(toInt8(1), 1, 2, 1)) as res, toTypeName(res); +select mapAdd(map(toInt16(1), toInt16(1), 2, 1), map(toInt16(1), toInt16(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toInt32(1), toInt32(1), 2, 1), map(toInt32(1), toInt32(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toInt64(1), toInt64(1), 2, 1), map(toInt64(1), toInt64(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toInt128(1), toInt128(1), 2, 1), map(toInt128(1), toInt128(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toInt256(1), toInt256(1), 2, 1), map(toInt256(1), toInt256(1), 2, 1)) as res, toTypeName(res); + +select mapAdd(map(1, toFloat32(1.1), 2, 1), map(1, 2.2, 2, 1)) as res, toTypeName(res); +select mapAdd(map(1, toFloat64(1.1), 2, 1), map(1, 2.2, 2, 1)) as res, toTypeName(res); +select mapAdd(map(1, toFloat64(1.1), 2, 1), map(1, 1, 2, 1)) as res, toTypeName(res); -- { serverError 43 } +select mapAdd(map('a', 1, 'b', 1), map(key, 1)) from values('key String', ('b'), ('c'), ('d')); +select mapAdd(map(cast('a', 'FixedString(1)'), 1, 'b', 1), map(key, 1)) as res, toTypeName(res) from values('key String', ('b'), ('c'), ('d')); +select mapAdd(map(cast('a', 'LowCardinality(String)'), 1, 'b', 1), map(key, 1)) from values('key String', ('b'), ('c'), ('d')); +select mapAdd(map(key, val), map(key, val)) as res, toTypeName(res) from values ('key Enum16(\'a\'=1, \'b\'=2), val Int16', ('a', 1), ('b', 1)); +select mapAdd(map(key, val), map(key, val)) as res, toTypeName(res) from values ('key Enum8(\'a\'=1, \'b\'=2), val Int16', ('a', 1), ('b', 1)); +select mapAdd(map(key, val), map(key, val)) as res, toTypeName(res) from values ('key UUID, val Int32', ('00000000-89ab-cdef-0123-456789abcdef', 1), ('11111111-89ab-cdef-0123-456789abcdef', 2)); + +-- mapSubtract, same rules as mapAdd +select mapSubtract(map(toUInt8(1), 1, 2, 1), map(toUInt8(1), 1, 2, 1)) as res, toTypeName(res); +select mapSubtract(map(toUInt8(1), 1, 2, 1), map(toUInt8(1), 2, 2, 2)) as res, toTypeName(res); -- overflow +select mapSubtract(map(toUInt8(1), toInt32(1), 2, 1), map(toUInt8(1), toInt16(2), 2, 2)) as 
res, toTypeName(res); +select mapSubtract(map(1, toFloat32(1.1), 2, 1), map(1, 2.2, 2, 1)) as res, toTypeName(res); +select mapSubtract(map(toUInt8(1), toInt32(1), 2, 1), map(toUInt8(1), toInt16(2), 2, 2)) as res, toTypeName(res); +select mapSubtract(map(toUInt8(3), toInt32(1)), map(toUInt8(1), toInt32(2), 2, 2)) as res, toTypeName(res); From 9d084510c78ab47e8abc5481729457d559847017 Mon Sep 17 00:00:00 2001 From: sevirov <72220289+sevirov@users.noreply.github.com> Date: Tue, 22 Jun 2021 19:24:58 +0300 Subject: [PATCH 037/183] Update docs/en/sql-reference/table-functions/s3Cluster.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/table-functions/s3Cluster.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index 4bde49b8cc0..c79d0d3dba2 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -3,7 +3,7 @@ toc_priority: 55 toc_title: s3Cluster --- -# S3Cluster Table Function {#s3Cluster-table-function} +# s3Cluster Table Function {#s3Cluster-table-function} Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) in parallel from many nodes in a specified cluster. On initiator, it creates a connection to all nodes in the cluster, discloses asterics in S3 file path, and dispatch each file dynamically. On the worker node, it asks the initiator about the next task to process, processes it. This is repeated until the tasks are finished. From 30d730ab5061e987eed4b18453db09ad891f2b99 Mon Sep 17 00:00:00 2001 From: sevirov <72220289+sevirov@users.noreply.github.com> Date: Tue, 22 Jun 2021 19:27:23 +0300 Subject: [PATCH 038/183] Update docs/en/sql-reference/table-functions/s3Cluster.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/table-functions/s3Cluster.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index c79d0d3dba2..07e053cf962 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -5,7 +5,7 @@ toc_title: s3Cluster # s3Cluster Table Function {#s3Cluster-table-function} -Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) in parallel from many nodes in a specified cluster. On initiator, it creates a connection to all nodes in the cluster, discloses asterics in S3 file path, and dispatch each file dynamically. On the worker node, it asks the initiator about the next task to process, processes it. This is repeated until the tasks are finished. +Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterics in S3 file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished. 
**Syntax** From 5edc97cd93feb06f78e12d322e18e56813a5015c Mon Sep 17 00:00:00 2001 From: sevirov <72220289+sevirov@users.noreply.github.com> Date: Tue, 22 Jun 2021 19:28:25 +0300 Subject: [PATCH 039/183] Update docs/en/sql-reference/table-functions/s3Cluster.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/table-functions/s3Cluster.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index 07e053cf962..f16fdf053c9 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -16,7 +16,7 @@ s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, stru **Arguments** - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. -- `source` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `source` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. From 39fef21d673fdb059ac4cb0ba87380dc21623cfd Mon Sep 17 00:00:00 2001 From: sevirov <72220289+sevirov@users.noreply.github.com> Date: Tue, 22 Jun 2021 19:28:36 +0300 Subject: [PATCH 040/183] Update docs/en/sql-reference/table-functions/s3Cluster.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/table-functions/s3Cluster.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index f16fdf053c9..51a55028ddc 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -29,7 +29,7 @@ A table with the specified structure for reading or writing data in the specifie Selecting the data from all files in the cluster `cluster_simple`: ``` sql -SELECT * from s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon); +SELECT * FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon); ``` Count the total amount of rows in all files in the cluster `cluster_simple`: From a870dec0ef5668f29bd82c0a43bde0d7a74a9785 Mon Sep 17 00:00:00 2001 From: sevirov <72220289+sevirov@users.noreply.github.com> Date: Tue, 22 Jun 2021 19:28:44 +0300 Subject: [PATCH 041/183] Update docs/en/sql-reference/table-functions/s3Cluster.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/table-functions/s3Cluster.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index 51a55028ddc..f8ebd93c8a7 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -26,7 +26,7 @@ A table with the specified structure for reading or writing data in the specifie **Examples** -Selecting the data from all files in the cluster `cluster_simple`: +Select the data from all files in the cluster `cluster_simple`: ``` sql SELECT * FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon); From 1230efb03bfc18584ebe42adb64de792dc1b4533 Mon Sep 17 00:00:00 2001 From: sevirov <72220289+sevirov@users.noreply.github.com> Date: Tue, 22 Jun 2021 19:28:53 +0300 Subject: [PATCH 042/183] Update docs/en/sql-reference/table-functions/s3Cluster.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/table-functions/s3Cluster.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index f8ebd93c8a7..4ef797c9734 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -35,7 +35,7 @@ SELECT * FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickho Count the total amount of rows in all files in the cluster `cluster_simple`: ``` sql -SELECT count(*) from s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))'); +SELECT count(*) FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))'); ``` !!! warning "Warning" From d172509c7792a742241f149c10e5a2bf40b8ba05 Mon Sep 17 00:00:00 2001 From: sevirov <72220289+sevirov@users.noreply.github.com> Date: Tue, 22 Jun 2021 19:29:07 +0300 Subject: [PATCH 043/183] Update docs/en/sql-reference/table-functions/s3Cluster.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/table-functions/s3Cluster.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index 4ef797c9734..b5901a94974 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -44,4 +44,4 @@ SELECT count(*) FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{ **See Also** - [S3 engine](../../engines/table-engines/integrations/s3.md) -- [S3 table function](../../sql-reference/table-functions/s3.md) +- [s3 table function](../../sql-reference/table-functions/s3.md) From a1e65ae260d529269d48eb46e8a974c7d171aa1e Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Tue, 22 Jun 2021 21:50:51 +0300 Subject: [PATCH 044/183] Translate to Russian MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Выполнил перевод на русский язык. 
--- .../table-functions/s3Cluster.md | 1 + .../table-functions/s3Cluster.md | 48 +++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 docs/ru/sql-reference/table-functions/s3Cluster.md diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index b5901a94974..9e2291a346d 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -17,6 +17,7 @@ s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, stru - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. - `source` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. diff --git a/docs/ru/sql-reference/table-functions/s3Cluster.md b/docs/ru/sql-reference/table-functions/s3Cluster.md new file mode 100644 index 00000000000..0f3c8f68c9c --- /dev/null +++ b/docs/ru/sql-reference/table-functions/s3Cluster.md @@ -0,0 +1,48 @@ +--- +toc_priority: 55 +toc_title: s3Cluster +--- + +# Табличная Функция s3Cluster {#s3Cluster-table-function} + +Позволяет обрабатывать файлы из [Amazon S3](https://aws.amazon.com/s3/) параллельно из многих узлов в указанном кластере. На узле-инициаторе функция создает соединение со всеми узлами в кластере, раскрывает звездочки в пути к файлу S3 и динамически отправляет каждый файл. На рабочем узле функция запрашивает у инициатора следующую задачу для обработки и обрабатывает ее. Это повторяется до тех пор, пока все задачи не будут завершены. + +**Синтаксис** + +``` sql +s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure) +``` + +**Аргументы** + +- `cluster_name` — имя кластера, используемое для создания набора адресов и параметров подключения к удаленным и локальным серверам. +- `source` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные символы в режиме "только чтение": `*`, `?`, `{abc,def}` и `{N..M}`, где `N`, `M` — числа, `abc`, `def` — строки. Подробнее смотрите в разделе [Символы подстановки](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `access_key_id` и `secret_access_key` — ключи, указывающие на учетные данные для использования с точкой приема запроса. Необязательные параметры. +- `format` — [формат](../../interfaces/formats.md#formats) файла. +- `structure` — структура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`. + +**Возвращаемое значение** + +Таблица с указанной структурой для чтения или записи данных в указанный файл. 
+ +**Примеры** + +Выведем данные из всех файлов кластера `cluster_simple`: + +``` sql +SELECT * FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon); +``` + +Подсчитаем общее количество строк во всех файлах кластера `cluster_simple`: + +``` sql +SELECT count(*) FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))'); +``` + +!!! warning "Внимание" + Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. + +**Смотрите также** + +- [Движок таблиц S3](../../engines/table-engines/integrations/s3.md) +- [Табличная функция s3](../../sql-reference/table-functions/s3.md) From 99e08b7406c5161ba21b78f4904bfdc32d99877a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 22 Jun 2021 23:22:13 +0300 Subject: [PATCH 045/183] Make network receive time metric to correctly include the time spent waiting for data from the client to INSERT #9958 --- src/IO/ReadBufferFromPocoSocket.cpp | 8 +++++++- ...network_receive_time_metric_insert.reference | 1 + .../01923_network_receive_time_metric_insert.sh | 17 +++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01923_network_receive_time_metric_insert.reference create mode 100755 tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index d1ceaaf6a35..e043764d280 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -76,7 +76,13 @@ ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, bool ReadBufferFromPocoSocket::poll(size_t timeout_microseconds) const { - return available() || socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR); + if (available()) + return true; + + Stopwatch watch; + bool res = socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR); + ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds()); + return res; } } diff --git a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.reference b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh new file mode 100755 index 00000000000..cd3202e94c9 --- /dev/null +++ b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE IF EXISTS t; CREATE TABLE t (x UInt64) ENGINE = Memory;" + +# Rate limit is chosen for operation to spent about one second. 
+seq 1 1000 | pv --quiet --rate-limit 3893 | ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT TSV" + +# We check that the value of NetworkReceiveElapsedMicroseconds is correctly include the time spent waiting data from the client. +${CLICKHOUSE_CLIENT} --multiquery --query "SYSTEM FLUSH LOGS; + SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'NetworkReceiveElapsedMicroseconds')] >= 1000000 FROM system.query_log + WHERE current_database = currentDatabase() AND query_kind = 'Insert' AND event_date >= yesterday() AND type = 2 ORDER BY event_time DESC LIMIT 1;" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE t" From a92bd49229ea4abc49880054ebe02b02b68d5183 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 22 Jun 2021 18:50:25 +0300 Subject: [PATCH 046/183] Supress PVS --- src/Coordination/ZooKeeperDataReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index 51965b499a2..8bcce25cfee 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -442,7 +442,7 @@ bool hasErrorsInMultiRequest(Coordination::ZooKeeperRequestPtr request) if (request == nullptr) return true; - for (const auto & subrequest : dynamic_cast(request.get())->requests) + for (const auto & subrequest : dynamic_cast(request.get())->requests) //-V522 if (subrequest == nullptr) return true; return false; From bb41ba6929a1bbe3d5b8ecbce8c7e11585409db2 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 22 Jun 2021 23:24:13 +0300 Subject: [PATCH 047/183] Update 01923_network_receive_time_metric_insert.sh --- .../0_stateless/01923_network_receive_time_metric_insert.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh index cd3202e94c9..8d66cfddb3e 100755 --- a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh +++ b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh @@ -9,7 +9,7 @@ ${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE IF EXISTS t; CREATE TABLE # Rate limit is chosen for operation to spent about one second. seq 1 1000 | pv --quiet --rate-limit 3893 | ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT TSV" -# We check that the value of NetworkReceiveElapsedMicroseconds is correctly include the time spent waiting data from the client. +# We check that the value of NetworkReceiveElapsedMicroseconds correctly includes the time spent waiting data from the client. 
${CLICKHOUSE_CLIENT} --multiquery --query "SYSTEM FLUSH LOGS; SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'NetworkReceiveElapsedMicroseconds')] >= 1000000 FROM system.query_log WHERE current_database = currentDatabase() AND query_kind = 'Insert' AND event_date >= yesterday() AND type = 2 ORDER BY event_time DESC LIMIT 1;" From 779b5df1a25dfe57e6ef8f02ae5d999ac89af403 Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Wed, 23 Jun 2021 00:10:53 +0300 Subject: [PATCH 048/183] Apply suggestions from code review Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- .../reference/quantileexact.md | 6 +++--- .../reference/quantiles.md | 4 ++-- .../reference/quantileexact.md | 20 +++++++++++-------- .../reference/quantiles.md | 12 +++++------ 4 files changed, 23 insertions(+), 19 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md index bb1906f3a8c..5091b023f67 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md @@ -178,7 +178,7 @@ quantileExactExclusive(level)(expr) **Arguments** -- `level` — Level of quantile. Optional. Possible values: (0, 1). Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md). +- `level` — Level of quantile. Optional. Possible values: (0, 1) — bounds not included. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** @@ -217,7 +217,7 @@ To get exact value, all the passed values ​​are combined into an array, whic This function is equivalent to [PERCENTILE.INC](https://support.microsoft.com/en-us/office/percentile-inc-function-680f9539-45eb-410b-9a5e-c1355e5fe2ed) Excel function, ([type R7](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample)). -When using multiple `quantileExactInclusive` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantilesExactInclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactexclusive) function. +When using multiple `quantileExactInclusive` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantilesExactInclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactinclusive) function. **Syntax** @@ -227,7 +227,7 @@ quantileExactInclusive(level)(expr) **Arguments** -- `level` — Level of quantile. Optional. Possible values: [0, 1]. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md). +- `level` — Level of quantile. Optional. Possible values: [0, 1] — bounds included. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). 
[Float](../../../sql-reference/data-types/float.md). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index 60ad80abae1..200a3f44dd3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -28,7 +28,7 @@ quantilesExactExclusive(level1, level2, ...)(expr) **Arguments** -- `level` — Leveles of quantiles. Possible values: (0, 1). [Float](../../../sql-reference/data-types/float.md). +- `level` — Leveles of quantiles. Possible values: (0, 1) — bounds not included. [Float](../../../sql-reference/data-types/float.md). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** @@ -77,7 +77,7 @@ quantilesExactInclusive(level1, level2, ...)(expr) **Arguments** -- `level` — Leveles of quantiles. Possible values: [0, 1]. [Float](../../../sql-reference/data-types/float.md). +- `level` — Leveles of quantiles. Possible values: [0, 1] — bounds included. [Float](../../../sql-reference/data-types/float.md). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md index 2a9ac46298b..9a6fca678c5 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md @@ -171,9 +171,10 @@ SELECT quantileExactHigh(number) FROM numbers(10) Эта функция эквивалентна Excel функции [PERCENTILE.EXC](https://support.microsoft.com/en-us/office/percentile-exc-function-bbaa7204-e9e1-4010-85bf-c31dc5dce4ba), [тип R6](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample). -Внутренние состояния функций `quantileExactExclusive` не объединяются, если они используются в одном запросе. Если вам необходимо вычислить квантили нескольких уровней, используйте функцию [quantilesExactExclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactexclusive), это повысит эффективность запроса. +Если в одном запросе вызывается несколько функций `quantileExactExclusive` с разными значениями `level`, эти функции вычисляются независимо друг от друга. В таких случаях используйте функцию [quantilesExactExclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactexclusive), запрос будет выполняться эффективнее. + +**Синтаксис** -**Синтакс** ``` sql quantileExactExclusive(level)(expr) @@ -181,8 +182,10 @@ quantileExactExclusive(level)(expr) **Аргументы** -- `level` — уровень квантиля. Необязательный параметр. Возможные значения: (0, 1). Значения по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). 
[Float](../../../sql-reference/data-types/float.md). -- `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). +- `level` — уровень квантиля. Необязательный параметр. Возможные значения: (0, 1) — граничные значения не учитываются. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). [Float](../../../sql-reference/data-types/float.md). + +- `expr` — выражение, зависящее от значений столбцов. Возвращает данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). + **Возвращаемое значение** @@ -220,9 +223,10 @@ SELECT quantileExactExclusive(0.6)(x) FROM (SELECT number AS x FROM num); Эта функция эквивалентна Excel функции [PERCENTILE.INC](https://support.microsoft.com/en-us/office/percentile-inc-function-680f9539-45eb-410b-9a5e-c1355e5fe2ed), [тип R7](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample). -Внутренние состояния функций `quantileExactInclusive` не объединяются, если они используются в одном запросе. Если вам необходимо вычислить квантили нескольких уровней, используйте функцию [quantilesExactInclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactexclusive), это повысит эффективность запроса. +Если в одном запросе вызывается несколько функций `quantileExactInclusive` с разными значениями `level`, эти функции вычисляются независимо друг от друга. В таких случаях используйте функцию [quantilesExactInclusive](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantilesexactinclusive), запрос будет выполняться эффективнее. + +**Синтаксис** -**Синтакс** ``` sql quantileExactInclusive(level)(expr) @@ -230,7 +234,8 @@ quantileExactInclusive(level)(expr) **Аргументы** -- `level` — уровень квантиля. Необязательный параметр. Возможные значения: [0, 1]. Значения по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). [Float](../../../sql-reference/data-types/float.md). +- `level` — уровень квантиля. Необязательный параметр. Возможные значения: [0, 1] — граничные значения учитываются. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). [Float](../../../sql-reference/data-types/float.md). + - `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). 
**Возвращаемое значение**
@@ -265,4 +270,3 @@ SELECT quantileExactInclusive(0.6)(x) FROM (SELECT number AS x FROM num);
 
 - [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
 - [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)
-
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md b/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md
index ed9e124a3ce..37857790971 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md
@@ -18,9 +18,9 @@ toc_priority: 201
 
 Эта функция эквивалентна Excel функции [PERCENTILE.EXC](https://support.microsoft.com/en-us/office/percentile-exc-function-bbaa7204-e9e1-4010-85bf-c31dc5dce4ba), [тип R6](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample).
 
-Работает более эффективно с наборами уровней, чем [quantilesExactExclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactexclusive).
+С наборами уровней работает эффективнее, чем [quantileExactExclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactexclusive).
 
-**Синтакс**
+**Синтаксис**
 
 ``` sql
 quantilesExactExclusive(level1, level2, ...)(expr)
@@ -28,8 +28,8 @@ quantilesExactExclusive(level1, level2, ...)(expr)
 
 **Аргументы**
 
-- `level` — уровень квантилей. Возможные значения: (0, 1). [Float](../../../sql-reference/data-types/float.md).
-- `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md).
+- `level` — уровень квантилей. Возможные значения: (0, 1) — граничные значения не учитываются. [Float](../../../sql-reference/data-types/float.md).
+- `expr` — выражение, зависящее от значений столбцов. Возвращает данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md).
 
 **Возвращаемые значения**
 
@@ -67,7 +67,7 @@ SELECT quantilesExactExclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x) FROM
 
 Эта функция эквивалентна Excel функции [PERCENTILE.INC](https://support.microsoft.com/en-us/office/percentile-inc-function-680f9539-45eb-410b-9a5e-c1355e5fe2ed), [тип R7](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample).
 
-Работает более эффективно с наборами уровней, чем [quantilesExactInclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantilesexactinclusive).
+С наборами уровней работает эффективнее, чем [quantileExactInclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactinclusive).
 
 **Синтаксис**
 
@@ -77,7 +77,7 @@ quantilesExactInclusive(level1, level2, ...)(expr)
 
 **Аргументы**
 
-- `level` — уровень квантилей. Возможные значения: [0, 1]. [Float](../../../sql-reference/data-types/float.md).
+- `level` — уровень квантилей. Возможные значения: [0, 1] — граничные значения учитываются. [Float](../../../sql-reference/data-types/float.md).
 - `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md).
 
**Возвращаемые значения** From 57f6811ba0de1dfdbf1ac24e47c0cb6a5c1ee1c8 Mon Sep 17 00:00:00 2001 From: George Date: Wed, 23 Jun 2021 00:13:24 +0300 Subject: [PATCH 049/183] Draft --- docs/en/operations/settings/settings.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 2bde3b03048..de7f734e8e0 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1727,6 +1727,19 @@ Possible values: Default value: 0. +## optimize_functions_to_subcolumns {#optimize-functions-to-subcolumns} + +Optimizes functions (if possible) to subcolumns to reduce amount of read data. + +- + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: `value`. + ## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life} - Type: seconds From d3d3c4b686ba0c7305770502420d1adf29983cbf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 23 Jun 2021 00:31:40 +0300 Subject: [PATCH 050/183] Adjust fast test --- docker/test/fasttest/run.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index cc6aeff357f..098384d6e61 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -381,6 +381,9 @@ function run_tests # needs psql 01889_postgresql_protocol_null_fields + + # needs pv + 01923_network_receive_time_metric_insert ) time clickhouse-test --hung-check -j 8 --order=random --use-skip-list \ From a0206dd438d478bdbc1f1de9e2a28cfc93fb761a Mon Sep 17 00:00:00 2001 From: George Date: Wed, 23 Jun 2021 00:32:41 +0300 Subject: [PATCH 051/183] various fixes --- .../reference/quantileexact.md | 12 +++++++++--- .../aggregate-functions/reference/quantiles.md | 10 ++++++++-- .../reference/quantileexact.md | 18 ++++++++---------- .../aggregate-functions/reference/quantiles.md | 10 ++++++++-- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md index 5091b023f67..47164cec86d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md @@ -68,7 +68,7 @@ SELECT quantileExactLow(0.1)(number) FROM numbers(10) │ 1 │ └───────────────────────────────┘ ``` - + When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function. **Syntax** @@ -178,9 +178,12 @@ quantileExactExclusive(level)(expr) **Arguments** -- `level` — Level of quantile. Optional. Possible values: (0, 1) — bounds not included. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +**Parameters** + +- `level` — Level of quantile. Optional. Possible values: (0, 1) — bounds not included. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). 
[Float](../../../sql-reference/data-types/float.md). + **Returned value** - Quantile of the specified level. @@ -227,9 +230,12 @@ quantileExactInclusive(level)(expr) **Arguments** -- `level` — Level of quantile. Optional. Possible values: [0, 1] — bounds included. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +**Parameters** + +- `level` — Level of quantile. Optional. Possible values: [0, 1] — bounds included. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md). + **Returned value** - Quantile of the specified level. diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index 200a3f44dd3..c3601f91350 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -28,9 +28,12 @@ quantilesExactExclusive(level1, level2, ...)(expr) **Arguments** -- `level` — Leveles of quantiles. Possible values: (0, 1) — bounds not included. [Float](../../../sql-reference/data-types/float.md). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +**Parameters** + +- `level` — Leveles of quantiles. Possible values: (0, 1) — bounds not included. [Float](../../../sql-reference/data-types/float.md). + **Returned value** - [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels. @@ -77,9 +80,12 @@ quantilesExactInclusive(level1, level2, ...)(expr) **Arguments** -- `level` — Leveles of quantiles. Possible values: [0, 1] — bounds included. [Float](../../../sql-reference/data-types/float.md). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +**Parameters** + +- `level` — Leveles of quantiles. Possible values: [0, 1] — bounds included. [Float](../../../sql-reference/data-types/float.md). + **Returned value** - [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels. diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md index 9a6fca678c5..2f1e879eaa1 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/quantileexact.md @@ -25,7 +25,6 @@ quantileExact(level)(expr) - `level` — уровень квантили. Опционально. Константное значение с плавающей запятой от 0 до 1. Мы рекомендуем использовать значение `level` из диапазона `[0.01, 0.99]`. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). 
- `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types) или типов [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md). - **Возвращаемое значение** - Квантиль заданного уровня. @@ -69,7 +68,7 @@ SELECT quantileExactLow(0.1)(number) FROM numbers(10) │ 1 │ └───────────────────────────────┘ ``` - + При использовании в запросе нескольких функций `quantile*` с разными уровнями, внутренние состояния не объединяются (то есть запрос работает менее эффективно). В этом случае используйте функцию [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles). **Синтаксис** @@ -85,7 +84,6 @@ quantileExact(level)(expr) - `level` — уровень квантили. Опциональный параметр. Константное занчение с плавающей запятой от 0 до 1. Мы рекомендуем использовать значение `level` из диапазона `[0.01, 0.99]`. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://en.wikipedia.org/wiki/Median). - `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). - **Возвращаемое значение** - Квантиль заданного уровня. @@ -136,7 +134,6 @@ quantileExactHigh(level)(expr) - `level` — уровень квантили. Опциональный параметр. Константное занчение с плавающей запятой от 0 до 1. Мы рекомендуем использовать значение `level` из диапазона `[0.01, 0.99]`. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://en.wikipedia.org/wiki/Median). - `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). - **Возвращаемое значение** - Квантиль заданного уровня. @@ -175,17 +172,17 @@ SELECT quantileExactHigh(number) FROM numbers(10) **Синтаксис** - ``` sql quantileExactExclusive(level)(expr) ``` **Аргументы** -- `level` — уровень квантиля. Необязательный параметр. Возможные значения: (0, 1) — граничные значения не учитываются. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). [Float](../../../sql-reference/data-types/float.md). - - `expr` — выражение, зависящее от значений столбцов. Возвращает данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). +**Параметры** + +- `level` — уровень квантиля. Необязательный параметр. Возможные значения: (0, 1) — граничные значения не учитываются. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). [Float](../../../sql-reference/data-types/float.md). **Возвращаемое значение** @@ -227,16 +224,17 @@ SELECT quantileExactExclusive(0.6)(x) FROM (SELECT number AS x FROM num); **Синтаксис** - ``` sql quantileExactInclusive(level)(expr) ``` **Аргументы** -- `level` — уровень квантиля. Необязательный параметр. Возможные значения: [0, 1] — граничные значения учитываются. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). 
[Float](../../../sql-reference/data-types/float.md). +- `expr` — выражение, зависящее от значений столбцов. Возвращает данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). -- `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). +**Параметры** + +- `level` — уровень квантиля. Необязательный параметр. Возможные значения: [0, 1] — граничные значения учитываются. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). [Float](../../../sql-reference/data-types/float.md). **Возвращаемое значение** diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md b/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md index 37857790971..d2e7003e4e7 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md @@ -28,9 +28,12 @@ quantilesExactExclusive(level1, level2, ...)(expr) **Аргументы** -- `level` — уровень квантилей. Возможные значения: (0, 1) — граничные значения не учитываются. [Float](../../../sql-reference/data-types/float.md). - `expr` — выражение, зависящее от значений столбцов. Возвращает данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). +**Параметры** + +- `level` — уровень квантилей. Возможные значения: (0, 1) — граничные значения не учитываются. [Float](../../../sql-reference/data-types/float.md). + **Возвращаемые значения** - [Массив](../../../sql-reference/data-types/array.md) квантилей указанных уровней. @@ -77,8 +80,11 @@ quantilesExactInclusive(level1, level2, ...)(expr) **Аргументы** +- `expr` — выражение, зависящее от значений столбцов. Возвращает данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). + +**Параметры** + - `level` — уровень квантилей. Возможные значения: [0, 1] — граничные значения учитываются. [Float](../../../sql-reference/data-types/float.md). -- `expr` — выражение, зависящее от значений столбцов, возвращающее данные [числовых типов](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) или [DateTime](../../../sql-reference/data-types/datetime.md). 
**Возвращаемые значения** From 7418d1c75727057297d0f6f28a7c3c3951d9440a Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Wed, 23 Jun 2021 01:08:20 +0300 Subject: [PATCH 052/183] Apply suggestions from code review Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- .../sql-reference/aggregate-functions/reference/quantiles.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index c3601f91350..73939f16db3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -32,7 +32,7 @@ quantilesExactExclusive(level1, level2, ...)(expr) **Parameters** -- `level` — Leveles of quantiles. Possible values: (0, 1) — bounds not included. [Float](../../../sql-reference/data-types/float.md). +- `level` — Levels of quantiles. Possible values: (0, 1) — bounds not included. [Float](../../../sql-reference/data-types/float.md). **Returned value** @@ -84,7 +84,7 @@ quantilesExactInclusive(level1, level2, ...)(expr) **Parameters** -- `level` — Leveles of quantiles. Possible values: [0, 1] — bounds included. [Float](../../../sql-reference/data-types/float.md). +- `level` — Levels of quantiles. Possible values: [0, 1] — bounds included. [Float](../../../sql-reference/data-types/float.md). **Returned value** From ecb766a5c647da8c68fbfe93579fb75179fdb7fd Mon Sep 17 00:00:00 2001 From: George Date: Wed, 23 Jun 2021 03:15:11 +0300 Subject: [PATCH 053/183] En docs --- docs/en/operations/settings/settings.md | 15 ++++-- .../aggregate-functions/reference/count.md | 2 + docs/en/sql-reference/data-types/map.md | 48 +++++++++++++++++++ .../functions/array-functions.md | 6 +++ .../functions/tuple-map-functions.md | 4 ++ docs/en/sql-reference/operators/index.md | 4 ++ 6 files changed, 76 insertions(+), 3 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index de7f734e8e0..1fd951383c8 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1729,16 +1729,25 @@ Default value: 0. ## optimize_functions_to_subcolumns {#optimize-functions-to-subcolumns} -Optimizes functions (if possible) to subcolumns to reduce amount of read data. +Optimizes functions (if possible) by transforming them to read the subcolumns. This reduces the amount of read data. -- +These function can be tranformed: + +- [length](../../sql-reference/functions/array-functions.md#array_functions-length) to read subcolumn [size0](../../sql-reference/data-types/array.md#array-size). +- [empty](../../sql-reference/functions/array-functions.md#function-empty) to read subcolumn [size0](../../sql-reference/data-types/array.md#array-size). +- [notEmpty](../../sql-reference/functions/array-functions.md#function-notempty) to read subcolumn [size0](../../sql-reference/data-types/array.md#array-size). +- [isNull](../../sql-reference/operators/index.md#operator-is-null) to read subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null). +- [isNotNull](../../sql-reference/operators.md#is-not-null#is-not-null) to read subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null). +- [count](../../sql-reference/aggregate-functions/reference/count.md) to read subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null). 
+- [mapKeys](../../sql-reference/functions/tuple-map-functions.md#mapkeys) to read subcolumn [keys](../../sql-reference/data-types/map.md#subcolumn-keys).
+- [mapValues](../../sql-reference/functions/tuple-map-functions.md#mapvalues) to read subcolumn [values](../../sql-reference/data-types/map.md#subcolumn-values).
 
 Possible values:
 
 - 0 — Disabled.
 - 1 — Enabled.
 
-Default value: `value`.
+Default value: `0`.
 
 ## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life}
 
 - Type: seconds
diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md
index 48c6f3f8c05..a3d1fcdbf5c 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/count.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/count.md
@@ -31,6 +31,8 @@ ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this const
 
 The `SELECT count() FROM table` query is not optimized, because the number of entries in the table is not stored separately. It chooses a small column from the table and counts the number of values in it.
 
+Can be optimized by setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null).
+
 **Examples**
 
 Example 1:
diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md
index 58634e5b669..10074f07cab 100644
--- a/docs/en/sql-reference/data-types/map.md
+++ b/docs/en/sql-reference/data-types/map.md
@@ -75,6 +75,54 @@ SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map
 └───────────────────────────────┘
 ```
 
+## Subcolumn Map.keys {#subcolumn-keys}
+
+To read all keys of a `Map` you can use the subcolumn `keys`, which doesn't read the whole column.
+
+**Example**
+
+Query:
+
+``` sql
+CREATE TABLE t_map (`a` Map(String, UInt64)) ENGINE = Memory;
+
+INSERT INTO t_map VALUES (map('key1', 1, 'key2', 2, 'key3', 3));
+
+SELECT a.keys FROM t_map;
+```
+
+Result:
+
+``` text
+┌─a.keys─────────────────┐
+│ ['key1','key2','key3'] │
+└────────────────────────┘
+```
+
+## Subcolumn Map.values {#subcolumn-values}
+
+To read all values of a `Map` you can use the subcolumn `values`, which doesn't read the whole column.
+
+**Example**
+
+Query:
+
+``` sql
+CREATE TABLE t_map (`a` Map(String, UInt64)) ENGINE = Memory;
+
+INSERT INTO t_map VALUES (map('key1', 1, 'key2', 2, 'key3', 3));
+
+SELECT a.values FROM t_map;
+```
+
+Result:
+
+``` text
+┌─a.values─┐
+│ [1,2,3]  │
+└──────────┘
+```
+
 **See Also**
 
 - [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function
diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index 6495a26a426..822600dd52f 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -11,18 +11,24 @@ Returns 1 for an empty array, or 0 for a non-empty array.
 The result type is UInt8.
The function also works for strings. +Can be optimized by setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [size0](../../sql-reference/data-types/array.md#array-size). + ## length {#array_functions-length} Returns the number of items in the array. The result type is UInt64. The function also works for strings. +Can be optimized by setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [size0](../../sql-reference/data-types/array.md#array-size). + ## emptyArrayUInt8, emptyArrayUInt16, emptyArrayUInt32, emptyArrayUInt64 {#emptyarrayuint8-emptyarrayuint16-emptyarrayuint32-emptyarrayuint64} ## emptyArrayInt8, emptyArrayInt16, emptyArrayInt32, emptyArrayInt64 {#emptyarrayint8-emptyarrayint16-emptyarrayint32-emptyarrayint64} diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 8b0710c0182..efede833e7a 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -220,6 +220,8 @@ Result: Returns all keys from the `map` parameter. +Can be optimized by setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [keys](../../sql-reference/data-types/map.md#subcolumn-keys). + **Syntax** ```sql @@ -261,6 +263,8 @@ Result: Returns all values from the `map` parameter. +Can be optimized by setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [values](../../sql-reference/data-types/map.md#subcolumn-values). + **Syntax** ```sql diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md index 268e56a5034..f45c7c7b90f 100644 --- a/docs/en/sql-reference/operators/index.md +++ b/docs/en/sql-reference/operators/index.md @@ -283,6 +283,8 @@ ClickHouse supports the `IS NULL` and `IS NOT NULL` operators. - `0` otherwise. - For other values, the `IS NULL` operator always returns `0`. +Can be optimized by setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null). + ``` sql @@ -313,3 +315,5 @@ SELECT * FROM t_null WHERE y IS NOT NULL │ 2 │ 3 │ └───┴───┘ ``` + +Can be optimized by setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null). 
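The rewrite documented in the patch above can be observed from the client with `EXPLAIN SYNTAX`. A minimal sketch, assuming a hypothetical MergeTree table `t_subcolumns` (the table and column names are illustrative and not part of the patch):

``` sql
-- Hypothetical table, used only to illustrate the setting documented above.
CREATE TABLE t_subcolumns (id UInt64, arr Array(UInt32), n Nullable(String))
ENGINE = MergeTree ORDER BY id;

SET optimize_functions_to_subcolumns = 1;

-- With the setting enabled, length(arr) should be rewritten to read the
-- lightweight `arr.size0` subcolumn instead of the whole `arr` column.
EXPLAIN SYNTAX SELECT length(arr) FROM t_subcolumns;
```

If the rewrite fires, the reported query reads `arr.size0` rather than calling `length(arr)`.
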
From 7cc4fa3696b55ac8bfcf4557105b8e834112aa42 Mon Sep 17 00:00:00 2001 From: George Date: Wed, 23 Jun 2021 03:25:28 +0300 Subject: [PATCH 054/183] fixed links --- docs/en/sql-reference/aggregate-functions/reference/count.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md index a3d1fcdbf5c..2d6d931866d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/count.md +++ b/docs/en/sql-reference/aggregate-functions/reference/count.md @@ -31,7 +31,7 @@ ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this const The `SELECT count() FROM table` query is not optimized, because the number of entries in the table is not stored separately. It chooses a small column from the table and counts the number of values in it. -Can be optimized by setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null). +Can be optimized by setting [optimize_functions_to_subcolumns](../../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [null](../../../sql-reference/data-types/nullable.md#finding-null). **Examples** From 4e22692512ea8f4617616d3bed7c662e71211103 Mon Sep 17 00:00:00 2001 From: George Date: Wed, 23 Jun 2021 03:26:08 +0300 Subject: [PATCH 055/183] fixed links --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 1fd951383c8..47ac756d6b8 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1737,7 +1737,7 @@ These function can be tranformed: - [empty](../../sql-reference/functions/array-functions.md#function-empty) to read subcolumn [size0](../../sql-reference/data-types/array.md#array-size). - [notEmpty](../../sql-reference/functions/array-functions.md#function-notempty) to read subcolumn [size0](../../sql-reference/data-types/array.md#array-size). - [isNull](../../sql-reference/operators/index.md#operator-is-null) to read subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null). -- [isNotNull](../../sql-reference/operators.md#is-not-null#is-not-null) to read subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null). +- [isNotNull](../../sql-reference/operators/index.md#is-not-null#is-not-null) to read subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null). - [count](../../sql-reference/aggregate-functions/reference/count.md) to read subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null). - [mapKeys](../../sql-reference/functions/tuple-map-functions.md#mapkeys) to read subcolumn [keys](../../sql-reference/data-types/map.md#subcolumn-keys). - [mapValues](../../sql-reference/functions/tuple-map-functions.md#mapvalues) to read subcolumn [values](../../sql-reference/data-types/map.md#subcolumn-values). 
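The `count` link fixed above relies on the same mechanism: over a `Nullable` column, `count(n)` can be answered from the `n.null` marker subcolumn. A rough sketch under the same assumptions as the previous example (the `t_subcolumns` table is hypothetical):

``` sql
SET optimize_functions_to_subcolumns = 1;

-- Expected rewrite: count(n) is served from the `n.null` subcolumn,
-- a compact map of NULL markers, instead of the full column data.
EXPLAIN SYNTAX SELECT count(n) FROM t_subcolumns;

-- The subcolumn itself can also be queried directly:
SELECT n.null FROM t_subcolumns LIMIT 5;
```
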
From e7fe155e987f8e3b964e05b15fb81692c66c816f Mon Sep 17 00:00:00 2001 From: George Date: Wed, 23 Jun 2021 03:58:24 +0300 Subject: [PATCH 056/183] Added the articles --- .../en/sql-reference/aggregate-functions/reference/count.md | 2 +- docs/en/sql-reference/functions/array-functions.md | 6 +++--- docs/en/sql-reference/functions/tuple-map-functions.md | 4 ++-- docs/en/sql-reference/operators/index.md | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md index 2d6d931866d..6f55d3b5cee 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/count.md +++ b/docs/en/sql-reference/aggregate-functions/reference/count.md @@ -31,7 +31,7 @@ ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this const The `SELECT count() FROM table` query is not optimized, because the number of entries in the table is not stored separately. It chooses a small column from the table and counts the number of values in it. -Can be optimized by setting [optimize_functions_to_subcolumns](../../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [null](../../../sql-reference/data-types/nullable.md#finding-null). +Can be optimized by the setting [optimize_functions_to_subcolumns](../../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [null](../../../sql-reference/data-types/nullable.md#finding-null). **Examples** diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 822600dd52f..10b8500b571 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -11,7 +11,7 @@ Returns 1 for an empty array, or 0 for a non-empty array. The result type is UInt8. The function also works for strings. -Can be optimized by setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [size0](../../sql-reference/data-types/array.md#array-size). +Can be optimized by the setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [size0](../../sql-reference/data-types/array.md#array-size). ## notEmpty {#function-notempty} @@ -19,7 +19,7 @@ Returns 0 for an empty array, or 1 for a non-empty array. The result type is UInt8. The function also works for strings. -Can be optimized by setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [size0](../../sql-reference/data-types/array.md#array-size). +Can be optimized by the setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [size0](../../sql-reference/data-types/array.md#array-size). ## length {#array_functions-length} @@ -27,7 +27,7 @@ Returns the number of items in the array. The result type is UInt64. The function also works for strings. 
-Can be optimized by setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [size0](../../sql-reference/data-types/array.md#array-size).
+Can be optimized by the setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [size0](../../sql-reference/data-types/array.md#array-size).
 
 ## emptyArrayUInt8, emptyArrayUInt16, emptyArrayUInt32, emptyArrayUInt64 {#emptyarrayuint8-emptyarrayuint16-emptyarrayuint32-emptyarrayuint64}
 
 ## emptyArrayInt8, emptyArrayInt16, emptyArrayInt32, emptyArrayInt64 {#emptyarrayint8-emptyarrayint16-emptyarrayint32-emptyarrayint64}
diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md
index efede833e7a..2deb9323cff 100644
--- a/docs/en/sql-reference/functions/tuple-map-functions.md
+++ b/docs/en/sql-reference/functions/tuple-map-functions.md
@@ -220,7 +220,7 @@ Result:
 
 Returns all keys from the `map` parameter.
 
-Can be optimized by setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [keys](../../sql-reference/data-types/map.md#subcolumn-keys).
+Can be optimized by the setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [keys](../../sql-reference/data-types/map.md#subcolumn-keys).
 
 **Syntax**
 
@@ -263,7 +263,7 @@ Result:
 
 Returns all values from the `map` parameter.
 
-Can be optimized by setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [values](../../sql-reference/data-types/map.md#subcolumn-values).
+Can be optimized by the setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [values](../../sql-reference/data-types/map.md#subcolumn-values).
 
 **Syntax**
 
diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md
index f45c7c7b90f..1cb7936969c 100644
--- a/docs/en/sql-reference/operators/index.md
+++ b/docs/en/sql-reference/operators/index.md
@@ -283,7 +283,7 @@ ClickHouse supports the `IS NULL` and `IS NOT NULL` operators.
 - `0` otherwise.
 - For other values, the `IS NULL` operator always returns `0`.
 
-Can be optimized by setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null).
+Can be optimized by the setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null).
 
 
 
@@ -316,4 +316,4 @@ SELECT * FROM t_null WHERE y IS NOT NULL
 └───┴───┘
 ```
 
-Can be optimized by setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null).
+Can be optimized by the setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null).

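For the `mapKeys`/`mapValues` entries rewritten above, a companion sketch (the table name is illustrative, not part of the patch; on the ClickHouse versions this series targets, creating a `Map` column is assumed to require the experimental-type switch):

``` sql
-- Assumption: Map is still experimental on this version.
SET allow_experimental_map_type = 1;

CREATE TABLE t_map_subcolumns (id UInt64, m Map(String, UInt64))
ENGINE = MergeTree ORDER BY id;

INSERT INTO t_map_subcolumns VALUES (1, map('key1', 1, 'key2', 2));

SET optimize_functions_to_subcolumns = 1;

-- mapKeys(m) and mapValues(m) should be rewritten to read the `m.keys`
-- and `m.values` subcolumns instead of the whole Map column.
EXPLAIN SYNTAX SELECT mapKeys(m), mapValues(m) FROM t_map_subcolumns;
```
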
+Can be optimized by the setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null). From fa3d08420fbeb37bf7b4142cde7d3e3e02a2cd3f Mon Sep 17 00:00:00 2001 From: George Date: Wed, 23 Jun 2021 04:04:20 +0300 Subject: [PATCH 057/183] Unrelated fix --- docs/en/sql-reference/data-types/map.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md index 10074f07cab..dc1a9846d22 100644 --- a/docs/en/sql-reference/data-types/map.md +++ b/docs/en/sql-reference/data-types/map.md @@ -8,6 +8,7 @@ toc_title: Map(key, value) `Map(key, value)` data type stores `key:value` pairs. **Parameters** + - `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md). - `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md). From 8f9166df4ea4032f22bdd72cf6e390776fe54ac5 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 23 Jun 2021 12:11:13 +0300 Subject: [PATCH 058/183] Add read flag to KeyGetterForDict --- src/Interpreters/HashJoin.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index fcdf6305a68..469b3cddbee 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -204,6 +204,7 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s if (table_join->dictionary_reader) { + LOG_DEBUG(log, "Performing join over dict"); data->type = Type::DICT; std::get(data->maps).create(Type::DICT); chooseMethod(key_columns, key_sizes); /// init key_sizes @@ -326,8 +327,9 @@ public: FindResult findKey(const TableJoin & table_join, size_t row, const Arena &) { const DictionaryReader & reader = *table_join.dictionary_reader; - if (!read_result) + if (!dictionary_read) { + dictionary_read = true; reader.readKeys(*key_columns[0], read_result, found, positions); result.block = &read_result; @@ -345,6 +347,7 @@ private: const ColumnRawPtrs & key_columns; Block read_result; Mapped result; + bool dictionary_read = false; ColumnVector::Container found; std::vector positions; }; From ea7f798de4d0ebdcbcd5b7db6d8e2ac25fa0a544 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 23 Jun 2021 13:05:51 +0300 Subject: [PATCH 059/183] Add tests/performance/dict_join.xml --- tests/performance/dict_join.xml | 37 +++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 tests/performance/dict_join.xml diff --git a/tests/performance/dict_join.xml b/tests/performance/dict_join.xml new file mode 100644 index 00000000000..e12ef4abd63 --- /dev/null +++ b/tests/performance/dict_join.xml @@ -0,0 +1,37 @@ + + + CREATE TABLE join_dictionary_source_table (key UInt64, value String) + ENGINE = MergeTree ORDER BY key; + + + + CREATE DICTIONARY join_hashed_dictionary (key UInt64, value String) + PRIMARY KEY key + SOURCE(CLICKHOUSE(DB 'default' TABLE 'join_dictionary_source_table')) + LIFETIME(MIN 0 MAX 1000) + LAYOUT(HASHED()); + + + + INSERT INTO join_dictionary_source_table + SELECT number, toString(number) + FROM numbers(10000000); + + + + SELECT COUNT() + FROM join_dictionary_source_table + JOIN join_hashed_dictionary + ON 
join_dictionary_source_table.key = join_hashed_dictionary.key;
+    </query>
+
+    <query>
+        SELECT COUNT()
+        FROM join_dictionary_source_table
+        JOIN join_hashed_dictionary
+        ON join_dictionary_source_table.key = toUInt64(join_hashed_dictionary.key);
+    </query>
+
+    <drop_query>DROP DICTIONARY IF EXISTS join_hashed_dictionary;</drop_query>
+    <drop_query>DROP TABLE IF EXISTS join_dictionary_source_table;</drop_query>
+</test>

From 4ce829d7c1df4dc94cdd9cf1e918257e63990214 Mon Sep 17 00:00:00 2001
From: vdimir
Date: Wed, 23 Jun 2021 13:13:11 +0300
Subject: [PATCH 060/183] Create KeyGetter outside of joinRightColumns

---
 src/Interpreters/HashJoin.cpp | 70 ++++++++++++++++++++---------------
 1 file changed, 40 insertions(+), 30 deletions(-)

diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp
index 469b3cddbee..6e5f7df99bd 100644
--- a/src/Interpreters/HashJoin.cpp
+++ b/src/Interpreters/HashJoin.cpp
@@ -320,34 +320,25 @@ public:
     using Mapped = RowRef;
     using FindResult = ColumnsHashing::columns_hashing_impl::FindResultImpl;
 
-    KeyGetterForDict(const ColumnRawPtrs & key_columns_, const Sizes &, void *)
-        : key_columns(key_columns_)
-    {}
-
-    FindResult findKey(const TableJoin & table_join, size_t row, const Arena &)
+    KeyGetterForDict(const TableJoin & table_join, const ColumnRawPtrs & key_columns)
     {
-        const DictionaryReader & reader = *table_join.dictionary_reader;
-        if (!dictionary_read)
-        {
-            dictionary_read = true;
-            reader.readKeys(*key_columns[0], read_result, found, positions);
-            result.block = &read_result;
+        table_join.dictionary_reader->readKeys(*key_columns[0], read_result, found, positions);
 
-            if (table_join.forceNullableRight())
-                for (auto & column : read_result)
-                    if (table_join.rightBecomeNullable(column.type))
-                        JoinCommon::convertColumnToNullable(column);
-        }
+        for (ColumnWithTypeAndName & column : read_result)
+            if (table_join.rightBecomeNullable(column.type))
+                JoinCommon::convertColumnToNullable(column);
+    }
 
+    FindResult findKey(void *, size_t row, const Arena &)
+    {
+        result.block = &read_result;
         result.row_num = positions[row];
         return FindResult(&result, found[row], 0);
     }
 
 private:
-    const ColumnRawPtrs & key_columns;
     Block read_result;
     Mapped result;
-    bool dictionary_read = false;
     ColumnVector<UInt8>::Container found;
     std::vector<size_t> positions;
 };
@@ -854,6 +845,7 @@ void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unuse
 
 /// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS).
template NO_INLINE IColumn::Filter joinRightColumns( + KeyGetter && key_getter, const Map & map, AddedColumns & added_columns, const ConstNullMapPtr & null_map [[maybe_unused]], @@ -883,8 +875,6 @@ NO_INLINE IColumn::Filter joinRightColumns( if constexpr (need_replication) added_columns.offsets_to_replicate = std::make_unique(rows); - auto key_getter = createKeyGetter(added_columns.key_columns, added_columns.key_sizes); - IColumn::Offset current_offset = 0; for (size_t i = 0; i < rows; ++i) @@ -983,35 +973,51 @@ NO_INLINE IColumn::Filter joinRightColumns( template IColumn::Filter joinRightColumnsSwitchNullability( - const Map & map, AddedColumns & added_columns, const ConstNullMapPtr & null_map, JoinStuff::JoinUsedFlags & used_flags) + KeyGetter && key_getter, + const Map & map, + AddedColumns & added_columns, + const ConstNullMapPtr & null_map, + JoinStuff::JoinUsedFlags & used_flags) { if (added_columns.need_filter) { if (null_map) - return joinRightColumns(map, added_columns, null_map, used_flags); + return joinRightColumns( + std::forward(key_getter), map, added_columns, null_map, used_flags); else - return joinRightColumns(map, added_columns, nullptr, used_flags); + return joinRightColumns( + std::forward(key_getter), map, added_columns, nullptr, used_flags); } else { if (null_map) - return joinRightColumns(map, added_columns, null_map, used_flags); + return joinRightColumns( + std::forward(key_getter), map, added_columns, null_map, used_flags); else - return joinRightColumns(map, added_columns, nullptr, used_flags); + return joinRightColumns( + std::forward(key_getter), map, added_columns, nullptr, used_flags); } } template IColumn::Filter switchJoinRightColumns( - const Maps & maps_, AddedColumns & added_columns, HashJoin::Type type, const ConstNullMapPtr & null_map, JoinStuff::JoinUsedFlags & used_flags) + const Maps & maps_, + AddedColumns & added_columns, + HashJoin::Type type, + const ConstNullMapPtr & null_map, + JoinStuff::JoinUsedFlags & used_flags) { + constexpr bool is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof; switch (type) { #define M(TYPE) \ case HashJoin::Type::TYPE: \ - return joinRightColumnsSwitchNullability>::Type>(\ - *maps_.TYPE, added_columns, null_map, used_flags); + { \ + using KeyGetter = typename KeyGetterForType>::Type; \ + auto key_getter = createKeyGetter(added_columns.key_columns, added_columns.key_sizes); \ + return joinRightColumnsSwitchNullability( \ + std::move(key_getter), *maps_.TYPE, added_columns, null_map, used_flags); \ + } APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -1028,8 +1034,12 @@ IColumn::Filter dictionaryJoinRightColumns(const TableJoin & table_join, AddedCo STRICTNESS == ASTTableJoin::Strictness::Semi || STRICTNESS == ASTTableJoin::Strictness::Anti)) { + assert(added_columns.key_columns.size() == 1); + JoinStuff::JoinUsedFlags flags; - return joinRightColumnsSwitchNullability(table_join, added_columns, null_map, flags); + KeyGetterForDict key_getter(table_join, added_columns.key_columns); + return joinRightColumnsSwitchNullability( + std::move(key_getter), nullptr, added_columns, null_map, flags); } throw Exception("Logical error: wrong JOIN combination", ErrorCodes::LOGICAL_ERROR); From 541c09d7b3a9df3be8cb7cc08aa6c7db20ff4578 Mon Sep 17 00:00:00 2001 From: Ildus Kurbangaliev Date: Wed, 23 Jun 2021 15:03:58 +0200 Subject: [PATCH 061/183] Fix tests errors --- src/Functions/array/mapOp.cpp | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/Functions/array/mapOp.cpp 
b/src/Functions/array/mapOp.cpp index da394c47f80..5c2637270d5 100644 --- a/src/Functions/array/mapOp.cpp +++ b/src/Functions/array/mapOp.cpp @@ -19,7 +19,6 @@ namespace DB { namespace ErrorCodes { - extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -91,7 +90,7 @@ private: const DataTypeTuple * tup = checkAndGetDataType(arg.get()); if (!tup) - throw Exception{getName() + " accepts at least two map tuples", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + throw Exception(getName() + " accepts at least two map tuples", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); auto elems = tup->getElements(); if (elems.size() != 2) @@ -107,8 +106,8 @@ private: auto result_type = v->getNestedType(); if (!result_type->canBePromoted()) - throw Exception{ - "Values to be summed are expected to be Numeric, Float or Decimal.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception( + "Values to be summed are expected to be Numeric, Float or Decimal.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); auto promoted_val_type = result_type->promoteNumericType(); if (!key_type) @@ -133,13 +132,13 @@ private: { const auto * map = checkAndGetDataType(arg.get()); if (!map) - throw Exception{getName() + " accepts at least two maps", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + throw Exception(getName() + " accepts at least two maps", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); const auto & v = map->getValueType(); if (!v->canBePromoted()) - throw Exception{ - "Values to be summed are expected to be Numeric, Float or Decimal.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception( + "Values to be summed are expected to be Numeric, Float or Decimal.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); auto promoted_val_type = v->promoteNumericType(); if (!key_type) @@ -158,14 +157,14 @@ private: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.size() < 2) - throw Exception{getName() + " accepts at least two maps", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + throw Exception(getName() + " accepts at least two maps", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); if (arguments[0]->getTypeId() == TypeIndex::Tuple) return getReturnTypeForTuples(arguments); else if (arguments[0]->getTypeId() == TypeIndex::Map) return getReturnTypeForMaps(arguments); else - throw Exception{getName() + " only accepts maps", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(getName() + " only accepts maps", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } template @@ -301,9 +300,9 @@ private: case TypeIndex::Float64: return execute2(row_count, args, res_type); default: - throw Exception{ + throw Exception( "Illegal column type " + res_value_type->getName() + " for values in arguments of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } } @@ -388,7 +387,7 @@ private: } else throw Exception{ - "Illegal column type " + key_type->getName() + " in arguments of function " + getName(), + "Illegal column type " + arguments[0].type->getName() + " in arguments of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; } From ebceb6a5b40ecdd8140cd3e4bcb329903c5035e1 Mon Sep 17 00:00:00 2001 From: George Date: Wed, 23 Jun 2021 18:54:26 +0300 Subject: [PATCH 062/183] fixed link --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 
47ac756d6b8..6190b9b030b 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1737,7 +1737,7 @@ These function can be tranformed: - [empty](../../sql-reference/functions/array-functions.md#function-empty) to read subcolumn [size0](../../sql-reference/data-types/array.md#array-size). - [notEmpty](../../sql-reference/functions/array-functions.md#function-notempty) to read subcolumn [size0](../../sql-reference/data-types/array.md#array-size). - [isNull](../../sql-reference/operators/index.md#operator-is-null) to read subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null). -- [isNotNull](../../sql-reference/operators/index.md#is-not-null#is-not-null) to read subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null). +- [isNotNull](../../sql-reference/operators/index.md#is-not-null) to read subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null). - [count](../../sql-reference/aggregate-functions/reference/count.md) to read subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null). - [mapKeys](../../sql-reference/functions/tuple-map-functions.md#mapkeys) to read subcolumn [keys](../../sql-reference/data-types/map.md#subcolumn-keys). - [mapValues](../../sql-reference/functions/tuple-map-functions.md#mapvalues) to read subcolumn [values](../../sql-reference/data-types/map.md#subcolumn-values). From 3ef23c4e228036a5c2d466472b8901873c9d81ed Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 24 Jun 2021 00:33:22 +0300 Subject: [PATCH 063/183] Add pv to the test image --- docker/test/base/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index 44b9d42d6a1..a722132c3a5 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -46,6 +46,7 @@ RUN apt-get update \ pigz \ pkg-config \ tzdata \ + pv \ --yes --no-install-recommends # Sanitizer options for services (clickhouse-server) From 79ffb59d49090c1876ee5dacde3b1f1b78977296 Mon Sep 17 00:00:00 2001 From: yuchuansun Date: Thu, 24 Jun 2021 10:22:31 +0800 Subject: [PATCH 064/183] doc: [chinese] change wrong language format into correct --- docs/zh/engines/table-engines/special/file.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/engines/table-engines/special/file.md b/docs/zh/engines/table-engines/special/file.md index 08f0a58070f..503d6d7e7f5 100644 --- a/docs/zh/engines/table-engines/special/file.md +++ b/docs/zh/engines/table-engines/special/file.md @@ -54,7 +54,7 @@ SELECT * FROM file_engine_table ## 在 Clickhouse-local 中的使用 {#zai-clickhouse-local-zhong-de-shi-yong} -使用 [ツ环板-ョツ嘉ッツ偲](../../../engines/table-engines/special/file.md) 时,File 引擎除了 `Format` 之外,还可以接受文件路径参数。可以使用数字或人类可读的名称来指定标准输入/输出流,例如 `0` 或 `stdin`,`1` 或 `stdout`。 +使用 [clickhouse-local](../../../operations/utilities/clickhouse-local.md) 时,File 引擎除了 `Format` 之外,还可以接受文件路径参数。可以使用数字或人类可读的名称来指定标准输入/输出流,例如 `0` 或 `stdin`,`1` 或 `stdout`。 **例如:** ``` bash From 6fae921d65d6a7de34e4e74e7fc600a0d4f21b72 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 24 Jun 2021 11:27:55 +0300 Subject: [PATCH 065/183] Smaller table in performance/dict_join.xml --- tests/performance/dict_join.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/dict_join.xml b/tests/performance/dict_join.xml index e12ef4abd63..1fa5ce1726c 100644 --- a/tests/performance/dict_join.xml +++ b/tests/performance/dict_join.xml @@ -15,7 +15,7 @@ INSERT INTO 
 join_dictionary_source_table SELECT number, toString(number)
-    FROM numbers(10000000);
+    FROM numbers(1000000);

From d28b12975d68beb82868e4e10839551301ed2a49 Mon Sep 17 00:00:00 2001
From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com>
Date: Thu, 24 Jun 2021 12:52:32 +0300
Subject: [PATCH 066/183] Update docs/ru/getting-started/install.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/getting-started/install.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md
index 2924958ddf4..84713adb60e 100644
--- a/docs/ru/getting-started/install.md
+++ b/docs/ru/getting-started/install.md
@@ -87,7 +87,7 @@ sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh

 Для запуска ClickHouse в Docker нужно следовать инструкции на [Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/). Внутри образов используются официальные `deb` пакеты.

-### Из единого бинарника {#from-single-binary}
+### Из единого бинарного файла {#from-single-binary}

 Для установки ClickHouse на Linux можно использовать единый переносимый бинарник из последнего коммита ветки `master`: https://builds.clickhouse.tech/master/amd64/clickhouse.

From 07421ffff0a34b419e0aa48a2ef21686362ddfb2 Mon Sep 17 00:00:00 2001
From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com>
Date: Thu, 24 Jun 2021 12:52:45 +0300
Subject: [PATCH 067/183] Update docs/ru/getting-started/install.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/getting-started/install.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md
index 84713adb60e..5e415c7a62d 100644
--- a/docs/ru/getting-started/install.md
+++ b/docs/ru/getting-started/install.md
@@ -89,7 +89,7 @@ sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh

 ### Из единого бинарного файла {#from-single-binary}

-Для установки ClickHouse на Linux можно использовать единый переносимый бинарник из последнего коммита ветки `master`: https://builds.clickhouse.tech/master/amd64/clickhouse.
+Для установки ClickHouse под Linux можно использовать единый переносимый бинарный файл из последнего коммита ветки `master`: [https://builds.clickhouse.tech/master/amd64/clickhouse].

From ff9fba33bbeb00607a0f3feb77bc68a32495c4a7 Mon Sep 17 00:00:00 2001
From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com>
Date: Thu, 24 Jun 2021 12:52:53 +0300
Subject: [PATCH 068/183] Update docs/en/getting-started/install.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/en/getting-started/install.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md
index 5cec83c3819..3de90156a41 100644
--- a/docs/en/getting-started/install.md
+++ b/docs/en/getting-started/install.md
@@ -96,7 +96,7 @@ To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.doc

 ### Single Binary {#from-single-binary}

-You can install ClickHouse on Linux using a single portable binary from the latest commit of the `master` branch: https://builds.clickhouse.tech/master/amd64/clickhouse.
+You can install ClickHouse on Linux using a single portable binary from the latest commit of the `master` branch: [https://builds.clickhouse.tech/master/amd64/clickhouse].

 ``` bash
 curl -O 'https://builds.clickhouse.tech/master/amd64/clickhouse' && chmod a+x clickhouse

From fce8316d9831d0812c9dd3ce39fae19625ff8a20 Mon Sep 17 00:00:00 2001
From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com>
Date: Thu, 24 Jun 2021 12:52:59 +0300
Subject: [PATCH 069/183] Update docs/ru/getting-started/install.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/getting-started/install.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md
index 5e415c7a62d..565aaac5cee 100644
--- a/docs/ru/getting-started/install.md
+++ b/docs/ru/getting-started/install.md
@@ -98,7 +98,7 @@ sudo ./clickhouse install

 ### Из исполняемых файлов для нестандартных окружений {#from-binaries-non-linux}

-Для других операционных систем и архитектуры AArch64, сборки ClickHouse предоставляются в виде кросс-компилированного бинарника из последнего коммита ветки `master` (с задержкой в несколько часов).
+Для других операционных систем и архитектуры AArch64 сборки ClickHouse предоставляются в виде кросс-компилированного бинарного файла из последнего коммита ветки `master` (с задержкой в несколько часов).

 - [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse`
 - [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse`

From 432ba0b885e6aaf8a6ebb353229a06011db0ec63 Mon Sep 17 00:00:00 2001
From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com>
Date: Thu, 24 Jun 2021 12:53:06 +0300
Subject: [PATCH 070/183] Update docs/ru/getting-started/install.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/getting-started/install.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md
index 565aaac5cee..e585021119d 100644
--- a/docs/ru/getting-started/install.md
+++ b/docs/ru/getting-started/install.md
@@ -108,7 +108,7 @@ sudo ./clickhouse install

 Чтобы установить ClickHouse в рамках всей системы (с необходимыми конфигурационными файлами, настройками пользователей и т.д.), выполните `sudo ./clickhouse install`. Затем выполните команды `clickhouse start` (чтобы запустить сервер) и `clickhouse-client` (чтобы подключиться к нему).

-Данные сборки не рекомендуются для использования в продакшене, так как они недостаточно тщательно протестированны. Также, в них присутствуют не все возможности ClickHouse.
+Данные сборки не рекомендуются для использования в рабочей базе данных, так как они недостаточно тщательно протестированы. Также в них присутствуют не все возможности ClickHouse.

 ### Из исходного кода {#from-sources}
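Patches 066–070 polish the single-binary section of `install.md` in both languages. Pulling the commands they touch together, a minimal sketch of the whole flow (assuming the `builds.clickhouse.tech` URL from the patched docs is still reachable) looks like this:

``` bash
# Minimal sketch of the single-binary flow documented in the patches above;
# all commands are taken from the patched install.md pages.
curl -O 'https://builds.clickhouse.tech/master/amd64/clickhouse'
chmod a+x clickhouse

# System-wide install (config files, users, etc.), then start the server and
# connect with the client, as the Russian text of PATCH 070 describes.
sudo ./clickhouse install
clickhouse start
clickhouse-client
```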
From 653ba9c86d57c2f138f072c2571583f050a5dca8 Mon Sep 17 00:00:00 2001
From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com>
Date: Thu, 24 Jun 2021 12:53:12 +0300
Subject: [PATCH 071/183] Update
 docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 .../external-dictionaries/external-dicts-dict-lifetime.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
index 388d54c21a0..c0811c5d415 100644
--- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
+++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
@@ -52,7 +52,7 @@ LIFETIME(MIN 300 MAX 360)
 ```

 Если `0` и `0`, ClickHouse не перегружает словарь по истечению времени.
-В этом случае, ClickHouse может перезагрузить данные словаря если изменился XML файл с конфигурацией словаря или если была выполнена команда `SYSTEM RELOAD DICTIONARY`.
+В этом случае ClickHouse может перезагрузить данные словаря, если изменился XML файл с конфигурацией словаря или если была выполнена команда `SYSTEM RELOAD DICTIONARY`.

 При обновлении словарей сервер ClickHouse применяет различную логику в зависимости от типа [источника](external-dicts-dict-sources.md):

@@ -116,4 +116,4 @@ SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source wher
 ...
 SOURCE(CLICKHOUSE(... update_field 'added_time' update_lag 15))
 ...
-```
\ No newline at end of file
+```

From 220d8217856dcf5460dffb8f77a2585ad123fc4a Mon Sep 17 00:00:00 2001
From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com>
Date: Thu, 24 Jun 2021 12:53:18 +0300
Subject: [PATCH 072/183] Update
 docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 .../external-dictionaries/external-dicts-dict-lifetime.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
index c0811c5d415..573d4374fed 100644
--- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
+++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
@@ -51,7 +51,7 @@ LIFETIME(300)
 LIFETIME(MIN 300 MAX 360)
 ```

-Если `0` и `0`, ClickHouse не перегружает словарь по истечению времени.
+Если `0` и `0`, ClickHouse не перегружает словарь по истечении времени.
 В этом случае ClickHouse может перезагрузить данные словаря, если изменился XML файл с конфигурацией словаря или если была выполнена команда `SYSTEM RELOAD DICTIONARY`.

 При обновлении словарей сервер ClickHouse применяет различную логику в зависимости от типа [источника](external-dicts-dict-sources.md):
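The two commits above fix the wording of the `LIFETIME` passage. As a hedged illustration of the behaviour it describes — the dictionary, database and table names here are hypothetical, not taken from the docs:

``` sql
-- With MIN/MAX, ClickHouse reloads the dictionary at a random moment inside
-- the window; LIFETIME(0) would disable reloading by timeout entirely.
CREATE DICTIONARY test_dict
(
    id UInt64,
    value String
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' password '' db 'default' table 'dictionary_source'))
LAYOUT(FLAT())
LIFETIME(MIN 300 MAX 360);

-- Force a refresh regardless of LIFETIME, as the patched text mentions.
SYSTEM RELOAD DICTIONARY test_dict;
```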
From c7d5282798ee11c702a80d8cfee2239afb5c57bb Mon Sep 17 00:00:00 2001
From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com>
Date: Thu, 24 Jun 2021 12:53:25 +0300
Subject: [PATCH 073/183] Update
 docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 .../external-dictionaries/external-dicts-dict-lifetime.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
index 1d79c9a28bf..36b42ed6281 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
@@ -88,7 +88,7 @@ SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source wher

 For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronious and asynchronious updates are supported.

-It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to only request data that was changed after the previous update. If `update_field` is specified as part of the dictionary source configuration value of the previous update time in seconds will be added to the data request. Depends on source type Executable, HTTP, MySQL, PostgreSQL, ClickHouse, ODBC different logic will be applied to `update_field` before request data from an external source.
+It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to only request data that was changed after the previous update. If `update_field` is specified as part of the dictionary source configuration, value of the previous update time in seconds will be added to the data request. Depends on source type (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, or ODBC) different logic will be applied to `update_field` before request data from an external source.

 - If the source is HTTP then `update_field` will be added as a query parameter with the last update time as the parameter value.
 - If the source is Executable then `update_field` will be added as an executable script argument with the last update time as the argument value.
@@ -116,4 +116,4 @@ or
 ...
 SOURCE(CLICKHOUSE(... update_field 'added_time' update_lag 15))
 ...
-```
\ No newline at end of file
+```

From e08587024c941cafde8d36b888dc25cf4f1615c8 Mon Sep 17 00:00:00 2001
From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com>
Date: Thu, 24 Jun 2021 12:53:33 +0300
Subject: [PATCH 074/183] Update
 docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 .../external-dictionaries/external-dicts-dict-lifetime.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
index 573d4374fed..81b61566d86 100644
--- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
+++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
@@ -88,7 +88,7 @@ SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source wher

 Для словарей `Cache`, `ComplexKeyCache`, `SSDCache` и `SSDComplexKeyCache` поддерживается как синхронное, так и асинхронное обновление.

-Также словари `Flat`, `Hashed`, `ComplexKeyHashed` могут запрашивать только те данные, которые были изменены после предыдущего обновления. Если `update_field` указана как часть конфигурации источника словаря, к запросу данных будет добавлено время предыдущего обновления в секундах. В зависимости от типа источника (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, ODBC) к `update_field` будет применена соответствующая логика перед запросом данных из внешнего источника.
+Словари `Flat`, `Hashed` и `ComplexKeyHashed` могут запрашивать только те данные, которые были изменены после предыдущего обновления. Если `update_field` указано как часть конфигурации источника словаря, к запросу данных будет добавлено время предыдущего обновления в секундах. В зависимости от типа источника (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, ODBC) к `update_field` будет применена соответствующая логика перед запросом данных из внешнего источника.

 - Если источник HTTP, то `update_field` будет добавлена в качестве параметра запроса, а время последнего обновления — в качестве значения параметра.
 - Если источник Executable, то `update_field` будет добавлена в качестве аргумента исполняемого скрипта, время последнего обновления — в качестве значения аргумента.

From 2bb9bb6a86a79f86e61d1d1fba039902b8f0d838 Mon Sep 17 00:00:00 2001
From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com>
Date: Thu, 24 Jun 2021 12:53:39 +0300
Subject: [PATCH 075/183] Update
 docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 .../external-dictionaries/external-dicts-dict-lifetime.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
index 36b42ed6281..afef6ae249d 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
@@ -92,7 +92,7 @@ It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to onl

 - If the source is HTTP then `update_field` will be added as a query parameter with the last update time as the parameter value.
 - If the source is Executable then `update_field` will be added as an executable script argument with the last update time as the argument value.
-- If the source is ClickHouse, MySQL, PostgreSQL, ODBC there will be an additional part of WHERE, where `update_field` is compared as greater or equal with the last update time.
+- If the source is ClickHouse, MySQL, PostgreSQL, ODBC there will be an additional part of `WHERE`, where `update_field` is compared as greater or equal with the last update time.

 If `update_field` option is set, additional option `update_lag` can be set. Value of `update_lag` option is subtracted from previous update time before request updated data.

From 5e017654f9ed81cf77b56f6996764a0b3acf7e4d Mon Sep 17 00:00:00 2001
From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com>
Date: Thu, 24 Jun 2021 12:53:45 +0300
Subject: [PATCH 076/183] Update
 docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 .../external-dictionaries/external-dicts-dict-lifetime.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
index 81b61566d86..7f0686cfcef 100644
--- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
+++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
@@ -90,9 +90,9 @@ SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source wher

 Словари `Flat`, `Hashed` и `ComplexKeyHashed` могут запрашивать только те данные, которые были изменены после предыдущего обновления. Если `update_field` указано как часть конфигурации источника словаря, к запросу данных будет добавлено время предыдущего обновления в секундах. В зависимости от типа источника (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, ODBC) к `update_field` будет применена соответствующая логика перед запросом данных из внешнего источника.

-- Если источник HTTP, то `update_field` будет добавлена в качестве параметра запроса, а время последнего обновления — в качестве значения параметра.
-- Если источник Executable, то `update_field` будет добавлена в качестве аргумента исполняемого скрипта, время последнего обновления — в качестве значения аргумента.
-- Если источник ClickHouse, MySQL, PostgreSQL или ODBC, то будет дополнительная часть запроса `WHERE`, где `update_field` будет больше или равна времени последнего обновления.
+- Если источник HTTP, то `update_field` будет добавлено в качестве параметра запроса, а время последнего обновления — в качестве значения параметра.
+- Если источник Executable, то `update_field` будет добавлено в качестве аргумента исполняемого скрипта, время последнего обновления — в качестве значения аргумента.
+- Если источник ClickHouse, MySQL, PostgreSQL или ODBC, то будет дополнительная часть запроса `WHERE`, где `update_field` будет больше или равно времени последнего обновления.

 Если установлена опция `update_field`, то может быть установлена дополнительная опция `update_lag`. Значение параметра `update_lag` вычитается из времени предыдущего обновления перед запросом обновленных данных.

From f1dfeb553f7a7c282714496fbf8e19d0add16719 Mon Sep 17 00:00:00 2001
From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com>
Date: Thu, 24 Jun 2021 12:53:51 +0300
Subject: [PATCH 077/183] Update
 docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 .../external-dictionaries/external-dicts-dict-lifetime.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
index 7f0686cfcef..9d4205ab1d1 100644
--- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
+++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
@@ -94,7 +94,7 @@ SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source wher
 - Если источник Executable, то `update_field` будет добавлено в качестве аргумента исполняемого скрипта, время последнего обновления — в качестве значения аргумента.
 - Если источник ClickHouse, MySQL, PostgreSQL или ODBC, то будет дополнительная часть запроса `WHERE`, где `update_field` будет больше или равно времени последнего обновления.

-Если установлена опция `update_field`, то может быть установлена дополнительная опция `update_lag`. Значение параметра `update_lag` вычитается из времени предыдущего обновления перед запросом обновленных данных.
+Если установлена опция `update_field`, то может быть установлена дополнительная опция `update_lag`. Значение `update_lag` вычитается из времени предыдущего обновления перед запросом обновленных данных.

 Пример настройки:
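Patches 073–077 all refine the same `update_field`/`update_lag` passage in the English and Russian pages. A sketch of a dictionary built around the `SOURCE(CLICKHOUSE(... update_field 'added_time' update_lag 15))` fragment shown in those pages; the connection parameters, table and attribute names here are hypothetical:

``` sql
-- Incremental-update dictionary: on each refresh only rows with
-- added_time >= (previous update time - update_lag) are requested
-- from the source, instead of re-reading the whole table.
CREATE DICTIONARY user_names
(
    id UInt64,
    name String
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' password '' db 'default' table 'users' update_field 'added_time' update_lag 15))
LAYOUT(HASHED())
LIFETIME(MIN 300 MAX 360);
```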
Пример настройки: From bb35a113bf57e92c8194b22db2bc4731f1108c17 Mon Sep 17 00:00:00 2001 From: nickzhwang Date: Thu, 24 Jun 2021 20:18:14 +0800 Subject: [PATCH 078/183] fix broken links and incorrect translations --- .../system-tables/asynchronous_metric_log.md | 2 +- .../system-tables/asynchronous_metrics.md | 4 +- docs/en/operations/system-tables/clusters.md | 2 +- docs/en/operations/system-tables/columns.md | 2 +- .../operations/system-tables/contributors.md | 2 +- .../operations/system-tables/current-roles.md | 2 +- .../system-tables/data_type_families.md | 2 +- docs/en/operations/system-tables/databases.md | 2 +- .../system-tables/detached_parts.md | 2 +- .../operations/system-tables/dictionaries.md | 2 +- docs/en/operations/system-tables/disks.md | 2 +- .../operations/system-tables/enabled-roles.md | 2 +- docs/en/operations/system-tables/events.md | 2 +- docs/en/operations/system-tables/functions.md | 2 +- docs/en/operations/system-tables/grants.md | 2 +- .../system-tables/graphite_retentions.md | 2 +- docs/en/operations/system-tables/licenses.md | 2 +- .../system-tables/merge_tree_settings.md | 2 +- docs/en/operations/system-tables/merges.md | 2 +- .../en/operations/system-tables/metric_log.md | 2 +- docs/en/operations/system-tables/metrics.md | 2 +- docs/en/operations/system-tables/mutations.md | 2 +- docs/en/operations/system-tables/numbers.md | 2 +- .../en/operations/system-tables/numbers_mt.md | 2 +- docs/en/operations/system-tables/one.md | 2 +- docs/en/operations/system-tables/part_log.md | 2 +- docs/en/operations/system-tables/parts.md | 2 +- docs/en/operations/system-tables/processes.md | 2 +- docs/en/operations/system-tables/query_log.md | 2 +- .../system-tables/query_thread_log.md | 2 +- .../operations/system-tables/quota_limits.md | 2 +- .../operations/system-tables/quota_usage.md | 2 +- docs/en/operations/system-tables/quotas.md | 2 +- .../operations/system-tables/quotas_usage.md | 2 +- docs/en/operations/system-tables/replicas.md | 2 +- .../operations/system-tables/role-grants.md | 2 +- docs/en/operations/system-tables/roles.md | 2 +- .../operations/system-tables/row_policies.md | 2 +- docs/en/operations/system-tables/settings.md | 2 +- .../settings_profile_elements.md | 2 +- .../system-tables/settings_profiles.md | 2 +- .../system-tables/storage_policies.md | 2 +- .../operations/system-tables/table_engines.md | 2 +- docs/en/operations/system-tables/tables.md | 2 +- docs/en/operations/system-tables/text_log.md | 2 +- .../en/operations/system-tables/time_zones.md | 2 +- docs/en/operations/system-tables/trace_log.md | 2 +- docs/en/operations/system-tables/users.md | 2 +- docs/en/operations/system-tables/zookeeper.md | 2 +- .../system-tables/asynchronous_metric_log.md | 2 +- .../system-tables/asynchronous_metrics.md | 14 +++---- docs/zh/operations/system-tables/clusters.md | 25 ++++++------ docs/zh/operations/system-tables/columns.md | 2 + docs/zh/operations/system-tables/tables.md | 26 +++++++------ docs/zh/operations/system-tables/zookeeper.md | 39 ++++++++++--------- 55 files changed, 109 insertions(+), 99 deletions(-) diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md index 75607cc30b0..56803174cbd 100644 --- a/docs/en/operations/system-tables/asynchronous_metric_log.md +++ b/docs/en/operations/system-tables/asynchronous_metric_log.md @@ -36,4 +36,4 @@ SELECT * FROM system.asynchronous_metric_log LIMIT 10 - [system.asynchronous_metrics](../system-tables/asynchronous_metrics.md) — 
Contains metrics that are calculated periodically in the background. - [system.metric_log](../system-tables/metric_log.md) — Contains history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/asynchronous_metric_log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/asynchronous_metric_log) diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index a401c7a723b..d9f427cf783 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -33,6 +33,6 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10 - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. - [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics. - [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that have occurred. -- [system.metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. +- [system.metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`. - [Original article](https://clickhouse.tech/docs/en/operations/system_tables/asynchronous_metrics) \ No newline at end of file + [Original article](https://clickhouse.tech/docs/en/operations/system-tables/asynchronous_metrics) diff --git a/docs/en/operations/system-tables/clusters.md b/docs/en/operations/system-tables/clusters.md index e9721379d7b..bf56ac7d33f 100644 --- a/docs/en/operations/system-tables/clusters.md +++ b/docs/en/operations/system-tables/clusters.md @@ -23,4 +23,4 @@ Please note that `errors_count` is updated once per query to the cluster, but `e - [distributed\_replica\_error\_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap) - [distributed\_replica\_error\_half\_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/clusters) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/clusters) diff --git a/docs/en/operations/system-tables/columns.md b/docs/en/operations/system-tables/columns.md index 4d8077ddeac..33b284fc816 100644 --- a/docs/en/operations/system-tables/columns.md +++ b/docs/en/operations/system-tables/columns.md @@ -21,4 +21,4 @@ The `system.columns` table contains the following columns (the column type is sh - `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression. - `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression. 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/columns) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/columns) diff --git a/docs/en/operations/system-tables/contributors.md b/docs/en/operations/system-tables/contributors.md index 37d01ef6204..a718c403c11 100644 --- a/docs/en/operations/system-tables/contributors.md +++ b/docs/en/operations/system-tables/contributors.md @@ -38,4 +38,4 @@ SELECT * FROM system.contributors WHERE name = 'Olga Khvostikova' │ Olga Khvostikova │ └──────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/contributors) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/contributors) diff --git a/docs/en/operations/system-tables/current-roles.md b/docs/en/operations/system-tables/current-roles.md index f10dbe69918..56dbb602637 100644 --- a/docs/en/operations/system-tables/current-roles.md +++ b/docs/en/operations/system-tables/current-roles.md @@ -8,4 +8,4 @@ Columns: - `with_admin_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `current_role` is a role with `ADMIN OPTION` privilege. - `is_default` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `current_role` is a default role. - [Original article](https://clickhouse.tech/docs/en/operations/system_tables/current-roles) + [Original article](https://clickhouse.tech/docs/en/operations/system-tables/current-roles) diff --git a/docs/en/operations/system-tables/data_type_families.md b/docs/en/operations/system-tables/data_type_families.md index ddda91ed151..e149b9de3e4 100644 --- a/docs/en/operations/system-tables/data_type_families.md +++ b/docs/en/operations/system-tables/data_type_families.md @@ -33,4 +33,4 @@ SELECT * FROM system.data_type_families WHERE alias_to = 'String' - [Syntax](../../sql-reference/syntax.md) — Information about supported syntax. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/data_type_families) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/data_type_families) diff --git a/docs/en/operations/system-tables/databases.md b/docs/en/operations/system-tables/databases.md index 84b696a3bf8..9318df30a28 100644 --- a/docs/en/operations/system-tables/databases.md +++ b/docs/en/operations/system-tables/databases.md @@ -6,4 +6,4 @@ Each database that the server knows about has a corresponding entry in the table This system table is used for implementing the `SHOW DATABASES` query. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/databases) \ No newline at end of file +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/databases) diff --git a/docs/en/operations/system-tables/detached_parts.md b/docs/en/operations/system-tables/detached_parts.md index ade89bd40c4..a5748128426 100644 --- a/docs/en/operations/system-tables/detached_parts.md +++ b/docs/en/operations/system-tables/detached_parts.md @@ -8,4 +8,4 @@ For the description of other columns, see [system.parts](../../operations/system If part name is invalid, values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../../sql-reference/statements/alter/partition.md#alter_drop-detached). 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/detached_parts) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/detached_parts) diff --git a/docs/en/operations/system-tables/dictionaries.md b/docs/en/operations/system-tables/dictionaries.md index 3d3bbe2af4e..4a94ff5b41b 100644 --- a/docs/en/operations/system-tables/dictionaries.md +++ b/docs/en/operations/system-tables/dictionaries.md @@ -60,4 +60,4 @@ SELECT * FROM system.dictionaries └──────────┴──────┴────────┴─────────────┴──────┴────────┴──────────────────────────────────────┴─────────────────────┴─────────────────┴─────────────┴──────────┴───────────────┴───────────────────────┴────────────────────────────┴──────────────┴──────────────┴─────────────────────┴──────────────────────────────┘───────────────────────┴────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/dictionaries) \ No newline at end of file +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/dictionaries) diff --git a/docs/en/operations/system-tables/disks.md b/docs/en/operations/system-tables/disks.md index 9c01b6d9aa4..2bd871e73ee 100644 --- a/docs/en/operations/system-tables/disks.md +++ b/docs/en/operations/system-tables/disks.md @@ -10,4 +10,4 @@ Columns: - `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Disk volume in bytes. - `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/disks) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/disks) diff --git a/docs/en/operations/system-tables/enabled-roles.md b/docs/en/operations/system-tables/enabled-roles.md index 27875fcf984..c03129b32dd 100644 --- a/docs/en/operations/system-tables/enabled-roles.md +++ b/docs/en/operations/system-tables/enabled-roles.md @@ -9,4 +9,4 @@ Columns: - `is_current` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `enabled_role` is a current role of a current user. - `is_default` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `enabled_role` is a default role. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/enabled-roles) \ No newline at end of file +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/enabled-roles) diff --git a/docs/en/operations/system-tables/events.md b/docs/en/operations/system-tables/events.md index d23533189c7..e6d4e1cf905 100644 --- a/docs/en/operations/system-tables/events.md +++ b/docs/en/operations/system-tables/events.md @@ -31,4 +31,4 @@ SELECT * FROM system.events LIMIT 5 - [system.metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/events) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/events) diff --git a/docs/en/operations/system-tables/functions.md b/docs/en/operations/system-tables/functions.md index d9a5e3cc363..6441bbf59e4 100644 --- a/docs/en/operations/system-tables/functions.md +++ b/docs/en/operations/system-tables/functions.md @@ -7,4 +7,4 @@ Columns: - `name`(`String`) – The name of the function. - `is_aggregate`(`UInt8`) — Whether the function is aggregate. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/functions) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/functions) diff --git a/docs/en/operations/system-tables/grants.md b/docs/en/operations/system-tables/grants.md index fb2a91ab30a..927fa4f3227 100644 --- a/docs/en/operations/system-tables/grants.md +++ b/docs/en/operations/system-tables/grants.md @@ -21,4 +21,4 @@ Columns: - `grant_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Permission is granted `WITH GRANT OPTION`, see [GRANT](../../sql-reference/statements/grant.md#grant-privigele-syntax). -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/grants) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/grants) diff --git a/docs/en/operations/system-tables/graphite_retentions.md b/docs/en/operations/system-tables/graphite_retentions.md index f5d65bbe3fe..4aeccee9cfd 100644 --- a/docs/en/operations/system-tables/graphite_retentions.md +++ b/docs/en/operations/system-tables/graphite_retentions.md @@ -14,4 +14,4 @@ Columns: - `Tables.database` (Array(String)) - Array of names of database tables that use the `config_name` parameter. - `Tables.table` (Array(String)) - Array of table names that use the `config_name` parameter. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/graphite_retentions) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/graphite_retentions) diff --git a/docs/en/operations/system-tables/licenses.md b/docs/en/operations/system-tables/licenses.md index c95e4e8b9b4..a9cada507c6 100644 --- a/docs/en/operations/system-tables/licenses.md +++ b/docs/en/operations/system-tables/licenses.md @@ -36,4 +36,4 @@ SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15 ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/licenses) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/licenses) diff --git a/docs/en/operations/system-tables/merge_tree_settings.md b/docs/en/operations/system-tables/merge_tree_settings.md index 78aab24cb41..2d593392894 100644 --- a/docs/en/operations/system-tables/merge_tree_settings.md +++ b/docs/en/operations/system-tables/merge_tree_settings.md @@ -10,4 +10,4 @@ Columns: - `type` (String) — Setting type (implementation specific string value). - `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed. 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/merge_tree_settings) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/merge_tree_settings) diff --git a/docs/en/operations/system-tables/merges.md b/docs/en/operations/system-tables/merges.md index 3e712e2962c..c7bdaee42e1 100644 --- a/docs/en/operations/system-tables/merges.md +++ b/docs/en/operations/system-tables/merges.md @@ -22,4 +22,4 @@ Columns: - `merge_type` — The type of current merge. Empty if it's an mutation. - `merge_algorithm` — The algorithm used in current merge. Empty if it's an mutation. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/merges) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/merges) diff --git a/docs/en/operations/system-tables/metric_log.md b/docs/en/operations/system-tables/metric_log.md index 063fe81923b..1166ead7f9d 100644 --- a/docs/en/operations/system-tables/metric_log.md +++ b/docs/en/operations/system-tables/metric_log.md @@ -54,4 +54,4 @@ CurrentMetric_DistributedFilesToInsert: 0 - [system.metrics](../../operations/system-tables/metrics.md) — Contains instantly calculated metrics. - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/metric_log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/metric_log) diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index cf4c6efe8d4..fcd499cc2fc 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -38,4 +38,4 @@ SELECT * FROM system.metrics LIMIT 10 - [system.metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/metrics) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/metrics) diff --git a/docs/en/operations/system-tables/mutations.md b/docs/en/operations/system-tables/mutations.md index e5ea7eab457..24fa559197c 100644 --- a/docs/en/operations/system-tables/mutations.md +++ b/docs/en/operations/system-tables/mutations.md @@ -45,4 +45,4 @@ If there were problems with mutating some data parts, the following columns cont - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine - [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/mutations) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/mutations) diff --git a/docs/en/operations/system-tables/numbers.md b/docs/en/operations/system-tables/numbers.md index 9b7e148242c..d75487c0297 100644 --- a/docs/en/operations/system-tables/numbers.md +++ b/docs/en/operations/system-tables/numbers.md @@ -6,4 +6,4 @@ You can use this table for tests, or if you need to do a brute force search. Reads from this table are not parallelized. 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/numbers) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/numbers) diff --git a/docs/en/operations/system-tables/numbers_mt.md b/docs/en/operations/system-tables/numbers_mt.md index 870b256223e..d8b44ce4e7a 100644 --- a/docs/en/operations/system-tables/numbers_mt.md +++ b/docs/en/operations/system-tables/numbers_mt.md @@ -4,4 +4,4 @@ The same as [system.numbers](../../operations/system-tables/numbers.md) but read Used for tests. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/numbers_mt) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/numbers_mt) diff --git a/docs/en/operations/system-tables/one.md b/docs/en/operations/system-tables/one.md index 854fab32730..ee8d79f0f17 100644 --- a/docs/en/operations/system-tables/one.md +++ b/docs/en/operations/system-tables/one.md @@ -6,4 +6,4 @@ This table is used if a `SELECT` query doesn’t specify the `FROM` clause. This is similar to the `DUAL` table found in other DBMSs. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/one) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/one) diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md index e7c157077e3..e340b0fa046 100644 --- a/docs/en/operations/system-tables/part_log.md +++ b/docs/en/operations/system-tables/part_log.md @@ -31,4 +31,4 @@ The `system.part_log` table contains the following columns: The `system.part_log` table is created after the first inserting data to the `MergeTree` table. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/part_log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/part_log) diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md index f02d1ebc114..872125d7fda 100644 --- a/docs/en/operations/system-tables/parts.md +++ b/docs/en/operations/system-tables/parts.md @@ -155,4 +155,4 @@ move_ttl_info.max: [] - [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md) - [TTL for Columns and Tables](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/parts) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/parts) diff --git a/docs/en/operations/system-tables/processes.md b/docs/en/operations/system-tables/processes.md index 0e44c61a4f6..43595ac11f7 100644 --- a/docs/en/operations/system-tables/processes.md +++ b/docs/en/operations/system-tables/processes.md @@ -14,4 +14,4 @@ Columns: - `query` (String) – The query text. For `INSERT`, it doesn’t include the data to insert. - `query_id` (String) – Query ID, if defined. 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/processes) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/processes) diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index 72927b5a7e9..a334e49a54b 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -138,5 +138,5 @@ Settings.Values: ['0','random','1','10000000000'] - [system.query\_thread\_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/query_log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/query_log) diff --git a/docs/en/operations/system-tables/query_thread_log.md b/docs/en/operations/system-tables/query_thread_log.md index 3dcd05c4cc3..35f9b4e1341 100644 --- a/docs/en/operations/system-tables/query_thread_log.md +++ b/docs/en/operations/system-tables/query_thread_log.md @@ -113,4 +113,4 @@ ProfileEvents.Values: [1,97,81,5,81] - [system.query\_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about queries execution. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/query_thread_log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/query_thread_log) diff --git a/docs/en/operations/system-tables/quota_limits.md b/docs/en/operations/system-tables/quota_limits.md index 065296f5df3..b8964b74af4 100644 --- a/docs/en/operations/system-tables/quota_limits.md +++ b/docs/en/operations/system-tables/quota_limits.md @@ -16,4 +16,4 @@ Columns: - `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of bytes read from all tables and table functions participated in queries. - `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of the query execution time, in seconds. 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/quota_limits) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/quota_limits) diff --git a/docs/en/operations/system-tables/quota_usage.md b/docs/en/operations/system-tables/quota_usage.md index 0eb59fd6453..7f8495c0288 100644 --- a/docs/en/operations/system-tables/quota_usage.md +++ b/docs/en/operations/system-tables/quota_usage.md @@ -27,4 +27,4 @@ Columns: - [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/quota_usage) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/quota_usage) diff --git a/docs/en/operations/system-tables/quotas.md b/docs/en/operations/system-tables/quotas.md index f4f52a4a131..1692f3911f2 100644 --- a/docs/en/operations/system-tables/quotas.md +++ b/docs/en/operations/system-tables/quotas.md @@ -24,5 +24,5 @@ Columns: - [SHOW QUOTAS](../../sql-reference/statements/show.md#show-quotas-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/quotas) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/quotas) diff --git a/docs/en/operations/system-tables/quotas_usage.md b/docs/en/operations/system-tables/quotas_usage.md index ed6be820b26..ec3e0db7b3d 100644 --- a/docs/en/operations/system-tables/quotas_usage.md +++ b/docs/en/operations/system-tables/quotas_usage.md @@ -28,4 +28,4 @@ Columns: - [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/quotas_usage) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/quotas_usage) diff --git a/docs/en/operations/system-tables/replicas.md b/docs/en/operations/system-tables/replicas.md index 17519690951..a96b517518e 100644 --- a/docs/en/operations/system-tables/replicas.md +++ b/docs/en/operations/system-tables/replicas.md @@ -120,5 +120,5 @@ WHERE If this query doesn’t return anything, it means that everything is fine. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/replicas) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/replicas) diff --git a/docs/en/operations/system-tables/role-grants.md b/docs/en/operations/system-tables/role-grants.md index 5eb18b0dca7..d90bc1f77be 100644 --- a/docs/en/operations/system-tables/role-grants.md +++ b/docs/en/operations/system-tables/role-grants.md @@ -18,4 +18,4 @@ Columns: - 1 — The role has `ADMIN OPTION` privilege. - 0 — The role without `ADMIN OPTION` privilege. 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/role-grants) \ No newline at end of file +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/role-grants) diff --git a/docs/en/operations/system-tables/roles.md b/docs/en/operations/system-tables/roles.md index 4ab5102dfc8..e68d5ed290a 100644 --- a/docs/en/operations/system-tables/roles.md +++ b/docs/en/operations/system-tables/roles.md @@ -12,4 +12,4 @@ Columns: - [SHOW ROLES](../../sql-reference/statements/show.md#show-roles-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/roles) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/roles) diff --git a/docs/en/operations/system-tables/row_policies.md b/docs/en/operations/system-tables/row_policies.md index 97474d1b3ee..767270d64ae 100644 --- a/docs/en/operations/system-tables/row_policies.md +++ b/docs/en/operations/system-tables/row_policies.md @@ -31,4 +31,4 @@ Columns: - [SHOW POLICIES](../../sql-reference/statements/show.md#show-policies-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/row_policies) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/row_policies) diff --git a/docs/en/operations/system-tables/settings.md b/docs/en/operations/system-tables/settings.md index a1db0a3d558..685bd25bd9e 100644 --- a/docs/en/operations/system-tables/settings.md +++ b/docs/en/operations/system-tables/settings.md @@ -49,4 +49,4 @@ SELECT * FROM system.settings WHERE changed AND name='load_balancing' - [Permissions for Queries](../../operations/settings/permissions-for-queries.md#settings_readonly) - [Constraints on Settings](../../operations/settings/constraints-on-settings.md) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/settings) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/settings) diff --git a/docs/en/operations/system-tables/settings_profile_elements.md b/docs/en/operations/system-tables/settings_profile_elements.md index d0f2c3c4527..3c8c728e645 100644 --- a/docs/en/operations/system-tables/settings_profile_elements.md +++ b/docs/en/operations/system-tables/settings_profile_elements.md @@ -27,4 +27,4 @@ Columns: - `inherit_profile` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — A parent profile for this setting profile. `NULL` if not set. Setting profile will inherit all the settings' values and constraints (`min`, `max`, `readonly`) from its parent profiles. 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/settings_profile_elements) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/settings_profile_elements) diff --git a/docs/en/operations/system-tables/settings_profiles.md b/docs/en/operations/system-tables/settings_profiles.md index a06b26b9cb6..80dc5172f4e 100644 --- a/docs/en/operations/system-tables/settings_profiles.md +++ b/docs/en/operations/system-tables/settings_profiles.md @@ -21,4 +21,4 @@ Columns: - [SHOW PROFILES](../../sql-reference/statements/show.md#show-profiles-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/settings_profiles) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/settings_profiles) diff --git a/docs/en/operations/system-tables/storage_policies.md b/docs/en/operations/system-tables/storage_policies.md index c8171b50aed..1c243fc58d5 100644 --- a/docs/en/operations/system-tables/storage_policies.md +++ b/docs/en/operations/system-tables/storage_policies.md @@ -13,4 +13,4 @@ Columns: If the storage policy contains more then one volume, then information for each volume is stored in the individual row of the table. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/storage_policies) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/storage_policies) diff --git a/docs/en/operations/system-tables/table_engines.md b/docs/en/operations/system-tables/table_engines.md index 4ca1fc657ee..dbaad893efa 100644 --- a/docs/en/operations/system-tables/table_engines.md +++ b/docs/en/operations/system-tables/table_engines.md @@ -34,4 +34,4 @@ WHERE name in ('Kafka', 'MergeTree', 'ReplicatedCollapsingMergeTree') - Kafka [settings](../../engines/table-engines/integrations/kafka.md#table_engine-kafka-creating-a-table) - Join [settings](../../engines/table-engines/special/join.md#join-limitations-and-settings) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/table_engines) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/table_engines) diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md index e69b8aa67a0..41a296705ba 100644 --- a/docs/en/operations/system-tables/tables.md +++ b/docs/en/operations/system-tables/tables.md @@ -52,4 +52,4 @@ This table contains the following columns (the column type is shown in brackets) The `system.tables` table is used in `SHOW TABLES` query implementation. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/tables) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/tables) diff --git a/docs/en/operations/system-tables/text_log.md b/docs/en/operations/system-tables/text_log.md index bd92519b96b..32f626b3db6 100644 --- a/docs/en/operations/system-tables/text_log.md +++ b/docs/en/operations/system-tables/text_log.md @@ -25,4 +25,4 @@ Columns: - `source_file` (LowCardinality(String)) — Source file from which the logging was done. - `source_line` (UInt64) — Source line from which the logging was done. 
- [Original article](https://clickhouse.tech/docs/en/operations/system_tables/text_log) \ No newline at end of file + [Original article](https://clickhouse.tech/docs/en/operations/system-tables/text_log) diff --git a/docs/en/operations/system-tables/time_zones.md b/docs/en/operations/system-tables/time_zones.md index 1b84ae7fe37..fa467124884 100644 --- a/docs/en/operations/system-tables/time_zones.md +++ b/docs/en/operations/system-tables/time_zones.md @@ -27,4 +27,4 @@ SELECT * FROM system.time_zones LIMIT 10 └────────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/time_zones) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/time_zones) diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md index b911fdd2263..3d83db3bb89 100644 --- a/docs/en/operations/system-tables/trace_log.md +++ b/docs/en/operations/system-tables/trace_log.md @@ -47,4 +47,4 @@ query_id: acc4d61f-5bd1-4a3e-bc91-2180be37c915 trace: [94222141367858,94222152240175,94222152325351,94222152329944,94222152330796,94222151449980,94222144088167,94222151682763,94222144088167,94222151682763,94222144088167,94222144058283,94222144059248,94222091840750,94222091842302,94222091831228,94222189631488,140509950166747,140509942945935] ``` - [Original article](https://clickhouse.tech/docs/en/operations/system_tables/trace_log) \ No newline at end of file + [Original article](https://clickhouse.tech/docs/en/operations/system-tables/trace_log) diff --git a/docs/en/operations/system-tables/users.md b/docs/en/operations/system-tables/users.md index 2227816aff3..11fdeb1e9ae 100644 --- a/docs/en/operations/system-tables/users.md +++ b/docs/en/operations/system-tables/users.md @@ -31,4 +31,4 @@ Columns: - [SHOW USERS](../../sql-reference/statements/show.md#show-users-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/users) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/users) diff --git a/docs/en/operations/system-tables/zookeeper.md b/docs/en/operations/system-tables/zookeeper.md index ddb4d305964..c975970abd9 100644 --- a/docs/en/operations/system-tables/zookeeper.md +++ b/docs/en/operations/system-tables/zookeeper.md @@ -68,4 +68,4 @@ numChildren: 7 pzxid: 987021252247 path: /clickhouse/tables/01-08/visits/replicas ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/zookeeper) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/zookeeper) diff --git a/docs/zh/operations/system-tables/asynchronous_metric_log.md b/docs/zh/operations/system-tables/asynchronous_metric_log.md index 9f6c697a18e..c6eb7754c23 100644 --- a/docs/zh/operations/system-tables/asynchronous_metric_log.md +++ b/docs/zh/operations/system-tables/asynchronous_metric_log.md @@ -3,6 +3,6 @@ machine_translated: true machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 --- -## 系统。asynchronous\_metric\_log {#system-tables-async-log} +## system.asynchronous_metric_log {#system-tables-async-log} 包含以下内容的历史值 `system.asynchronous_log` (见 [系统。asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics)) diff --git a/docs/zh/operations/system-tables/asynchronous_metrics.md b/docs/zh/operations/system-tables/asynchronous_metrics.md index 2bd615085a8..9180e88f2d0 100644 --- a/docs/zh/operations/system-tables/asynchronous_metrics.md +++ 
b/docs/zh/operations/system-tables/asynchronous_metrics.md @@ -3,14 +3,14 @@ machine_translated: true machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 --- -# 系统。asynchronous\_metrics {#system_tables-asynchronous_metrics} +# system.asynchronous_metrics {#system_tables-asynchronous_metrics} 包含在后台定期计算的指标。 例如,在使用的RAM量。 列: -- `metric` ([字符串](../../sql-reference/data-types/string.md)) — Metric name. -- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. +- `metric` ([字符串](../../sql-reference/data-types/string.md)) — 指标名。 +- `value` ([Float64](../../sql-reference/data-types/float.md)) — 指标值。 **示例** @@ -35,7 +35,7 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10 **另请参阅** -- [监测](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. -- [系统。指标](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics. -- [系统。活动](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that have occurred. -- [系统。metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. +- [监测](../../operations/monitoring.md) — ClickHouse监控的基本概念。 +- [系统。指标](../../operations/system-tables/metrics.md#system_tables-metrics) — 包含即时计算的指标。 +- [系统。活动](../../operations/system-tables/events.md#system_tables-events) — 包含出现的事件的次数。 +- [系统。metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — 包含`system.metrics` 和 `system.events`表中的指标的历史值。 diff --git a/docs/zh/operations/system-tables/clusters.md b/docs/zh/operations/system-tables/clusters.md index 4bc8d4210ff..e901ab039b6 100644 --- a/docs/zh/operations/system-tables/clusters.md +++ b/docs/zh/operations/system-tables/clusters.md @@ -3,22 +3,23 @@ machine_translated: true machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 --- -# 系统。集群 {#system-clusters} +# system.clusters{#system-clusters} 包含有关配置文件中可用的集群及其中的服务器的信息。 列: -- `cluster` (String) — The cluster name. -- `shard_num` (UInt32) — The shard number in the cluster, starting from 1. -- `shard_weight` (UInt32) — The relative weight of the shard when writing data. -- `replica_num` (UInt32) — The replica number in the shard, starting from 1. -- `host_name` (String) — The host name, as specified in the config. -- `host_address` (String) — The host IP address obtained from DNS. -- `port` (UInt16) — The port to use for connecting to the server. -- `user` (String) — The name of the user for connecting to the server. 
-- `errors_count` (UInt32)-此主机无法到达副本的次数。 -- `estimated_recovery_time` (UInt32)-剩下的秒数,直到副本错误计数归零,它被认为是恢复正常。 +- `cluster` (String) — 集群名。 +- `shard_num` (UInt32) — 集群中的分片数,从1开始。 +- `shard_weight` (UInt32) — 写数据时该分片的相对权重。 +- `replica_num` (UInt32) — 分片的副本数量,从1开始。 +- `host_name` (String) — 配置中指定的主机名。 +- `host_address` (String) — 从DNS获取的主机IP地址。 +- `port` (UInt16) — 连接到服务器的端口。 +- `user` (String) — 连接到服务器的用户名。 +- `errors_count` (UInt32) - 此主机无法访问副本的次数。 +- `slowdowns_count` (UInt32) - 与对冲请求建立连接时导致更改副本的减速次数。 +- `estimated_recovery_time` (UInt32) - 剩下的秒数,直到副本错误计数归零并被视为恢复正常。 请注意 `errors_count` 每个查询集群更新一次,但 `estimated_recovery_time` 按需重新计算。 所以有可能是非零的情况 `errors_count` 和零 `estimated_recovery_time`,下一个查询将为零 `errors_count` 并尝试使用副本,就好像它没有错误。 @@ -27,3 +28,5 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 - [表引擎分布式](../../engines/table-engines/special/distributed.md) - [distributed\_replica\_error\_cap设置](../../operations/settings/settings.md#settings-distributed_replica_error_cap) - [distributed\_replica\_error\_half\_life设置](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) + +[原文](https://clickhouse.tech/docs/zh/operations/system-tables/clusters) diff --git a/docs/zh/operations/system-tables/columns.md b/docs/zh/operations/system-tables/columns.md index 24296dc715c..b21be98c0dc 100644 --- a/docs/zh/operations/system-tables/columns.md +++ b/docs/zh/operations/system-tables/columns.md @@ -25,3 +25,5 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 - `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression. - `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression. - `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression. + +[原文](https://clickhouse.tech/docs/zh/operations/system-tables/columns) diff --git a/docs/zh/operations/system-tables/tables.md b/docs/zh/operations/system-tables/tables.md index a690e938a3a..0c3e913b9bb 100644 --- a/docs/zh/operations/system-tables/tables.md +++ b/docs/zh/operations/system-tables/tables.md @@ -5,15 +5,15 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 # 系统。表 {#system-tables} -包含服务器知道的每个表的元数据。 分离的表不显示在 `system.tables`. +包含服务器知道的每个表的元数据。 分离的表不显示在 `system.tables`。 此表包含以下列(列类型显示在括号中): -- `database` (String) — The name of the database the table is in. +- `database` (String) — 表所在的数据库表名。 -- `name` (String) — Table name. +- `name` (String) — 表名。 -- `engine` (String) — Table engine name (without parameters). +- `engine` (String) — 表引擎名 (不包含参数)。 - `is_temporary` (UInt8)-指示表是否是临时的标志。 @@ -23,11 +23,11 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 - `metadata_modification_time` (DateTime)-表元数据的最新修改时间。 -- `dependencies_database` (数组(字符串))-数据库依赖关系. 
+- `dependencies_database` (数组(字符串))-数据库依赖关系。

- `dependencies_table` (数组(字符串))-表依赖关系 ([MaterializedView](../../engines/table-engines/special/materializedview.md) 基于当前表的表)。

-- `create_table_query` (String)-用于创建表的查询。
+- `create_table_query` (String)-用于创建表的SQL语句。

- `engine_full` (String)-表引擎的参数。

@@ -44,11 +44,15 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3

    - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
    - [分布](../../engines/table-engines/special/distributed.md#distributed)

-- `total_rows` (Nullable(UInt64))-总行数,如果可以快速确定表中的确切行数,否则 `Null` (包括内衣 `Buffer` 表)。
+- `total_rows` (Nullable(UInt64))-总行数,如果可以快速确定表中的确切行数,否则行数为`Null`(包括底层 `Buffer` 表)。

-- `total_bytes` (Nullable(UInt64))-总字节数,如果可以快速确定存储表的确切字节数,否则 `Null` (**不** 包括任何底层存储)。
+- `total_bytes` (Nullable(UInt64))-总字节数,如果可以快速确定存储表的确切字节数,否则字节数为`Null`(即**不**包括任何底层存储)。

-    - If the table stores data on disk, returns used space on disk (i.e. compressed).
-    - 如果表在内存中存储数据,返回在内存中使用的近似字节数.
+    - 如果表将数据存在磁盘上,返回实际使用的磁盘空间(压缩后)。
+    - 如果表在内存中存储数据,返回在内存中使用的近似字节数。

-该 `system.tables` 表中使用 `SHOW TABLES` 查询实现。
+- `lifetime_rows` (Nullable(UInt64))-服务启动后插入的总行数(只针对`Buffer`表)。
+
+`system.tables` 表被用于 `SHOW TABLES` 的查询实现中。
+
+[原文](https://clickhouse.tech/docs/zh/operations/system-tables/tables)
diff --git a/docs/zh/operations/system-tables/zookeeper.md b/docs/zh/operations/system-tables/zookeeper.md
index b66e5262df3..79c9c041ca8 100644
--- a/docs/zh/operations/system-tables/zookeeper.md
+++ b/docs/zh/operations/system-tables/zookeeper.md
@@ -3,31 +3,31 @@ machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
---

-# 系统。动物园管理员 {#system-zookeeper}
+# system.zookeeper {#system-zookeeper}

-如果未配置ZooKeeper,则表不存在。 允许从配置中定义的ZooKeeper集群读取数据。
-查询必须具有 ‘path’ WHERE子句中的平等条件。 这是ZooKeeper中您想要获取数据的孩子的路径。
+如果未配置ZooKeeper,则该表不存在。允许从配置中定义的ZooKeeper集群读取数据。
+查询的 WHERE 子句中必须包含 ‘path’ 的相等条件。这是 ZooKeeper 中您想要获取其子节点数据的路径。

-查询 `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` 输出对所有孩子的数据 `/clickhouse` 节点。
-要输出所有根节点的数据,write path= ‘/’.
+查询 `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` 输出 `/clickhouse` 节点所有子节点的数据。
+要输出所有根节点的数据,使用 path = ‘/’。
如果指定的路径 ‘path’ 不存在,将引发异常。

列:

-- `name` (String) — The name of the node.
-- `path` (String) — The path to the node.
-- `value` (String) — Node value.
-- `dataLength` (Int32) — Size of the value.
-- `numChildren` (Int32) — Number of descendants.
-- `czxid` (Int64) — ID of the transaction that created the node.
-- `mzxid` (Int64) — ID of the transaction that last changed the node.
-- `pzxid` (Int64) — ID of the transaction that last deleted or added descendants.
-- `ctime` (DateTime) — Time of node creation.
-- `mtime` (DateTime) — Time of the last modification of the node.
-- `version` (Int32) — Node version: the number of times the node was changed.
-- `cversion` (Int32) — Number of added or removed descendants.
-- `aversion` (Int32) — Number of changes to the ACL.
-- `ephemeralOwner` (Int64) — For ephemeral nodes, the ID of the session that owns this node.
+- `name` (String) — 节点的名字。
+- `path` (String) — 节点的路径。
+- `value` (String) — 节点的值。
+- `dataLength` (Int32) — 节点的值长度。
+- `numChildren` (Int32) — 子节点的个数。
+- `czxid` (Int64) — 创建该节点的事务ID。
+- `mzxid` (Int64) — 最后修改该节点的事务ID。
+- `pzxid` (Int64) — 最后删除或者增加子节点的事务ID。
+- `ctime` (DateTime) — 节点的创建时间。
+- `mtime` (DateTime) — 节点的最后修改时间。
+- `version` (Int32) — 节点版本:节点被修改的次数。
+- `cversion` (Int32) — 增加或删除子节点的个数。
+- `aversion` (Int32) — ACL的修改次数。
+- `ephemeralOwner` (Int64) — 针对临时节点,拥有该节点的会话ID。

示例:

@@ -73,3 +73,4 @@ numChildren: 7
pzxid: 987021252247
path: /clickhouse/tables/01-08/visits/replicas
```
+[原文](https://clickhouse.tech/docs/zh/operations/system-tables/zookeeper)
From 9fc890abf579c783fa6131eed915c187598b395a Mon Sep 17 00:00:00 2001
From: nickzhwang
Date: Thu, 24 Jun 2021 20:47:02 +0800
Subject: [PATCH 079/183] resolve conflicts
---
 .../system-tables/asynchronous_metric_log.md  |  2 +-
 .../system-tables/asynchronous_metrics.md     |  4 +-
 docs/en/operations/system-tables/clusters.md  |  2 +-
 docs/en/operations/system-tables/columns.md   |  2 +-
 .../operations/system-tables/contributors.md  |  2 +-
 .../operations/system-tables/current-roles.md |  2 +-
 .../system-tables/data_type_families.md       |  2 +-
 docs/en/operations/system-tables/databases.md |  2 +-
 .../system-tables/detached_parts.md           |  2 +-
 .../operations/system-tables/dictionaries.md  |  2 +-
 docs/en/operations/system-tables/disks.md     |  6 +--
 .../operations/system-tables/enabled-roles.md |  2 +-
 docs/en/operations/system-tables/events.md    |  2 +-
 docs/en/operations/system-tables/functions.md |  6 +--
 docs/en/operations/system-tables/grants.md    |  2 +-
 .../system-tables/graphite_retentions.md      |  2 +-
 docs/en/operations/system-tables/licenses.md  |  2 +-
 .../system-tables/merge_tree_settings.md      |  2 +-
 docs/en/operations/system-tables/merges.md    |  2 +-
 .../en/operations/system-tables/metric_log.md |  2 +-
 docs/en/operations/system-tables/metrics.md   |  2 +-
 docs/en/operations/system-tables/mutations.md |  2 +-
 docs/en/operations/system-tables/numbers.md   |  2 +-
 .../en/operations/system-tables/numbers_mt.md |  2 +-
 docs/en/operations/system-tables/one.md       |  2 +-
 docs/en/operations/system-tables/part_log.md  |  2 +-
 docs/en/operations/system-tables/parts.md     |  2 +-
 docs/en/operations/system-tables/processes.md |  3 +-
 docs/en/operations/system-tables/query_log.md |  2 +-
 .../system-tables/query_thread_log.md         |  2 +-
 .../operations/system-tables/quota_limits.md  |  2 +
 .../operations/system-tables/quota_usage.md   |  2 +
 docs/en/operations/system-tables/quotas.md    |  2 +-
 .../operations/system-tables/quotas_usage.md  |  4 +-
 docs/en/operations/system-tables/replicas.md  |  2 +-
 .../operations/system-tables/role-grants.md   |  2 +-
 docs/en/operations/system-tables/roles.md     |  2 +-
 .../operations/system-tables/row_policies.md  |  2 +-
 docs/en/operations/system-tables/settings.md  |  2 +-
 .../settings_profile_elements.md              |  2 +-
 .../system-tables/settings_profiles.md        |  2 +-
 .../system-tables/storage_policies.md         |  2 +-
 .../operations/system-tables/table_engines.md |  2 +-
 docs/en/operations/system-tables/tables.md    |  2 +-
 docs/en/operations/system-tables/text_log.md  |  2 +-
 .../en/operations/system-tables/time_zones.md |  2 +-
 docs/en/operations/system-tables/trace_log.md |  1 -
 docs/en/operations/system-tables/users.md     |  2 +-
 docs/en/operations/system-tables/zookeeper.md |  2 +-
 .../system-tables/asynchronous_metric_log.md  |  2 +-
 .../system-tables/asynchronous_metrics.md     | 15 ++++---
 docs/zh/operations/system-tables/clusters.md  | 25 ++++++------
 docs/zh/operations/system-tables/columns.md   |  2 +
docs/zh/operations/system-tables/tables.md | 26 +++++++------ docs/zh/operations/system-tables/zookeeper.md | 39 ++++++++++--------- 55 files changed, 114 insertions(+), 105 deletions(-) diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md index 5dcfca5fbda..b0480dc256a 100644 --- a/docs/en/operations/system-tables/asynchronous_metric_log.md +++ b/docs/en/operations/system-tables/asynchronous_metric_log.md @@ -36,4 +36,4 @@ SELECT * FROM system.asynchronous_metric_log LIMIT 10 - [system.asynchronous_metrics](../system-tables/asynchronous_metrics.md) — Contains metrics, calculated periodically in the background. - [system.metric_log](../system-tables/metric_log.md) — Contains history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/asynchronous_metric_log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/asynchronous_metric_log) diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index b27434793c7..fc801aa1c80 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -33,6 +33,6 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10 - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. - [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics. - [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that have occurred. -- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. +- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`. 
- [Original article](https://clickhouse.tech/docs/en/operations/system_tables/asynchronous_metrics) \ No newline at end of file + [Original article](https://clickhouse.tech/docs/en/operations/system-tables/asynchronous_metrics) diff --git a/docs/en/operations/system-tables/clusters.md b/docs/en/operations/system-tables/clusters.md index 096eca12e7d..16cf183de53 100644 --- a/docs/en/operations/system-tables/clusters.md +++ b/docs/en/operations/system-tables/clusters.md @@ -68,4 +68,4 @@ estimated_recovery_time: 0 - [distributed_replica_error_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap) - [distributed_replica_error_half_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/clusters) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/clusters) diff --git a/docs/en/operations/system-tables/columns.md b/docs/en/operations/system-tables/columns.md index 9160dca9a1a..471a1af1fe0 100644 --- a/docs/en/operations/system-tables/columns.md +++ b/docs/en/operations/system-tables/columns.md @@ -69,4 +69,4 @@ is_in_sampling_key: 0 compression_codec: ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/columns) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/columns) diff --git a/docs/en/operations/system-tables/contributors.md b/docs/en/operations/system-tables/contributors.md index 37d01ef6204..a718c403c11 100644 --- a/docs/en/operations/system-tables/contributors.md +++ b/docs/en/operations/system-tables/contributors.md @@ -38,4 +38,4 @@ SELECT * FROM system.contributors WHERE name = 'Olga Khvostikova' │ Olga Khvostikova │ └──────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/contributors) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/contributors) diff --git a/docs/en/operations/system-tables/current-roles.md b/docs/en/operations/system-tables/current-roles.md index f10dbe69918..56dbb602637 100644 --- a/docs/en/operations/system-tables/current-roles.md +++ b/docs/en/operations/system-tables/current-roles.md @@ -8,4 +8,4 @@ Columns: - `with_admin_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `current_role` is a role with `ADMIN OPTION` privilege. - `is_default` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `current_role` is a default role. - [Original article](https://clickhouse.tech/docs/en/operations/system_tables/current-roles) + [Original article](https://clickhouse.tech/docs/en/operations/system-tables/current-roles) diff --git a/docs/en/operations/system-tables/data_type_families.md b/docs/en/operations/system-tables/data_type_families.md index 4e439f13aa5..fdce9c33b37 100644 --- a/docs/en/operations/system-tables/data_type_families.md +++ b/docs/en/operations/system-tables/data_type_families.md @@ -33,4 +33,4 @@ SELECT * FROM system.data_type_families WHERE alias_to = 'String' - [Syntax](../../sql-reference/syntax.md) — Information about supported syntax. 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/data_type_families) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/data_type_families) diff --git a/docs/en/operations/system-tables/databases.md b/docs/en/operations/system-tables/databases.md index 8ef5551d9b0..2c78fd25c2b 100644 --- a/docs/en/operations/system-tables/databases.md +++ b/docs/en/operations/system-tables/databases.md @@ -35,4 +35,4 @@ SELECT * FROM system.databases └────────────────────────────────┴────────┴────────────────────────────┴─────────────────────────────────────────────────────────────────────┴──────────────────────────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/databases) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/databases) diff --git a/docs/en/operations/system-tables/detached_parts.md b/docs/en/operations/system-tables/detached_parts.md index ade89bd40c4..a5748128426 100644 --- a/docs/en/operations/system-tables/detached_parts.md +++ b/docs/en/operations/system-tables/detached_parts.md @@ -8,4 +8,4 @@ For the description of other columns, see [system.parts](../../operations/system If part name is invalid, values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../../sql-reference/statements/alter/partition.md#alter_drop-detached). -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/detached_parts) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/detached_parts) diff --git a/docs/en/operations/system-tables/dictionaries.md b/docs/en/operations/system-tables/dictionaries.md index 2bc1be51f19..a34e893599c 100644 --- a/docs/en/operations/system-tables/dictionaries.md +++ b/docs/en/operations/system-tables/dictionaries.md @@ -61,4 +61,4 @@ SELECT * FROM system.dictionaries └──────────┴──────┴────────┴─────────────┴──────┴────────┴──────────────────────────────────────┴─────────────────────┴─────────────────┴─────────────┴──────────┴───────────────┴───────────────────────┴────────────────────────────┴──────────────┴──────────────┴─────────────────────┴──────────────────────────────┘───────────────────────┴────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/dictionaries) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/dictionaries) diff --git a/docs/en/operations/system-tables/disks.md b/docs/en/operations/system-tables/disks.md index e9d324580d8..833a0b3b16b 100644 --- a/docs/en/operations/system-tables/disks.md +++ b/docs/en/operations/system-tables/disks.md @@ -10,9 +10,6 @@ Columns: - `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Disk volume in bytes. - `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/disks) - - **Example** ```sql @@ -27,5 +24,4 @@ Columns: 1 rows in set. Elapsed: 0.001 sec. 
``` - - +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/disks) diff --git a/docs/en/operations/system-tables/enabled-roles.md b/docs/en/operations/system-tables/enabled-roles.md index 27875fcf984..c03129b32dd 100644 --- a/docs/en/operations/system-tables/enabled-roles.md +++ b/docs/en/operations/system-tables/enabled-roles.md @@ -9,4 +9,4 @@ Columns: - `is_current` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `enabled_role` is a current role of a current user. - `is_default` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `enabled_role` is a default role. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/enabled-roles) \ No newline at end of file +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/enabled-roles) diff --git a/docs/en/operations/system-tables/events.md b/docs/en/operations/system-tables/events.md index b4ced6e6bf6..2fcb5d8edec 100644 --- a/docs/en/operations/system-tables/events.md +++ b/docs/en/operations/system-tables/events.md @@ -31,4 +31,4 @@ SELECT * FROM system.events LIMIT 5 - [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/events) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/events) diff --git a/docs/en/operations/system-tables/functions.md b/docs/en/operations/system-tables/functions.md index fbcd4b7b723..888e768fc93 100644 --- a/docs/en/operations/system-tables/functions.md +++ b/docs/en/operations/system-tables/functions.md @@ -7,8 +7,6 @@ Columns: - `name`(`String`) – The name of the function. - `is_aggregate`(`UInt8`) — Whether the function is aggregate. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/functions) - **Example** ```sql @@ -30,4 +28,6 @@ Columns: └──────────────────────────┴──────────────┴──────────────────┴──────────┘ 10 rows in set. Elapsed: 0.002 sec. -``` \ No newline at end of file +``` + +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/functions) diff --git a/docs/en/operations/system-tables/grants.md b/docs/en/operations/system-tables/grants.md index fb2a91ab30a..927fa4f3227 100644 --- a/docs/en/operations/system-tables/grants.md +++ b/docs/en/operations/system-tables/grants.md @@ -21,4 +21,4 @@ Columns: - `grant_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Permission is granted `WITH GRANT OPTION`, see [GRANT](../../sql-reference/statements/grant.md#grant-privigele-syntax). -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/grants) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/grants) diff --git a/docs/en/operations/system-tables/graphite_retentions.md b/docs/en/operations/system-tables/graphite_retentions.md index 7ae5e0e36a8..0d56242dc95 100644 --- a/docs/en/operations/system-tables/graphite_retentions.md +++ b/docs/en/operations/system-tables/graphite_retentions.md @@ -14,4 +14,4 @@ Columns: - `Tables.database` (Array(String)) - Array of names of database tables that use the `config_name` parameter. - `Tables.table` (Array(String)) - Array of table names that use the `config_name` parameter. 
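As a side note for readers, the retention rules these columns describe can be inspected directly. A minimal sketch of such a query (it assumes the `system.graphite_retentions` columns listed above; the output depends entirely on your `graphite_rollup` configuration):

``` sql
SELECT config_name, regexp, function, age, precision
FROM system.graphite_retentions
ORDER BY config_name, age
```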
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/graphite_retentions)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/graphite_retentions)
diff --git a/docs/en/operations/system-tables/licenses.md b/docs/en/operations/system-tables/licenses.md
index c95e4e8b9b4..a9cada507c6 100644
--- a/docs/en/operations/system-tables/licenses.md
+++ b/docs/en/operations/system-tables/licenses.md
@@ -36,4 +36,4 @@ SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15

```

-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/licenses)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/licenses)
diff --git a/docs/en/operations/system-tables/merge_tree_settings.md b/docs/en/operations/system-tables/merge_tree_settings.md
index c2c5703f869..309c1cbc9d1 100644
--- a/docs/en/operations/system-tables/merge_tree_settings.md
+++ b/docs/en/operations/system-tables/merge_tree_settings.md
@@ -51,4 +51,4 @@ type: SettingUInt64

4 rows in set. Elapsed: 0.001 sec.
```

-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/merge_tree_settings)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/merge_tree_settings)
diff --git a/docs/en/operations/system-tables/merges.md b/docs/en/operations/system-tables/merges.md
index 3e712e2962c..c7bdaee42e1 100644
--- a/docs/en/operations/system-tables/merges.md
+++ b/docs/en/operations/system-tables/merges.md
@@ -22,4 +22,4 @@ Columns:
- `merge_type` — The type of current merge. Empty if it's a mutation.
- `merge_algorithm` — The algorithm used in current merge. Empty if it's a mutation.

-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/merges)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/merges)
diff --git a/docs/en/operations/system-tables/metric_log.md b/docs/en/operations/system-tables/metric_log.md
index 1f72c9a7358..ab149703309 100644
--- a/docs/en/operations/system-tables/metric_log.md
+++ b/docs/en/operations/system-tables/metric_log.md
@@ -48,4 +48,4 @@ CurrentMetric_DistributedFilesToInsert: 0

- [system.metrics](../../operations/system-tables/metrics.md) — Contains instantly calculated metrics.
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.

-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/metric_log)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/metric_log)
diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md
index decae8ea7fb..4afab40764b 100644
--- a/docs/en/operations/system-tables/metrics.md
+++ b/docs/en/operations/system-tables/metrics.md
@@ -38,4 +38,4 @@ SELECT * FROM system.metrics LIMIT 10

- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`.
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/metrics) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/metrics) diff --git a/docs/en/operations/system-tables/mutations.md b/docs/en/operations/system-tables/mutations.md index e5ea7eab457..24fa559197c 100644 --- a/docs/en/operations/system-tables/mutations.md +++ b/docs/en/operations/system-tables/mutations.md @@ -45,4 +45,4 @@ If there were problems with mutating some data parts, the following columns cont - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine - [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/mutations) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/mutations) diff --git a/docs/en/operations/system-tables/numbers.md b/docs/en/operations/system-tables/numbers.md index d1737c9abbb..bf948d9dd5b 100644 --- a/docs/en/operations/system-tables/numbers.md +++ b/docs/en/operations/system-tables/numbers.md @@ -29,4 +29,4 @@ Reads from this table are not parallelized. 10 rows in set. Elapsed: 0.001 sec. ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/numbers) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/numbers) diff --git a/docs/en/operations/system-tables/numbers_mt.md b/docs/en/operations/system-tables/numbers_mt.md index b40dc9a2d6f..d7df1bc1e0e 100644 --- a/docs/en/operations/system-tables/numbers_mt.md +++ b/docs/en/operations/system-tables/numbers_mt.md @@ -27,4 +27,4 @@ Used for tests. 10 rows in set. Elapsed: 0.001 sec. ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/numbers_mt) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/numbers_mt) diff --git a/docs/en/operations/system-tables/one.md b/docs/en/operations/system-tables/one.md index 51316dfbc44..10b2a1757d0 100644 --- a/docs/en/operations/system-tables/one.md +++ b/docs/en/operations/system-tables/one.md @@ -20,4 +20,4 @@ This is similar to the `DUAL` table found in other DBMSs. 1 rows in set. Elapsed: 0.001 sec. 
``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/one) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/one) diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md index 3f9110349dd..b815d2366bb 100644 --- a/docs/en/operations/system-tables/part_log.md +++ b/docs/en/operations/system-tables/part_log.md @@ -66,4 +66,4 @@ error: 0 exception: ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/part_log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/part_log) diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md index 5a4715a4513..b9b5aa09b64 100644 --- a/docs/en/operations/system-tables/parts.md +++ b/docs/en/operations/system-tables/parts.md @@ -155,4 +155,4 @@ move_ttl_info.max: [] - [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md) - [TTL for Columns and Tables](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/parts) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/parts) diff --git a/docs/en/operations/system-tables/processes.md b/docs/en/operations/system-tables/processes.md index 9ef3c648006..9401be79e85 100644 --- a/docs/en/operations/system-tables/processes.md +++ b/docs/en/operations/system-tables/processes.md @@ -14,7 +14,6 @@ Columns: - `query` (String) – The query text. For `INSERT`, it does not include the data to insert. - `query_id` (String) – Query ID, if defined. - ```sql :) SELECT * FROM system.processes LIMIT 10 FORMAT Vertical; ``` @@ -61,4 +60,4 @@ Settings.Values: ['0','in_order','1','10000000000'] 1 rows in set. Elapsed: 0.002 sec. ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/processes) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/processes) diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index 85f0679fe37..a7d5d9b61f6 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -177,4 +177,4 @@ used_table_functions: ['numbers'] - [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/query_log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/query_log) diff --git a/docs/en/operations/system-tables/query_thread_log.md b/docs/en/operations/system-tables/query_thread_log.md index 296a33259b3..7b098e21b80 100644 --- a/docs/en/operations/system-tables/query_thread_log.md +++ b/docs/en/operations/system-tables/query_thread_log.md @@ -115,4 +115,4 @@ ProfileEvents.Values: [1,1,11,11,591,148,3,71,29,6533808,1,11,72,18,47, - [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about queries execution. 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/query_thread_log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/query_thread_log) diff --git a/docs/en/operations/system-tables/quota_limits.md b/docs/en/operations/system-tables/quota_limits.md index 11616990206..0088b086e8c 100644 --- a/docs/en/operations/system-tables/quota_limits.md +++ b/docs/en/operations/system-tables/quota_limits.md @@ -17,3 +17,5 @@ Columns: - `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries. - `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of bytes read from all tables and table functions participated in queries. - `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of the query execution time, in seconds. + +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/quota_limits) diff --git a/docs/en/operations/system-tables/quota_usage.md b/docs/en/operations/system-tables/quota_usage.md index 89fdfe70069..2f35b6b7dae 100644 --- a/docs/en/operations/system-tables/quota_usage.md +++ b/docs/en/operations/system-tables/quota_usage.md @@ -28,3 +28,5 @@ Columns: ## See Also {#see-also} - [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement) + +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/quota_usage) diff --git a/docs/en/operations/system-tables/quotas.md b/docs/en/operations/system-tables/quotas.md index 3e797c9bdc6..6acc349a54f 100644 --- a/docs/en/operations/system-tables/quotas.md +++ b/docs/en/operations/system-tables/quotas.md @@ -24,5 +24,5 @@ Columns: - [SHOW QUOTAS](../../sql-reference/statements/show.md#show-quotas-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/quotas) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/quotas) diff --git a/docs/en/operations/system-tables/quotas_usage.md b/docs/en/operations/system-tables/quotas_usage.md index 04cf91cb990..6ba88cb935a 100644 --- a/docs/en/operations/system-tables/quotas_usage.md +++ b/docs/en/operations/system-tables/quotas_usage.md @@ -30,4 +30,6 @@ Columns: ## See Also {#see-also} -- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement) \ No newline at end of file +- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement) + +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/quotas_usage) diff --git a/docs/en/operations/system-tables/replicas.md b/docs/en/operations/system-tables/replicas.md index 63a2141e399..5a6ec54723b 100644 --- a/docs/en/operations/system-tables/replicas.md +++ b/docs/en/operations/system-tables/replicas.md @@ -120,5 +120,5 @@ WHERE If this query does not return anything, it means that everything is fine. 
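For reference, a reduced sketch of the health check described above; the thresholds here are arbitrary placeholders rather than the values the document recommends:

``` sql
SELECT database, table, is_readonly, absolute_delay, queue_size
FROM system.replicas
WHERE is_readonly
   OR is_session_expired
   OR absolute_delay > 30
   OR queue_size > 100
```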
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/replicas) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/replicas) diff --git a/docs/en/operations/system-tables/role-grants.md b/docs/en/operations/system-tables/role-grants.md index 5eb18b0dca7..d90bc1f77be 100644 --- a/docs/en/operations/system-tables/role-grants.md +++ b/docs/en/operations/system-tables/role-grants.md @@ -18,4 +18,4 @@ Columns: - 1 — The role has `ADMIN OPTION` privilege. - 0 — The role without `ADMIN OPTION` privilege. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/role-grants) \ No newline at end of file +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/role-grants) diff --git a/docs/en/operations/system-tables/roles.md b/docs/en/operations/system-tables/roles.md index 4ab5102dfc8..e68d5ed290a 100644 --- a/docs/en/operations/system-tables/roles.md +++ b/docs/en/operations/system-tables/roles.md @@ -12,4 +12,4 @@ Columns: - [SHOW ROLES](../../sql-reference/statements/show.md#show-roles-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/roles) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/roles) diff --git a/docs/en/operations/system-tables/row_policies.md b/docs/en/operations/system-tables/row_policies.md index 97474d1b3ee..767270d64ae 100644 --- a/docs/en/operations/system-tables/row_policies.md +++ b/docs/en/operations/system-tables/row_policies.md @@ -31,4 +31,4 @@ Columns: - [SHOW POLICIES](../../sql-reference/statements/show.md#show-policies-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/row_policies) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/row_policies) diff --git a/docs/en/operations/system-tables/settings.md b/docs/en/operations/system-tables/settings.md index 7034fe1204f..cfd9f43655a 100644 --- a/docs/en/operations/system-tables/settings.md +++ b/docs/en/operations/system-tables/settings.md @@ -50,4 +50,4 @@ SELECT * FROM system.settings WHERE changed AND name='load_balancing' - [Constraints on Settings](../../operations/settings/constraints-on-settings.md) - [SHOW SETTINGS](../../sql-reference/statements/show.md#show-settings) statement -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/settings) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/settings) diff --git a/docs/en/operations/system-tables/settings_profile_elements.md b/docs/en/operations/system-tables/settings_profile_elements.md index d0f2c3c4527..3c8c728e645 100644 --- a/docs/en/operations/system-tables/settings_profile_elements.md +++ b/docs/en/operations/system-tables/settings_profile_elements.md @@ -27,4 +27,4 @@ Columns: - `inherit_profile` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — A parent profile for this setting profile. `NULL` if not set. Setting profile will inherit all the settings' values and constraints (`min`, `max`, `readonly`) from its parent profiles. 
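As an illustration, the elements that make up a single profile can be listed with a query along these lines (a sketch; `default` is only an example profile name):

``` sql
SELECT setting_name, value, min, max, readonly, inherit_profile
FROM system.settings_profile_elements
WHERE profile_name = 'default'
```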
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/settings_profile_elements)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/settings_profile_elements)
diff --git a/docs/en/operations/system-tables/settings_profiles.md b/docs/en/operations/system-tables/settings_profiles.md
index a06b26b9cb6..80dc5172f4e 100644
--- a/docs/en/operations/system-tables/settings_profiles.md
+++ b/docs/en/operations/system-tables/settings_profiles.md
@@ -21,4 +21,4 @@ Columns:

- [SHOW PROFILES](../../sql-reference/statements/show.md#show-profiles-statement)

-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/settings_profiles)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/settings_profiles)
diff --git a/docs/en/operations/system-tables/storage_policies.md b/docs/en/operations/system-tables/storage_policies.md
index 5adab1cb2aa..4b07b593926 100644
--- a/docs/en/operations/system-tables/storage_policies.md
+++ b/docs/en/operations/system-tables/storage_policies.md
@@ -14,4 +14,4 @@ Columns:

If the storage policy contains more than one volume, then information for each volume is stored in an individual row of the table.

-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/storage_policies)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/storage_policies)
diff --git a/docs/en/operations/system-tables/table_engines.md b/docs/en/operations/system-tables/table_engines.md
index 30122cb133e..45ff6f1ac19 100644
--- a/docs/en/operations/system-tables/table_engines.md
+++ b/docs/en/operations/system-tables/table_engines.md
@@ -35,4 +35,4 @@ WHERE name in ('Kafka', 'MergeTree', 'ReplicatedCollapsingMergeTree')

- Kafka [settings](../../engines/table-engines/integrations/kafka.md#table_engine-kafka-creating-a-table)
- Join [settings](../../engines/table-engines/special/join.md#join-limitations-and-settings)

-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/table_engines)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/table_engines)
diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md
index 480db3087f6..4d7b20be311 100644
--- a/docs/en/operations/system-tables/tables.md
+++ b/docs/en/operations/system-tables/tables.md
@@ -117,4 +117,4 @@ lifetime_bytes: ᴺᵁᴸᴸ
comment:
```

-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/tables)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/tables)
diff --git a/docs/en/operations/system-tables/text_log.md b/docs/en/operations/system-tables/text_log.md
index f5f53c95653..ad95e91f0d2 100644
--- a/docs/en/operations/system-tables/text_log.md
+++ b/docs/en/operations/system-tables/text_log.md
@@ -50,4 +50,4 @@ source_file: /ClickHouse/src/Interpreters/DNSCacheUpdater.cpp; void
source_line: 45
```

- [Original article](https://clickhouse.tech/docs/en/operations/system_tables/text_log) \ No newline at end of file
+ [Original article](https://clickhouse.tech/docs/en/operations/system-tables/text_log)
diff --git a/docs/en/operations/system-tables/time_zones.md b/docs/en/operations/system-tables/time_zones.md
index 1b84ae7fe37..fa467124884 100644
--- a/docs/en/operations/system-tables/time_zones.md
+++ b/docs/en/operations/system-tables/time_zones.md
@@ -27,4 +27,4 @@ SELECT * FROM system.time_zones LIMIT 10
└────────────────────┘
```

-[Original 
article](https://clickhouse.tech/docs/en/operations/system_tables/time_zones)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/time_zones)
diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md
index e4c01a65d9d..5de597a0a51 100644
--- a/docs/en/operations/system-tables/trace_log.md
+++ b/docs/en/operations/system-tables/trace_log.md
@@ -55,4 +55,3 @@ size: 5244400
```

[Original article](https://clickhouse.tech/docs/en/operations/system-tables/trace_log)
-
diff --git a/docs/en/operations/system-tables/users.md b/docs/en/operations/system-tables/users.md
index 2227816aff3..11fdeb1e9ae 100644
--- a/docs/en/operations/system-tables/users.md
+++ b/docs/en/operations/system-tables/users.md
@@ -31,4 +31,4 @@ Columns:

- [SHOW USERS](../../sql-reference/statements/show.md#show-users-statement)

-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/users)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/users)
diff --git a/docs/en/operations/system-tables/zookeeper.md b/docs/en/operations/system-tables/zookeeper.md
index 3b8db14934e..52d1c686e52 100644
--- a/docs/en/operations/system-tables/zookeeper.md
+++ b/docs/en/operations/system-tables/zookeeper.md
@@ -72,4 +72,4 @@ numChildren: 7
pzxid: 987021252247
path: /clickhouse/tables/01-08/visits/replicas
```
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/zookeeper)
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/zookeeper)
diff --git a/docs/zh/operations/system-tables/asynchronous_metric_log.md b/docs/zh/operations/system-tables/asynchronous_metric_log.md
index 9fbe15b8507..ff7593768d3 100644
--- a/docs/zh/operations/system-tables/asynchronous_metric_log.md
+++ b/docs/zh/operations/system-tables/asynchronous_metric_log.md
@@ -3,6 +3,6 @@ machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
---

-## 系统。asynchronous_metric_log {#system-tables-async-log}
+## system.asynchronous_metric_log {#system-tables-async-log}

包含 `system.asynchronous_metrics` 的历史值(见 [系统。asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics))
diff --git a/docs/zh/operations/system-tables/asynchronous_metrics.md b/docs/zh/operations/system-tables/asynchronous_metrics.md
index 805477c9f47..5a302f6da7b 100644
--- a/docs/zh/operations/system-tables/asynchronous_metrics.md
+++ b/docs/zh/operations/system-tables/asynchronous_metrics.md
@@ -3,14 +3,14 @@ machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
---

-# 系统。asynchronous_metrics {#system_tables-asynchronous_metrics}
+# system.asynchronous_metrics {#system_tables-asynchronous_metrics}

包含在后台定期计算的指标。 例如,正在使用的RAM量。

列:

-- `metric` ([字符串](../../sql-reference/data-types/string.md)) — Metric name.
-- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value.
+- `metric` ([字符串](../../sql-reference/data-types/string.md)) — 指标名。
+- `value` ([Float64](../../sql-reference/data-types/float.md)) — 指标值。

**示例**

@@ -34,8 +34,7 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10
```

**另请参阅**
-
-- [监测](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
-- [系统。指标](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics.
-- [系统。活动](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that have occurred.
-- [系统。metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. +- [监测](../../operations/monitoring.md) — ClickHouse监控的基本概念。 +- [系统。指标](../../operations/system-tables/metrics.md#system_tables-metrics) — 包含即时计算的指标。 +- [系统。活动](../../operations/system-tables/events.md#system_tables-events) — 包含出现的事件的次数。 +- [系统。metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — 包含`system.metrics` 和 `system.events`表中的指标的历史值。 diff --git a/docs/zh/operations/system-tables/clusters.md b/docs/zh/operations/system-tables/clusters.md index 1e5935c276e..f76288f4bd8 100644 --- a/docs/zh/operations/system-tables/clusters.md +++ b/docs/zh/operations/system-tables/clusters.md @@ -3,22 +3,23 @@ machine_translated: true machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 --- -# 系统。集群 {#system-clusters} +# system.clusters{#system-clusters} 包含有关配置文件中可用的集群及其中的服务器的信息。 列: -- `cluster` (String) — The cluster name. -- `shard_num` (UInt32) — The shard number in the cluster, starting from 1. -- `shard_weight` (UInt32) — The relative weight of the shard when writing data. -- `replica_num` (UInt32) — The replica number in the shard, starting from 1. -- `host_name` (String) — The host name, as specified in the config. -- `host_address` (String) — The host IP address obtained from DNS. -- `port` (UInt16) — The port to use for connecting to the server. -- `user` (String) — The name of the user for connecting to the server. -- `errors_count` (UInt32)-此主机无法到达副本的次数。 -- `estimated_recovery_time` (UInt32)-剩下的秒数,直到副本错误计数归零,它被认为是恢复正常。 +- `cluster` (String) — 集群名。 +- `shard_num` (UInt32) — 集群中的分片数,从1开始。 +- `shard_weight` (UInt32) — 写数据时该分片的相对权重。 +- `replica_num` (UInt32) — 分片的副本数量,从1开始。 +- `host_name` (String) — 配置中指定的主机名。 +- `host_address` (String) — 从DNS获取的主机IP地址。 +- `port` (UInt16) — 连接到服务器的端口。 +- `user` (String) — 连接到服务器的用户名。 +- `errors_count` (UInt32) - 此主机无法访问副本的次数。 +- `slowdowns_count` (UInt32) - 与对冲请求建立连接时导致更改副本的减速次数。 +- `estimated_recovery_time` (UInt32) - 剩下的秒数,直到副本错误计数归零并被视为恢复正常。 请注意 `errors_count` 每个查询集群更新一次,但 `estimated_recovery_time` 按需重新计算。 所以有可能是非零的情况 `errors_count` 和零 `estimated_recovery_time`,下一个查询将为零 `errors_count` 并尝试使用副本,就好像它没有错误。 @@ -27,3 +28,5 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 - [表引擎分布式](../../engines/table-engines/special/distributed.md) - [distributed_replica_error_cap设置](../../operations/settings/settings.md#settings-distributed_replica_error_cap) - [distributed_replica_error_half_life设置](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) + +[原文](https://clickhouse.tech/docs/zh/operations/system-tables/clusters) diff --git a/docs/zh/operations/system-tables/columns.md b/docs/zh/operations/system-tables/columns.md index 24296dc715c..b21be98c0dc 100644 --- a/docs/zh/operations/system-tables/columns.md +++ b/docs/zh/operations/system-tables/columns.md @@ -25,3 +25,5 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 - `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression. - `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression. - `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression. 
+
+[原文](https://clickhouse.tech/docs/zh/operations/system-tables/columns)
diff --git a/docs/zh/operations/system-tables/tables.md b/docs/zh/operations/system-tables/tables.md
index a690e938a3a..0c3e913b9bb 100644
--- a/docs/zh/operations/system-tables/tables.md
+++ b/docs/zh/operations/system-tables/tables.md
@@ -5,15 +5,15 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3

# 系统。表 {#system-tables}

-包含服务器知道的每个表的元数据。 分离的表不显示在 `system.tables`.
+包含服务器已知的每个表的元数据。分离的表不显示在 `system.tables` 中。

此表包含以下列(列类型显示在括号中):

-- `database` (String) — The name of the database the table is in.
+- `database` (String) — 表所在的数据库名。

-- `name` (String) — Table name.
+- `name` (String) — 表名。

-- `engine` (String) — Table engine name (without parameters).
+- `engine` (String) — 表引擎名 (不包含参数)。

- `is_temporary` (UInt8)-指示表是否是临时的标志。

@@ -23,11 +23,11 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3

- `metadata_modification_time` (DateTime)-表元数据的最新修改时间。

-- `dependencies_database` (数组(字符串))-数据库依赖关系.
+- `dependencies_database` (数组(字符串))-数据库依赖关系。

- `dependencies_table` (数组(字符串))-表依赖关系 ([MaterializedView](../../engines/table-engines/special/materializedview.md) 基于当前表的表)。

-- `create_table_query` (String)-用于创建表的查询。
+- `create_table_query` (String)-用于创建表的SQL语句。

- `engine_full` (String)-表引擎的参数。

@@ -44,11 +44,15 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3

    - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
    - [分布](../../engines/table-engines/special/distributed.md#distributed)

-- `total_rows` (Nullable(UInt64))-总行数,如果可以快速确定表中的确切行数,否则 `Null` (包括内衣 `Buffer` 表)。
+- `total_rows` (Nullable(UInt64))-总行数,如果可以快速确定表中的确切行数,否则行数为`Null`(包括底层 `Buffer` 表)。

-- `total_bytes` (Nullable(UInt64))-总字节数,如果可以快速确定存储表的确切字节数,否则 `Null` (**不** 包括任何底层存储)。
+- `total_bytes` (Nullable(UInt64))-总字节数,如果可以快速确定存储表的确切字节数,否则字节数为`Null`(即**不**包括任何底层存储)。

-    - If the table stores data on disk, returns used space on disk (i.e. compressed).
-    - 如果表在内存中存储数据,返回在内存中使用的近似字节数.
+    - 如果表将数据存在磁盘上,返回实际使用的磁盘空间(压缩后)。
+    - 如果表在内存中存储数据,返回在内存中使用的近似字节数。

-该 `system.tables` 表中使用 `SHOW TABLES` 查询实现。
+- `lifetime_rows` (Nullable(UInt64))-服务启动后插入的总行数(只针对`Buffer`表)。
+
+`system.tables` 表被用于 `SHOW TABLES` 的查询实现中。
+
+[原文](https://clickhouse.tech/docs/zh/operations/system-tables/tables)
diff --git a/docs/zh/operations/system-tables/zookeeper.md b/docs/zh/operations/system-tables/zookeeper.md
index f7e816ccee6..ca767fba7aa 100644
--- a/docs/zh/operations/system-tables/zookeeper.md
+++ b/docs/zh/operations/system-tables/zookeeper.md
@@ -3,13 +3,13 @@ machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
---

-# 系统。动物园管理员 {#system-zookeeper}
+# system.zookeeper {#system-zookeeper}

-如果未配置ZooKeeper,则表不存在。 允许从配置中定义的ZooKeeper集群读取数据。
-查询必须具有 ‘path’ WHERE子句中的相等条件或者在某个集合中的条件。 这是ZooKeeper中您想要获取数据的孩子的路径。
+如果未配置ZooKeeper,则该表不存在。允许从配置中定义的ZooKeeper集群读取数据。
+查询的 WHERE 子句中必须包含 ‘path’ 的相等条件。这是 ZooKeeper 中您想要获取其子节点数据的路径。

-查询 `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` 输出对所有孩子的数据 `/clickhouse` 节点。
-要输出所有根节点的数据,write path= ‘/’.
+查询 `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` 输出 `/clickhouse` 节点所有子节点的数据。
+要输出所有根节点的数据,使用 path = ‘/’。
如果指定的路径 ‘path’ 不存在,将引发异常。

查询`SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse')` 输出`/` 和 `/clickhouse`节点上所有子节点的数据。
如果在指定的 ‘path’ 集合中有不存在的路径,所有的子查询都会被忽略。

列:

-- `name` (String) — The name of the node.
-- `path` (String) — The path to the node.
-- `value` (String) — Node value.
-- `dataLength` (Int32) — Size of the value.
-- `numChildren` (Int32) — Number of descendants.
-- `czxid` (Int64) — ID of the transaction that created the node.
-- `mzxid` (Int64) — ID of the transaction that last changed the node.
-- `pzxid` (Int64) — ID of the transaction that last deleted or added descendants.
-- `ctime` (DateTime) — Time of node creation.
-- `mtime` (DateTime) — Time of the last modification of the node.
-- `version` (Int32) — Node version: the number of times the node was changed.
-- `cversion` (Int32) — Number of added or removed descendants.
-- `aversion` (Int32) — Number of changes to the ACL.
-- `ephemeralOwner` (Int64) — For ephemeral nodes, the ID of the session that owns this node.
+- `name` (String) — 节点的名字。
+- `path` (String) — 节点的路径。
+- `value` (String) — 节点的值。
+- `dataLength` (Int32) — 节点的值长度。
+- `numChildren` (Int32) — 子节点的个数。
+- `czxid` (Int64) — 创建该节点的事务ID。
+- `mzxid` (Int64) — 最后修改该节点的事务ID。
+- `pzxid` (Int64) — 最后删除或者增加子节点的事务ID。
+- `ctime` (DateTime) — 节点的创建时间。
+- `mtime` (DateTime) — 节点的最后修改时间。
+- `version` (Int32) — 节点版本:节点被修改的次数。
+- `cversion` (Int32) — 增加或删除子节点的个数。
+- `aversion` (Int32) — ACL的修改次数。
+- `ephemeralOwner` (Int64) — 针对临时节点,拥有该节点的会话ID。

示例:

@@ -77,3 +77,4 @@ numChildren: 7
pzxid: 987021252247
path: /clickhouse/tables/01-08/visits/replicas
```
+[原文](https://clickhouse.tech/docs/zh/operations/system-tables/zookeeper)
From 76156af5cc30b8706e2b9527811706cce99a452d Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Thu, 24 Jun 2021 17:07:43 +0300
Subject: [PATCH 080/183] cancel merges on drop partition
---
 src/Storages/MergeTree/MergeList.cpp          |  4 ++--
 src/Storages/MergeTree/MergeList.h            | 16 ++++++++++++++--
 .../MergeTree/MergeTreeDataMergerMutator.cpp  |  8 ++++++--
 src/Storages/StorageReplicatedMergeTree.cpp   |  8 ++++++++
 4 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp
index c6f9459d0db..24beb0cc06f 100644
--- a/src/Storages/MergeTree/MergeList.cpp
+++ b/src/Storages/MergeTree/MergeList.cpp
@@ -13,7 +13,7 @@ MergeListElement::MergeListElement(const StorageID & table_id_, const FutureMerg
     , partition_id{future_part.part_info.partition_id}
     , result_part_name{future_part.name}
     , result_part_path{future_part.path}
-    , result_data_version{future_part.part_info.getDataVersion()}
+    , result_part_info{future_part.part_info}
     , num_parts{future_part.parts.size()}
     , thread_id{getThreadId()}
     , merge_type{future_part.merge_type}
@@ -32,7 +32,7 @@ MergeListElement::MergeListElement(const StorageID & table_id_, const FutureMerg
     if (!future_part.parts.empty())
     {
         source_data_version = future_part.parts[0]->info.getDataVersion();
-        is_mutation = (result_data_version != source_data_version);
+        is_mutation = (result_part_info.getDataVersion() != source_data_version);
     }

     /// Each merge is executed into separate background processing pool thread
diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h
index 9680ce6ac30..6f4aedcc6f8 100644
--- a/src/Storages/MergeTree/MergeList.h
+++ b/src/Storages/MergeTree/MergeList.h
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -60,7 +61,7 @@ struct MergeListElement : boost::noncopyable

     const std::string result_part_name;
     const std::string result_part_path;
-    Int64 result_data_version{};
+    MergeTreePartInfo result_part_info;
     bool is_mutation{};

     UInt64 num_parts{};
@@ -130,7 +131,18 @@ public:
            if ((partition_id.empty() || 
merge_element.partition_id == partition_id) && merge_element.table_id == table_id && merge_element.source_data_version < mutation_version - && merge_element.result_data_version >= mutation_version) + && merge_element.result_part_info.getDataVersion() >= mutation_version) + merge_element.is_cancelled = true; + } + } + + void cancelInPartition(const StorageID & table_id, const String & partition_id, Int64 delimiting_block_number) + { + for (auto & merge_element : entries) + { + if (merge_element.table_id == table_id + && merge_element.partition_id == partition_id + && merge_element.result_part_info.min_block < delimiting_block_number) merge_element.is_cancelled = true; } } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 846ad7b026d..b4f3d433f66 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -951,8 +951,12 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor size_t rows_written = 0; const size_t initial_reservation = space_reservation ? space_reservation->getSize() : 0; - auto is_cancelled = [&]() { return merges_blocker.isCancelled() - || (need_remove_expired_values && ttl_merges_blocker.isCancelled()); }; + auto is_cancelled = [&]() + { + return merges_blocker.isCancelled() + || (need_remove_expired_values && ttl_merges_blocker.isCancelled()) + || merge_entry->is_cancelled.load(std::memory_order_relaxed); + }; Block block; while (!is_cancelled() && (block = merged_stream->read())) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 47f6bbd0ccc..75ff8a93980 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2194,6 +2194,7 @@ bool StorageReplicatedMergeTree::executeFetchShared( void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) { auto drop_range_info = MergeTreePartInfo::fromPartName(entry.new_part_name, format_version); + getContext()->getMergeList().cancelInPartition(getStorageID(), drop_range_info.partition_id, drop_range_info.max_block); queue.removePartProducingOpsInRange(getZooKeeper(), drop_range_info, entry); if (entry.detach) @@ -2253,9 +2254,14 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) bool replace = !LogEntry::ReplaceRangeEntry::isMovePartitionOrAttachFrom(drop_range); if (replace) + { + getContext()->getMergeList().cancelInPartition(getStorageID(), drop_range.partition_id, drop_range.max_block); queue.removePartProducingOpsInRange(getZooKeeper(), drop_range, entry); + } else + { drop_range = {}; + } struct PartDescription { @@ -7094,6 +7100,8 @@ bool StorageReplicatedMergeTree::dropAllPartsInPartition( String log_znode_path = dynamic_cast(*responses.front()).path_created; entry.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1); + getContext()->getMergeList().cancelInPartition(getStorageID(), partition_id, drop_range_info.max_block); + return true; } From 6bc0a628cd4a803c290ca9116c8f6f9b3d6bea03 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 25 Jun 2021 17:49:28 +0300 Subject: [PATCH 081/183] Remove PrewhereDAGInfo. 
--- src/Interpreters/ExpressionAnalyzer.cpp | 3 +- src/Interpreters/ExpressionAnalyzer.h | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 73 +++++-------------- .../getHeaderForProcessingStage.cpp | 4 +- .../QueryPlan/ReadFromMergeTree.cpp | 5 +- src/Processors/QueryPlan/ReadFromMergeTree.h | 1 + src/Storages/IStorage.cpp | 7 +- .../MergeTreeBaseSelectProcessor.cpp | 36 ++++++--- .../MergeTree/MergeTreeBaseSelectProcessor.h | 6 +- .../MergeTree/MergeTreeBlockReadUtils.cpp | 6 +- src/Storages/MergeTree/MergeTreeData.cpp | 29 +++----- .../MergeTree/MergeTreeRangeReader.cpp | 2 +- src/Storages/MergeTree/MergeTreeRangeReader.h | 22 +++++- .../MergeTreeReverseSelectProcessor.cpp | 3 +- .../MergeTreeReverseSelectProcessor.h | 1 + .../MergeTree/MergeTreeSelectProcessor.cpp | 3 +- .../MergeTree/MergeTreeSelectProcessor.h | 1 + ...rgeTreeThreadSelectBlockInputProcessor.cpp | 3 +- ...MergeTreeThreadSelectBlockInputProcessor.h | 2 + src/Storages/SelectQueryInfo.h | 26 +------ src/Storages/StorageBuffer.cpp | 7 +- 21 files changed, 108 insertions(+), 134 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index fe52b30da7b..96f898e3fe6 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1514,7 +1514,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (auto actions = query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere)) { - prewhere_info = std::make_shared(actions, query.prewhere()->getColumnName()); + prewhere_info = std::make_shared(actions, query.prewhere()->getColumnName()); if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings)) { @@ -1734,7 +1734,6 @@ void ExpressionAnalysisResult::checkActions() const check_actions(prewhere_info->prewhere_actions); check_actions(prewhere_info->alias_actions); - check_actions(prewhere_info->remove_columns_actions); } } diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 70ff5643b7c..272a5166102 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -239,7 +239,7 @@ struct ExpressionAnalysisResult /// Columns will be removed after prewhere actions execution. 
NameSet columns_to_remove_after_prewhere; - PrewhereDAGInfoPtr prewhere_info; + PrewhereInfoPtr prewhere_info; FilterDAGInfoPtr filter_info; ConstantFilterDescription prewhere_constant_filter_description; ConstantFilterDescription where_constant_filter_description; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 7cca527cbc1..4d741bfc484 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -958,11 +958,11 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu if (expressions.prewhere_info) { - if (expressions.prewhere_info->row_level_filter_actions) + if (expressions.prewhere_info->row_level_filter) { auto row_level_filter_step = std::make_unique( query_plan.getCurrentDataStream(), - expressions.prewhere_info->row_level_filter_actions, + expressions.prewhere_info->row_level_filter, expressions.prewhere_info->row_level_column_name, false); @@ -978,18 +978,6 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu prewhere_step->setStepDescription("PREWHERE"); query_plan.addStep(std::move(prewhere_step)); - - // To remove additional columns in dry run - // For example, sample column which can be removed in this stage - // TODO There seems to be no place initializing remove_columns_actions - if (expressions.prewhere_info->remove_columns_actions) - { - auto remove_columns = std::make_unique( - query_plan.getCurrentDataStream(), expressions.prewhere_info->remove_columns_actions); - - remove_columns->setStepDescription("Remove unnecessary columns after PREWHERE"); - query_plan.addStep(std::move(remove_columns)); - } } } else @@ -1479,33 +1467,29 @@ void InterpreterSelectQuery::addEmptySourceToQueryPlan( if (prewhere_info.alias_actions) { - pipe.addSimpleTransform( - [&](const Block & header) { return std::make_shared(header, prewhere_info.alias_actions); }); + pipe.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, + std::make_shared(prewhere_info.alias_actions)); + }); } if (prewhere_info.row_level_filter) { pipe.addSimpleTransform([&](const Block & header) { - return std::make_shared(header, prewhere_info.row_level_filter, prewhere_info.row_level_column_name, true); + return std::make_shared(header, + std::make_shared(prewhere_info.row_level_filter), + prewhere_info.row_level_column_name, true); }); } pipe.addSimpleTransform([&](const Block & header) { return std::make_shared( - header, prewhere_info.prewhere_actions, prewhere_info.prewhere_column_name, prewhere_info.remove_prewhere_column); + header, std::make_shared(prewhere_info.prewhere_actions), + prewhere_info.prewhere_column_name, prewhere_info.remove_prewhere_column); }); - - // To remove additional columns - // In some cases, we did not read any marks so that the pipeline.streams is empty - // Thus, some columns in prewhere are not removed as expected - // This leads to mismatched header in distributed table - if (prewhere_info.remove_columns_actions) - { - pipe.addSimpleTransform( - [&](const Block & header) { return std::make_shared(header, prewhere_info.remove_columns_actions); }); - } } auto read_from_pipe = std::make_unique(std::move(pipe)); @@ -1560,7 +1544,7 @@ void InterpreterSelectQuery::addPrewhereAliasActions() if (does_storage_support_prewhere && settings.optimize_move_to_prewhere) { /// Execute row level filter in prewhere as a part of "move to prewhere" optimization. 
-        expressions.prewhere_info = std::make_shared<PrewhereDAGInfo>(
+        expressions.prewhere_info = std::make_shared<PrewhereInfo>(
             std::move(expressions.filter_info->actions),
             std::move(expressions.filter_info->column_name));
         expressions.prewhere_info->prewhere_actions->projectInput(false);
@@ -1572,9 +1556,9 @@ void InterpreterSelectQuery::addPrewhereAliasActions()
         else
         {
             /// Add row level security actions to prewhere.
-            expressions.prewhere_info->row_level_filter_actions = std::move(expressions.filter_info->actions);
+            expressions.prewhere_info->row_level_filter = std::move(expressions.filter_info->actions);
             expressions.prewhere_info->row_level_column_name = std::move(expressions.filter_info->column_name);
-            expressions.prewhere_info->row_level_filter_actions->projectInput(false);
+            expressions.prewhere_info->row_level_filter->projectInput(false);
             expressions.filter_info = nullptr;
         }
     }
@@ -1613,9 +1597,9 @@ void InterpreterSelectQuery::addPrewhereAliasActions()
         auto prewhere_required_columns = prewhere_info->prewhere_actions->getRequiredColumns().getNames();
         required_columns_from_prewhere.insert(prewhere_required_columns.begin(), prewhere_required_columns.end());
 
-        if (prewhere_info->row_level_filter_actions)
+        if (prewhere_info->row_level_filter)
         {
-            auto row_level_required_columns = prewhere_info->row_level_filter_actions->getRequiredColumns().getNames();
+            auto row_level_required_columns = prewhere_info->row_level_filter->getRequiredColumns().getNames();
             required_columns_from_prewhere.insert(row_level_required_columns.begin(), row_level_required_columns.end());
         }
     }
@@ -1898,28 +1882,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
     auto & prewhere_info = analysis_result.prewhere_info;
 
     if (prewhere_info)
-    {
-        auto actions_settings = ExpressionActionsSettings::fromContext(context, CompileExpressions::yes);
-
-        query_info.prewhere_info = std::make_shared<PrewhereInfo>();
-        query_info.prewhere_info->prewhere_actions
-            = std::make_shared<ExpressionActions>(prewhere_info->prewhere_actions, actions_settings);
-
-        if (prewhere_info->row_level_filter_actions)
-            query_info.prewhere_info->row_level_filter
-                = std::make_shared<ExpressionActions>(prewhere_info->row_level_filter_actions, actions_settings);
-        if (prewhere_info->alias_actions)
-            query_info.prewhere_info->alias_actions
-                = std::make_shared<ExpressionActions>(prewhere_info->alias_actions, actions_settings);
-        if (prewhere_info->remove_columns_actions)
-            query_info.prewhere_info->remove_columns_actions
-                = std::make_shared<ExpressionActions>(prewhere_info->remove_columns_actions, actions_settings);
-
-        query_info.prewhere_info->prewhere_column_name = prewhere_info->prewhere_column_name;
-        query_info.prewhere_info->remove_prewhere_column = prewhere_info->remove_prewhere_column;
-        query_info.prewhere_info->row_level_column_name = prewhere_info->row_level_column_name;
-        query_info.prewhere_info->need_filter = prewhere_info->need_filter;
-    }
+        query_info.prewhere_info = prewhere_info;
 
     /// Create optimizer with prepared actions.
     /// Maybe we will need to calc input_order_info later, e.g. while reading from StorageMerge.
diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp
index 335575a6362..19837cc05d9 100644
--- a/src/Interpreters/getHeaderForProcessingStage.cpp
+++ b/src/Interpreters/getHeaderForProcessingStage.cpp
@@ -98,12 +98,12 @@ Block getHeaderForProcessingStage(
 
             if (prewhere_info.row_level_filter)
             {
-                prewhere_info.row_level_filter->execute(header);
+                header = prewhere_info.row_level_filter->updateHeader(std::move(header));
                 header.erase(prewhere_info.row_level_column_name);
             }
 
             if (prewhere_info.prewhere_actions)
-                prewhere_info.prewhere_actions->execute(header);
+                header = prewhere_info.prewhere_actions->updateHeader(std::move(header));
 
             if (prewhere_info.remove_prewhere_column)
                 header.erase(prewhere_info.prewhere_column_name);
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index fd5de98b4c0..2dc8246cde7 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -94,6 +94,7 @@ ReadFromMergeTree::ReadFromMergeTree(
     , data(data_)
     , query_info(query_info_)
     , prewhere_info(getPrewhereInfo(query_info))
+    , actions_settings(ExpressionActionsSettings::fromContext(context_))
     , metadata_snapshot(std::move(metadata_snapshot_))
     , metadata_snapshot_base(std::move(metadata_snapshot_base_))
     , context(std::move(context_))
@@ -157,7 +158,7 @@ Pipe ReadFromMergeTree::readFromPool(
             i, pool, min_marks_for_concurrent_read, max_block_size,
             settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes,
             data, metadata_snapshot, use_uncompressed_cache,
-            prewhere_info, reader_settings, virt_column_names);
+            prewhere_info, actions_settings, reader_settings, virt_column_names);
 
         if (i == 0)
         {
@@ -180,7 +181,7 @@ ProcessorPtr ReadFromMergeTree::createSource(
     return std::make_shared<TSource>(
         data, metadata_snapshot, part.data_part, max_block_size, preferred_block_size_bytes,
         preferred_max_column_in_block_size_bytes, required_columns, part.ranges, use_uncompressed_cache,
-        prewhere_info, true, reader_settings, virt_column_names, part.part_index_in_query);
+        prewhere_info, actions_settings, true, reader_settings, virt_column_names, part.part_index_in_query);
 }
 
 Pipe ReadFromMergeTree::readInOrder(
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h
index 6e1efffdb02..a5184d28593 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.h
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.h
@@ -90,6 +90,7 @@ private:
     const MergeTreeData & data;
     SelectQueryInfo query_info;
     PrewhereInfoPtr prewhere_info;
+    ExpressionActionsSettings actions_settings;
 
     StorageMetadataPtr metadata_snapshot;
     StorageMetadataPtr metadata_snapshot_base;
diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp
index 83c91dffd7f..c73eb62d039 100644
--- a/src/Storages/IStorage.cpp
+++ b/src/Storages/IStorage.cpp
@@ -198,7 +198,7 @@ NameDependencies IStorage::getDependentViewsByColumn(ContextPtr context) const
     return name_deps;
 }
 
-std::string PrewhereDAGInfo::dump() const
+std::string PrewhereInfo::dump() const
 {
     WriteBufferFromOwnString ss;
     ss << "PrewhereDagInfo\n";
@@ -213,11 +213,6 @@ std::string PrewhereDAGInfo::dump() const
         ss << "prewhere_actions " << prewhere_actions->dumpDAG() << "\n";
     }
 
-    if (remove_columns_actions)
-    {
-        ss << "remove_columns_actions " << remove_columns_actions->dumpDAG() << "\n";
-    }
-
     ss << "remove_prewhere_column " << remove_prewhere_column << ", need_filter " << need_filter << "\n";
diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
index d9cb949042c..68f754b08fb 100644
--- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
+++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
@@ -26,6 +26,7 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor(
     const MergeTreeData & storage_,
     const StorageMetadataPtr & metadata_snapshot_,
     const PrewhereInfoPtr & prewhere_info_,
+    ExpressionActionsSettings actions_settings,
     UInt64 max_block_size_rows_,
     UInt64 preferred_block_size_bytes_,
     UInt64 preferred_max_column_in_block_size_bytes_,
@@ -49,6 +50,23 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor(
     for (auto it = virt_column_names.rbegin(); it != virt_column_names.rend(); ++it)
         if (header_without_virtual_columns.has(*it))
             header_without_virtual_columns.erase(*it);
+
+    if (prewhere_info)
+    {
+        prewhere_actions = std::make_unique<PrewhereActions>();
+        if (prewhere_info->alias_actions)
+            prewhere_actions->alias_actions = std::make_shared<ExpressionActions>(prewhere_info->alias_actions, actions_settings);
+
+        if (prewhere_info->row_level_filter)
+            prewhere_actions->row_level_filter = std::make_shared<ExpressionActions>(prewhere_info->row_level_filter, actions_settings);
+
+        prewhere_actions->prewhere_actions = std::make_shared<ExpressionActions>(prewhere_info->prewhere_actions, actions_settings);
+
+        prewhere_actions->row_level_column_name = prewhere_info->row_level_column_name;
+        prewhere_actions->prewhere_column_name = prewhere_info->prewhere_column_name;
+        prewhere_actions->remove_prewhere_column = prewhere_info->remove_prewhere_column;
+        prewhere_actions->need_filter = prewhere_info->need_filter;
+    }
 }
 
 
@@ -78,14 +96,14 @@ void MergeTreeBaseSelectProcessor::initializeRangeReaders(MergeTreeReadTask & cu
     {
         if (reader->getColumns().empty())
         {
-            current_task.range_reader = MergeTreeRangeReader(pre_reader.get(), nullptr, prewhere_info, true);
+            current_task.range_reader = MergeTreeRangeReader(pre_reader.get(), nullptr, prewhere_actions.get(), true);
        }
         else
         {
             MergeTreeRangeReader * pre_reader_ptr = nullptr;
             if (pre_reader != nullptr)
             {
-                current_task.pre_range_reader = MergeTreeRangeReader(pre_reader.get(), nullptr, prewhere_info, false);
+                current_task.pre_range_reader = MergeTreeRangeReader(pre_reader.get(), nullptr, prewhere_actions.get(), false);
                 pre_reader_ptr = &current_task.pre_range_reader;
             }
 
@@ -396,16 +414,17 @@ void MergeTreeBaseSelectProcessor::injectVirtualColumns(
     chunk.setColumns(columns, num_rows);
 }
 
-void MergeTreeBaseSelectProcessor::executePrewhereActions(Block & block, const PrewhereInfoPtr & prewhere_info)
+Block MergeTreeBaseSelectProcessor::transformHeader(
+    Block block, const PrewhereInfoPtr & prewhere_info, const DataTypePtr & partition_value_type, const Names & virtual_columns)
 {
     if (prewhere_info)
     {
         if (prewhere_info->alias_actions)
-            prewhere_info->alias_actions->execute(block);
+            block = prewhere_info->alias_actions->updateHeader(std::move(block));
 
         if (prewhere_info->row_level_filter)
         {
-            prewhere_info->row_level_filter->execute(block);
+            block = prewhere_info->row_level_filter->updateHeader(std::move(block));
             auto & row_level_column = block.getByName(prewhere_info->row_level_column_name);
             if (!row_level_column.type->canBeUsedInBooleanContext())
             {
@@ -417,7 +436,7 @@ void MergeTreeBaseSelectProcessor::executePrewhereActions(Block & block, const P
         }
 
         if (prewhere_info->prewhere_actions)
-            prewhere_info->prewhere_actions->execute(block);
+            block = prewhere_info->prewhere_actions->updateHeader(std::move(block));
 
         auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name);
         if (!prewhere_column.type->canBeUsedInBooleanContext())
@@ -434,12 +453,7 @@ void MergeTreeBaseSelectProcessor::executePrewhereActions(Block & block, const P
             ctn.column = ctn.type->createColumnConst(block.rows(), 1u)->convertToFullColumnIfConst();
         }
     }
-}
 
-Block MergeTreeBaseSelectProcessor::transformHeader(
-    Block block, const PrewhereInfoPtr & prewhere_info, const DataTypePtr & partition_value_type, const Names & virtual_columns)
-{
-    executePrewhereActions(block, prewhere_info);
     injectVirtualColumns(block, nullptr, partition_value_type, virtual_columns);
     return block;
 }
diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h
index 4615dec089f..2ae39dbb058 100644
--- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h
+++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h
@@ -13,7 +13,7 @@ namespace DB
 class IMergeTreeReader;
 class UncompressedCache;
 class MarkCache;
-
+struct PrewhereActions;
 
 /// Base class for MergeTreeThreadSelectProcessor and MergeTreeSelectProcessor
 class MergeTreeBaseSelectProcessor : public SourceWithProgress
@@ -24,6 +24,7 @@ public:
         const MergeTreeData & storage_,
         const StorageMetadataPtr & metadata_snapshot_,
         const PrewhereInfoPtr & prewhere_info_,
+        ExpressionActionsSettings actions_settings,
         UInt64 max_block_size_rows_,
         UInt64 preferred_block_size_bytes_,
         UInt64 preferred_max_column_in_block_size_bytes_,
@@ -36,8 +37,6 @@ public:
     static Block transformHeader(
         Block block, const PrewhereInfoPtr & prewhere_info, const DataTypePtr & partition_value_type, const Names & virtual_columns);
 
-    static void executePrewhereActions(Block & block, const PrewhereInfoPtr & prewhere_info);
-
 protected:
     Chunk generate() final;
 
@@ -61,6 +60,7 @@ protected:
     StorageMetadataPtr metadata_snapshot;
 
     PrewhereInfoPtr prewhere_info;
+    std::unique_ptr<PrewhereActions> prewhere_actions;
 
     UInt64 max_block_size_rows;
     UInt64 preferred_block_size_bytes;
diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp
index 15b4fbd31c0..b8698ae3e01 100644
--- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp
+++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp
@@ -272,16 +272,16 @@ MergeTreeReadTaskColumns getReadTaskColumns(
     if (prewhere_info)
     {
         if (prewhere_info->alias_actions)
-            pre_column_names = prewhere_info->alias_actions->getRequiredColumns();
+            pre_column_names = prewhere_info->alias_actions->getRequiredColumnsNames();
         else
         {
-            pre_column_names = prewhere_info->prewhere_actions->getRequiredColumns();
+            pre_column_names = prewhere_info->prewhere_actions->getRequiredColumnsNames();
 
             if (prewhere_info->row_level_filter)
             {
                 NameSet names(pre_column_names.begin(), pre_column_names.end());
 
-                for (auto & name : prewhere_info->row_level_filter->getRequiredColumns())
+                for (auto & name : prewhere_info->row_level_filter->getRequiredColumnsNames())
                 {
                     if (names.count(name) == 0)
                         pre_column_names.push_back(name);
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index abc37f52ff9..f6d542d5f2c 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -3940,15 +3940,9 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection(
 
         if (analysis_result.prewhere_info)
         {
-            const auto & prewhere_info = analysis_result.prewhere_info;
-            candidate.prewhere_info = std::make_shared<PrewhereInfo>();
-            candidate.prewhere_info->prewhere_column_name = prewhere_info->prewhere_column_name;
-            candidate.prewhere_info->remove_prewhere_column = prewhere_info->remove_prewhere_column;
-            // std::cerr << fmt::format("remove prewhere column : {}", candidate.prewhere_info->remove_prewhere_column) << std::endl;
-            candidate.prewhere_info->row_level_column_name = prewhere_info->row_level_column_name;
-            candidate.prewhere_info->need_filter = prewhere_info->need_filter;
+            candidate.prewhere_info = analysis_result.prewhere_info;
 
-            auto prewhere_actions = prewhere_info->prewhere_actions->clone();
+            auto prewhere_actions = candidate.prewhere_info->prewhere_actions->clone();
             auto prewhere_required_columns = required_columns;
             // required_columns should not contain columns generated by prewhere
             for (const auto & column : prewhere_actions->getResultColumns())
@@ -3956,28 +3950,27 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection(
             // std::cerr << fmt::format("prewhere_actions = \n{}", prewhere_actions->dumpDAG()) << std::endl;
             // Prewhere_action should not add missing keys.
             prewhere_required_columns = prewhere_actions->foldActionsByProjection(
-                prewhere_required_columns, projection.sample_block_for_keys, prewhere_info->prewhere_column_name, false);
+                prewhere_required_columns, projection.sample_block_for_keys, candidate.prewhere_info->prewhere_column_name, false);
             // std::cerr << fmt::format("prewhere_actions = \n{}", prewhere_actions->dumpDAG()) << std::endl;
             // std::cerr << fmt::format("prewhere_required_columns = \n{}", fmt::join(prewhere_required_columns, ", ")) << std::endl;
             if (prewhere_required_columns.empty())
                 return false;
-            candidate.prewhere_info->prewhere_actions = std::make_shared<ExpressionActions>(prewhere_actions, actions_settings);
+            candidate.prewhere_info->prewhere_actions = prewhere_actions;
 
-            if (prewhere_info->row_level_filter_actions)
+            if (candidate.prewhere_info->row_level_filter)
             {
-                auto row_level_filter_actions = prewhere_info->row_level_filter_actions->clone();
+                auto row_level_filter_actions = candidate.prewhere_info->row_level_filter->clone();
                 prewhere_required_columns = row_level_filter_actions->foldActionsByProjection(
-                    prewhere_required_columns, projection.sample_block_for_keys, prewhere_info->row_level_column_name, false);
+                    prewhere_required_columns, projection.sample_block_for_keys, candidate.prewhere_info->row_level_column_name, false);
                 // std::cerr << fmt::format("row_level_filter_required_columns = \n{}", fmt::join(prewhere_required_columns, ", ")) << std::endl;
                 if (prewhere_required_columns.empty())
                     return false;
-                candidate.prewhere_info->row_level_filter
-                    = std::make_shared<ExpressionActions>(row_level_filter_actions, actions_settings);
+                candidate.prewhere_info->row_level_filter = row_level_filter_actions;
             }
 
-            if (prewhere_info->alias_actions)
+            if (candidate.prewhere_info->alias_actions)
             {
-                auto alias_actions = prewhere_info->alias_actions->clone();
+                auto alias_actions = candidate.prewhere_info->alias_actions->clone();
                 // std::cerr << fmt::format("alias_actions = \n{}", alias_actions->dumpDAG()) << std::endl;
                 prewhere_required_columns
                     = alias_actions->foldActionsByProjection(prewhere_required_columns, projection.sample_block_for_keys, {}, false);
                 // std::cerr << fmt::format("alias_required_columns = \n{}", fmt::join(prewhere_required_columns, ", ")) << std::endl;
                 if (prewhere_required_columns.empty())
                     return false;
-                candidate.prewhere_info->alias_actions = std::make_shared<ExpressionActions>(alias_actions, actions_settings);
+                candidate.prewhere_info->alias_actions = alias_actions;
             }
             required_columns.insert(prewhere_required_columns.begin(), prewhere_required_columns.end());
         }
diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp
index 27682b81c94..8072aa6a3dc 100644
--- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp
+++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp
@@ -520,7 +520,7 @@ size_t MergeTreeRangeReader::ReadResult::countBytesInResultFilter(const IColumn:
 MergeTreeRangeReader::MergeTreeRangeReader(
     IMergeTreeReader * merge_tree_reader_,
     MergeTreeRangeReader * prev_reader_,
-    const PrewhereInfoPtr & prewhere_info_,
+    const PrewhereActions * prewhere_info_,
     bool last_reader_in_chain_)
     : merge_tree_reader(merge_tree_reader_)
     , index_granularity(&(merge_tree_reader->data_part->index_granularity))
diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h
index 18075e52bdd..7c36ca49c99 100644
--- a/src/Storages/MergeTree/MergeTreeRangeReader.h
+++ b/src/Storages/MergeTree/MergeTreeRangeReader.h
@@ -15,6 +15,24 @@ class MergeTreeIndexGranularity;
 struct PrewhereInfo;
 using PrewhereInfoPtr = std::shared_ptr<PrewhereInfo>;
 
+class ExpressionActions;
+using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
+
+struct PrewhereActions
+{
+    /// Actions which are executed in order to alias columns are used for prewhere actions.
+    ExpressionActionsPtr alias_actions;
+    /// Actions for row level security filter. Applied separately before prewhere_actions.
+    /// This actions are separate because prewhere condition should not be executed over filtered rows.
+    ExpressionActionsPtr row_level_filter;
+    /// Actions which are executed on block in order to get filter column for prewhere step.
+    ExpressionActionsPtr prewhere_actions;
+    String row_level_column_name;
+    String prewhere_column_name;
+    bool remove_prewhere_column = false;
+    bool need_filter = false;
+};
+
 /// MergeTreeReader iterator which allows sequential reading for arbitrary number of rows between pairs of marks in the same part.
 /// Stores reading state, which can be inside granule. Can skip rows in current granule and start reading from next mark.
 /// Used generally for reading number of rows less than index granularity to decrease cache misses for fat blocks.
@@ -24,7 +42,7 @@ public:
     MergeTreeRangeReader(
         IMergeTreeReader * merge_tree_reader_,
         MergeTreeRangeReader * prev_reader_,
-        const PrewhereInfoPtr & prewhere_info_,
+        const PrewhereActions * prewhere_info_,
         bool last_reader_in_chain_);
 
     MergeTreeRangeReader() = default;
@@ -217,7 +235,7 @@ private:
     IMergeTreeReader * merge_tree_reader = nullptr;
     const MergeTreeIndexGranularity * index_granularity = nullptr;
     MergeTreeRangeReader * prev_reader = nullptr; /// If not nullptr, read from prev_reader firstly.
-    PrewhereInfoPtr prewhere_info;
+    const PrewhereActions * prewhere_info;
 
     Stream stream;
 
diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp
index e9527efaa4a..81833b76735 100644
--- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp
+++ b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp
@@ -23,6 +23,7 @@ MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor(
     MarkRanges mark_ranges_,
     bool use_uncompressed_cache_,
     const PrewhereInfoPtr & prewhere_info_,
+    ExpressionActionsSettings actions_settings,
     bool check_columns,
     const MergeTreeReaderSettings & reader_settings_,
     const Names & virt_column_names_,
@@ -31,7 +32,7 @@ MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor(
     :
     MergeTreeBaseSelectProcessor{
         metadata_snapshot_->getSampleBlockForColumns(required_columns_, storage_.getVirtuals(), storage_.getStorageID()),
-        storage_, metadata_snapshot_, prewhere_info_, max_block_size_rows_,
+        storage_, metadata_snapshot_, prewhere_info_, std::move(actions_settings), max_block_size_rows_,
         preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_,
         reader_settings_, use_uncompressed_cache_, virt_column_names_},
     required_columns{std::move(required_columns_)},
diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h
index c9fd06c5534..b807c2d912c 100644
--- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h
+++ b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h
@@ -27,6 +27,7 @@ public:
         MarkRanges mark_ranges,
         bool use_uncompressed_cache,
         const PrewhereInfoPtr & prewhere_info,
+        ExpressionActionsSettings actions_settings,
         bool check_columns,
         const MergeTreeReaderSettings & reader_settings,
         const Names & virt_column_names = {},
diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp
index 980afa170e9..ce342a69fe0 100644
--- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp
+++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp
@@ -23,6 +23,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor(
     MarkRanges mark_ranges_,
     bool use_uncompressed_cache_,
     const PrewhereInfoPtr & prewhere_info_,
+    ExpressionActionsSettings actions_settings,
     bool check_columns_,
     const MergeTreeReaderSettings & reader_settings_,
     const Names & virt_column_names_,
@@ -31,7 +32,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor(
     :
     MergeTreeBaseSelectProcessor{
         metadata_snapshot_->getSampleBlockForColumns(required_columns_, storage_.getVirtuals(), storage_.getStorageID()),
-        storage_, metadata_snapshot_, prewhere_info_, max_block_size_rows_,
+        storage_, metadata_snapshot_, prewhere_info_, std::move(actions_settings), max_block_size_rows_,
         preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_,
         reader_settings_, use_uncompressed_cache_, virt_column_names_},
     required_columns{std::move(required_columns_)},
diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h
index 925c437f1ce..b63107b6dbf 100644
--- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h
+++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h
@@ -27,6 +27,7 @@ public:
         MarkRanges mark_ranges,
         bool use_uncompressed_cache,
         const PrewhereInfoPtr & prewhere_info,
+        ExpressionActionsSettings actions_settings,
         bool check_columns,
         const MergeTreeReaderSettings & reader_settings,
         const Names & virt_column_names = {},
diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp b/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp
index 2f0aad77d96..daefb17038a 100644
--- a/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp
+++ b/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp
@@ -19,11 +19,12 @@ MergeTreeThreadSelectBlockInputProcessor::MergeTreeThreadSelectBlockInputProcess
     const StorageMetadataPtr & metadata_snapshot_,
     const bool use_uncompressed_cache_,
     const PrewhereInfoPtr & prewhere_info_,
+    ExpressionActionsSettings actions_settings,
     const MergeTreeReaderSettings & reader_settings_,
     const Names & virt_column_names_)
     :
     MergeTreeBaseSelectProcessor{
-        pool_->getHeader(), storage_, metadata_snapshot_, prewhere_info_, max_block_size_rows_,
+        pool_->getHeader(), storage_, metadata_snapshot_, prewhere_info_, std::move(actions_settings), max_block_size_rows_,
         preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_,
         reader_settings_, use_uncompressed_cache_, virt_column_names_},
     thread{thread_},
diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h b/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h
index 2b2ed36fc18..30c551eede0 100644
--- a/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h
+++ b/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h
@@ -25,7 +25,9 @@ public:
         const StorageMetadataPtr & metadata_snapshot_,
         const bool use_uncompressed_cache_,
         const PrewhereInfoPtr & prewhere_info_,
+        ExpressionActionsSettings actions_settings,
         const MergeTreeReaderSettings & reader_settings_,
+
         const Names & virt_column_names_);
 
     String getName() const override { return "MergeTreeThread"; }
diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h
index 73cf3893a89..a7d2ae3e7dd 100644
--- a/src/Storages/SelectQueryInfo.h
+++ b/src/Storages/SelectQueryInfo.h
@@ -21,9 +21,6 @@ using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;
 struct PrewhereInfo;
 using PrewhereInfoPtr = std::shared_ptr<PrewhereInfo>;
 
-struct PrewhereDAGInfo;
-using PrewhereDAGInfoPtr = std::shared_ptr<PrewhereDAGInfo>;
-
 struct FilterInfo;
 using FilterInfoPtr = std::shared_ptr<FilterInfo>;
 
@@ -45,34 +42,19 @@ using ClusterPtr = std::shared_ptr<Cluster>;
 struct PrewhereInfo
 {
     /// Actions which are executed in order to alias columns are used for prewhere actions.
-    ExpressionActionsPtr alias_actions;
+    ActionsDAGPtr alias_actions;
     /// Actions for row level security filter. Applied separately before prewhere_actions.
     /// This actions are separate because prewhere condition should not be executed over filtered rows.
-    ExpressionActionsPtr row_level_filter;
+    ActionsDAGPtr row_level_filter;
     /// Actions which are executed on block in order to get filter column for prewhere step.
-    ExpressionActionsPtr prewhere_actions;
-    /// Actions which are executed after reading from storage in order to remove unused columns.
-    ExpressionActionsPtr remove_columns_actions;
-    String row_level_column_name;
-    String prewhere_column_name;
-    bool remove_prewhere_column = false;
-    bool need_filter = false;
-};
-
-/// Same as PrewhereInfo, but with ActionsDAG.
-struct PrewhereDAGInfo
-{
-    ActionsDAGPtr alias_actions;
-    ActionsDAGPtr row_level_filter_actions;
     ActionsDAGPtr prewhere_actions;
-    ActionsDAGPtr remove_columns_actions;
     String row_level_column_name;
     String prewhere_column_name;
     bool remove_prewhere_column = false;
     bool need_filter = false;
 
-    PrewhereDAGInfo() = default;
-    explicit PrewhereDAGInfo(ActionsDAGPtr prewhere_actions_, String prewhere_column_name_)
+    PrewhereInfo() = default;
+    explicit PrewhereInfo(ActionsDAGPtr prewhere_actions_, String prewhere_column_name_)
         : prewhere_actions(std::move(prewhere_actions_)), prewhere_column_name(std::move(prewhere_column_name_)) {}
 
     std::string dump() const;
diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp
index 55dc2d12c9d..a433cd248c7 100644
--- a/src/Storages/StorageBuffer.cpp
+++ b/src/Storages/StorageBuffer.cpp
@@ -369,13 +369,14 @@ void StorageBuffer::read(
         {
             if (query_info.prewhere_info)
             {
+                auto actions_settings = ExpressionActionsSettings::fromContext(local_context);
                 if (query_info.prewhere_info->alias_actions)
                 {
                     pipe_from_buffers.addSimpleTransform([&](const Block & header)
                     {
                         return std::make_shared<ExpressionTransform>(
                             header,
-                            query_info.prewhere_info->alias_actions);
+                            std::make_shared<ExpressionActions>(query_info.prewhere_info->alias_actions, actions_settings));
                     });
                 }
 
@@ -385,7 +386,7 @@ void StorageBuffer::read(
                     {
                         return std::make_shared<FilterTransform>(
                             header,
-                            query_info.prewhere_info->row_level_filter,
+                            std::make_shared<ExpressionActions>(query_info.prewhere_info->row_level_filter, actions_settings),
                             query_info.prewhere_info->row_level_column_name,
                             false);
                     });
@@ -395,7 +396,7 @@ void StorageBuffer::read(
                 {
                     return std::make_shared<FilterTransform>(
                         header,
-                        query_info.prewhere_info->prewhere_actions,
+                        std::make_shared<ExpressionActions>(query_info.prewhere_info->prewhere_actions, actions_settings),
                         query_info.prewhere_info->prewhere_column_name,
                         query_info.prewhere_info->remove_prewhere_column);
                 });

From 64652c3597c0afd55c9b7ec6c8778dd9923a4d8e Mon Sep 17 00:00:00 2001
From: sevirov <72220289+sevirov@users.noreply.github.com>
Date: Fri, 25 Jun 2021 19:55:25 +0300
Subject: [PATCH 082/183] Update docs/en/sql-reference/table-functions/s3Cluster.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/en/sql-reference/table-functions/s3Cluster.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md
index 9e2291a346d..794d009f644 100644
--- a/docs/en/sql-reference/table-functions/s3Cluster.md
+++ b/docs/en/sql-reference/table-functions/s3Cluster.md
@@ -16,7 +16,7 @@ s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, stru
 **Arguments**
 
 - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
-- `source` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
+- `source` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
 - `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional.
 - `format` — The [format](../../interfaces/formats.md#formats) of the file.
 - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.

From 15bc66111959619a7052b96c0c08fe7141a3a2e8 Mon Sep 17 00:00:00 2001
From: sevirov <72220289+sevirov@users.noreply.github.com>
Date: Fri, 25 Jun 2021 19:55:48 +0300
Subject: [PATCH 083/183] Update docs/ru/sql-reference/table-functions/s3Cluster.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/sql-reference/table-functions/s3Cluster.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/table-functions/s3Cluster.md b/docs/ru/sql-reference/table-functions/s3Cluster.md
index 0f3c8f68c9c..95f45066e71 100644
--- a/docs/ru/sql-reference/table-functions/s3Cluster.md
+++ b/docs/ru/sql-reference/table-functions/s3Cluster.md
@@ -16,7 +16,7 @@ s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, stru
 **Аргументы**
 
 - `cluster_name` — имя кластера, используемое для создания набора адресов и параметров подключения к удаленным и локальным серверам.
-- `source` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные символы в режиме "только чтение": `*`, `?`, `{abc,def}` и `{N..M}`, где `N`, `M` — числа, `abc`, `def` — строки. Подробнее смотрите в разделе [Символы подстановки](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
+- `source` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные символы в режиме "только чтение": `*`, `?`, `{'abc','def'}` и `{N..M}`, где `N`, `M` — числа, `abc`, `def` — строки. Подробнее смотрите в разделе [Символы подстановки](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
 - `access_key_id` и `secret_access_key` — ключи, указывающие на учетные данные для использования с точкой приема запроса. Необязательные параметры.
 - `format` — [формат](../../interfaces/formats.md#formats) файла.
 - `structure` — структура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`.

From 0b886acd8d06786d4bc650597469318bb85626fd Mon Sep 17 00:00:00 2001
From: sevirov <72220289+sevirov@users.noreply.github.com>
Date: Fri, 25 Jun 2021 19:56:02 +0300
Subject: [PATCH 084/183] Update docs/ru/sql-reference/table-functions/s3Cluster.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/sql-reference/table-functions/s3Cluster.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/table-functions/s3Cluster.md b/docs/ru/sql-reference/table-functions/s3Cluster.md
index 95f45066e71..1d9477f5787 100644
--- a/docs/ru/sql-reference/table-functions/s3Cluster.md
+++ b/docs/ru/sql-reference/table-functions/s3Cluster.md
@@ -3,7 +3,7 @@ toc_priority: 55
 toc_title: s3Cluster
 ---
 
-# Табличная Функция s3Cluster {#s3Cluster-table-function}
+# Табличная функция s3Cluster {#s3Cluster-table-function}
 
 Позволяет обрабатывать файлы из [Amazon S3](https://aws.amazon.com/s3/) параллельно из многих узлов в указанном кластере. На узле-инициаторе функция создает соединение со всеми узлами в кластере, раскрывает звездочки в пути к файлу S3 и динамически отправляет каждый файл. На рабочем узле функция запрашивает у инициатора следующую задачу для обработки и обрабатывает ее. Это повторяется до тех пор, пока все задачи не будут завершены.
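For illustration, a minimal call of `s3Cluster` using the `{N..M}` wildcard documented in the patches above might look like this (a sketch only — the endpoint and the `mybucket` path are hypothetical; `cluster_simple` is the cluster name used in the examples further in this series):

``` sql
-- Hypothetical bucket and endpoint; reads data-1.csv ... data-9.csv in parallel
-- on every node of the 'cluster_simple' cluster.
SELECT *
FROM s3Cluster(
    'cluster_simple',
    'https://storage.example.com/mybucket/data-{1..9}.csv',
    'CSV',
    'id UInt32, value String');
```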
From 638b2862b8873c0a742fdc596f2f9c77e7ae6d98 Mon Sep 17 00:00:00 2001
From: sevirov <72220289+sevirov@users.noreply.github.com>
Date: Fri, 25 Jun 2021 19:56:11 +0300
Subject: [PATCH 085/183] Update docs/ru/sql-reference/table-functions/s3Cluster.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/sql-reference/table-functions/s3Cluster.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/table-functions/s3Cluster.md b/docs/ru/sql-reference/table-functions/s3Cluster.md
index 1d9477f5787..8b4dae91fc7 100644
--- a/docs/ru/sql-reference/table-functions/s3Cluster.md
+++ b/docs/ru/sql-reference/table-functions/s3Cluster.md
@@ -5,7 +5,7 @@ toc_title: s3Cluster
 
 # Табличная функция s3Cluster {#s3Cluster-table-function}
 
-Позволяет обрабатывать файлы из [Amazon S3](https://aws.amazon.com/s3/) параллельно из многих узлов в указанном кластере. На узле-инициаторе функция создает соединение со всеми узлами в кластере, раскрывает звездочки в пути к файлу S3 и динамически отправляет каждый файл. На рабочем узле функция запрашивает у инициатора следующую задачу для обработки и обрабатывает ее. Это повторяется до тех пор, пока все задачи не будут завершены.
+Позволяет обрабатывать файлы из [Amazon S3](https://aws.amazon.com/s3/) параллельно из многих узлов в указанном кластере. На узле-инициаторе функция создает соединение со всеми узлами в кластере, заменяет символы '*' в пути к файлу S3 и динамически отправляет каждый файл. На рабочем узле функция запрашивает у инициатора следующую задачу и обрабатывает ее. Это повторяется до тех пор, пока все задачи не будут завершены.
 
 **Синтаксис**

From 400e6855322edcc20f967b8e3f54379569e0b4fd Mon Sep 17 00:00:00 2001
From: sevirov <72220289+sevirov@users.noreply.github.com>
Date: Fri, 25 Jun 2021 19:56:18 +0300
Subject: [PATCH 086/183] Update docs/ru/sql-reference/table-functions/s3Cluster.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/sql-reference/table-functions/s3Cluster.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/table-functions/s3Cluster.md b/docs/ru/sql-reference/table-functions/s3Cluster.md
index 8b4dae91fc7..9ad20c4b5e1 100644
--- a/docs/ru/sql-reference/table-functions/s3Cluster.md
+++ b/docs/ru/sql-reference/table-functions/s3Cluster.md
@@ -33,7 +33,7 @@ s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, stru
 SELECT * FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon);
 ```
 
-Подсчитаем общее количество строк во всех файлах кластера `cluster_simple`:
+Подсчет общего количества строк во всех файлах кластера `cluster_simple`:
 
 ``` sql
 SELECT count(*) FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))');

From 25c8e09dbb113e03158c7f0a8751ac47a509fcad Mon Sep 17 00:00:00 2001
From: sevirov <72220289+sevirov@users.noreply.github.com>
Date: Fri, 25 Jun 2021 19:56:26 +0300
Subject: [PATCH 087/183] Update docs/ru/sql-reference/table-functions/s3Cluster.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/sql-reference/table-functions/s3Cluster.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/table-functions/s3Cluster.md b/docs/ru/sql-reference/table-functions/s3Cluster.md
index 9ad20c4b5e1..32916a2b122 100644
--- a/docs/ru/sql-reference/table-functions/s3Cluster.md
+++ b/docs/ru/sql-reference/table-functions/s3Cluster.md
@@ -27,7 +27,7 @@ s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, stru
 
 **Примеры**
 
-Выведем данные из всех файлов кластера `cluster_simple`:
+Вывод данных из всех файлов кластера `cluster_simple`:
 
 ``` sql
 SELECT * FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon);

From 5e5aa409eed0f2301582ffbac9c352bd08739f14 Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Sun, 27 Jun 2021 01:00:39 +0300
Subject: [PATCH 088/183] Apply suggestions from code review

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/en/operations/settings/settings.md       | 24 ++++++++---------
 .../aggregate-functions/reference/count.md    |  2 +-
 docs/en/sql-reference/data-types/map.md       | 26 +++----------------
 .../functions/array-functions.md              |  6 ++---
 .../functions/tuple-map-functions.md          |  4 +--
 docs/en/sql-reference/operators/index.md      |  4 +--
 6 files changed, 24 insertions(+), 42 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 6190b9b030b..5cb10720cf9 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -1729,23 +1729,23 @@ Default value: 0.
 
 ## optimize_functions_to_subcolumns {#optimize-functions-to-subcolumns}
 
-Optimizes functions (if possible) by transforming them to read the subcolumns. This reduces the amount of read data.
+Enables or disables optimization by transforming some functions to reading subcolumns. This reduces the amount of data to read.
 
-These function can be tranformed:
+These functions can be transformed:
 
-- [length](../../sql-reference/functions/array-functions.md#array_functions-length) to read subcolumn [size0](../../sql-reference/data-types/array.md#array-size).
-- [empty](../../sql-reference/functions/array-functions.md#function-empty) to read subcolumn [size0](../../sql-reference/data-types/array.md#array-size).
-- [notEmpty](../../sql-reference/functions/array-functions.md#function-notempty) to read subcolumn [size0](../../sql-reference/data-types/array.md#array-size).
-- [isNull](../../sql-reference/operators/index.md#operator-is-null) to read subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null).
-- [isNotNull](../../sql-reference/operators/index.md#is-not-null) to read subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null).
-- [count](../../sql-reference/aggregate-functions/reference/count.md) to read subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null).
-- [mapKeys](../../sql-reference/functions/tuple-map-functions.md#mapkeys) to read subcolumn [keys](../../sql-reference/data-types/map.md#subcolumn-keys).
-- [mapValues](../../sql-reference/functions/tuple-map-functions.md#mapvalues) to read subcolumn [values](../../sql-reference/data-types/map.md#subcolumn-values).
+- [length](../../sql-reference/functions/array-functions.md#array_functions-length) to read the [size0](../../sql-reference/data-types/array.md#array-size) subcolumn.
+- [empty](../../sql-reference/functions/array-functions.md#function-empty) to read the [size0](../../sql-reference/data-types/array.md#array-size) subcolumn.
+- [notEmpty](../../sql-reference/functions/array-functions.md#function-notempty) to read the [size0](../../sql-reference/data-types/array.md#array-size) subcolumn.
+- [isNull](../../sql-reference/operators/index.md#operator-is-null) to read the [null](../../sql-reference/data-types/nullable.md#finding-null) subcolumn.
+- [isNotNull](../../sql-reference/operators/index.md#is-not-null) to read the [null](../../sql-reference/data-types/nullable.md#finding-null) subcolumn.
+- [count](../../sql-reference/aggregate-functions/reference/count.md) to read the [null](../../sql-reference/data-types/nullable.md#finding-null) subcolumn.
+- [mapKeys](../../sql-reference/functions/tuple-map-functions.md#mapkeys) to read the [keys](../../sql-reference/data-types/map.md#subcolumn-keys) subcolumn.
+- [mapValues](../../sql-reference/functions/tuple-map-functions.md#mapvalues) to read the [values](../../sql-reference/data-types/map.md#subcolumn-values) subcolumn.
 
 Possible values:
 
-- 0 — Disabled.
-- 1 — Enabled.
+- 0 — Optimization disabled.
+- 1 — Optimization enabled.
 
 Default value: `0`.
 
diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md
index 6f55d3b5cee..9356d0aab46 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/count.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/count.md
@@ -31,7 +31,7 @@ ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this const
 
 The `SELECT count() FROM table` query is not optimized, because the number of entries in the table is not stored separately. It chooses a small column from the table and counts the number of values in it.
 
-Can be optimized by the setting [optimize_functions_to_subcolumns](../../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [null](../../../sql-reference/data-types/nullable.md#finding-null).
+The `SELECT count() FROM table` query can be optimized by enabling the [optimize_functions_to_subcolumns](../../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [null](../../../sql-reference/data-types/nullable.md#finding-null) subcolumn instead of reading and processing the whole table data.
 
 **Examples**
 
diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md
index dc1a9846d22..dff74b0cef4 100644
--- a/docs/en/sql-reference/data-types/map.md
+++ b/docs/en/sql-reference/data-types/map.md
@@ -76,9 +76,9 @@ SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map
 └───────────────────────────────┘
 ```
 
-## Subcolumn Map.keys {#subcolumn-keys}
+## Map.keys and Map.values Subcolumns {#map-subcolumns}
 
-To read all keys of a `Map` you can use the subcolumn `keys`, which doesn't read the whole column.
+To optimize `Map` column processing, in some cases you can use the `keys` and 'values' subcolumns instead of reading the whole column.
 
 **Example**
 
@@ -90,6 +90,8 @@ CREATE TABLE t_map (`a` Map(String, UInt64)) ENGINE = Memory;
 
 INSERT INTO t_map VALUES (map('key1', 1, 'key2', 2, 'key3', 3));
 
 SELECT a.keys FROM t_map;
+
+SELECT a.values FROM t_map;
 ```
 
 Result:
 
@@ -98,27 +100,7 @@ Result:
 ┌─a.keys─────────────────┐
 │ ['key1','key2','key3'] │
 └────────────────────────┘
-```
 
-## Subcolumn Map.values {#subcolumn-keys}
-
-To read all values of a `Map` you can use the subcolumn `values`, which doesn't read the whole column.
-
-**Example**
-
-Query:
-
-``` sql
-CREATE TABLE t_map (`a` Map(String, UInt64)) ENGINE = Memory;
-
-INSERT INTO t_map VALUES (map('key1', 1, 'key2', 2, 'key3', 3))
-
-SELECT a.values FROM t_map;
-```
-
-Result:
-
-``` text
 ┌─a.values─┐
 │ [1,2,3]  │
 └──────────┘
diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index 10b8500b571..6f7a2d63318 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -11,7 +11,7 @@ Returns 1 for an empty array, or 0 for a non-empty array.
 The result type is UInt8.
 The function also works for strings.
 
-Can be optimized by the setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [size0](../../sql-reference/data-types/array.md#array-size).
+Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column.
@@ -19,7 +19,7 @@ Returns 0 for an empty array, or 1 for a non-empty array.
 
 ## notEmpty {#function-notempty}
 
 Returns 0 for an empty array, or 1 for a non-empty array.
 The result type is UInt8.
 The function also works for strings.
 
-Can be optimized by the setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [size0](../../sql-reference/data-types/array.md#array-size).
+Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column.
@@ -27,7 +27,7 @@ Returns the number of items in the array.
 
 ## length {#array_functions-length}
 
 Returns the number of items in the array.
 The result type is UInt64.
 The function also works for strings.
 
-Can be optimized by the setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [size0](../../sql-reference/data-types/array.md#array-size).
+Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column.
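+
+For example, the rewrite can be observed with `EXPLAIN SYNTAX` (a minimal sketch — the table `t_arr` and its column `arr` are assumed here only for illustration, and the exact output may vary between versions):
+
+``` sql
+-- t_arr is a hypothetical table used only for this illustration
+CREATE TABLE t_arr (`arr` Array(UInt32)) ENGINE = MergeTree ORDER BY tuple();
+
+EXPLAIN SYNTAX SELECT length(arr) FROM t_arr SETTINGS optimize_functions_to_subcolumns = 1;
+```
+
+``` text
+SELECT arr.size0
+FROM t_arr
+SETTINGS optimize_functions_to_subcolumns = 1
+```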
 
 ## emptyArrayUInt8, emptyArrayUInt16, emptyArrayUInt32, emptyArrayUInt64 {#emptyarrayuint8-emptyarrayuint16-emptyarrayuint32-emptyarrayuint64}
 
diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md
index 2deb9323cff..b506ce7f190 100644
--- a/docs/en/sql-reference/functions/tuple-map-functions.md
+++ b/docs/en/sql-reference/functions/tuple-map-functions.md
@@ -220,7 +220,7 @@ Result:
 
 Returns all keys from the `map` parameter.
 
-Can be optimized by setting the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [keys](../../sql-reference/data-types/map.md#subcolumn-keys).
+Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [keys](../../sql-reference/data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data.
 
 **Syntax**
 
@@ -263,7 +263,7 @@ Result:
 
 Returns all values from the `map` parameter.
 
-Can be optimized by the setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [values](../../sql-reference/data-types/map.md#subcolumn-values).
+Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [values](../../sql-reference/data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data.
 
 **Syntax**
 
diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md
index 1cb7936969c..9fa84c9eaae 100644
--- a/docs/en/sql-reference/operators/index.md
+++ b/docs/en/sql-reference/operators/index.md
@@ -283,7 +283,7 @@ ClickHouse supports the `IS NULL` and `IS NOT NULL` operators.
     - `0` otherwise.
 - For other values, the `IS NULL` operator always returns `0`.
 
-Can be optimized by the setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null).
+Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [null](../../sql-reference/data-types/nullable.md#finding-null) subcolumn instead of reading and processing the whole column data.
 
 <!-- -->
 
@@ -316,4 +316,4 @@ SELECT * FROM t_null WHERE y IS NOT NULL
 └───┴───┘
 ```
 
-Can be optimized by the setting [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads subcolumn [null](../../sql-reference/data-types/nullable.md#finding-null).
+Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [null](../../sql-reference/data-types/nullable.md#finding-null) subcolumn instead of reading and processing the whole column data.

From e7af8ee31b482eb29bb6ccf28c8a460a62c99776 Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Sun, 27 Jun 2021 02:09:30 +0300
Subject: [PATCH 089/183] Apply suggestions from code review

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/en/sql-reference/aggregate-functions/reference/count.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md
index 9356d0aab46..78ab20151b7 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/count.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/count.md
@@ -29,8 +29,6 @@ In both cases the type of the returned value is [UInt64](../../../sql-reference/
 
 ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this construction depends on the [count_distinct_implementation](../../../operations/settings/settings.md#settings-count_distinct_implementation) setting. It defines which of the [uniq\*](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) functions is used to perform the operation. The default is the [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) function.
 
-The `SELECT count() FROM table` query is not optimized, because the number of entries in the table is not stored separately. It chooses a small column from the table and counts the number of values in it.
-
 The `SELECT count() FROM table` query can be optimized by enabling the [optimize_functions_to_subcolumns](../../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [null](../../../sql-reference/data-types/nullable.md#finding-null) subcolumn instead of reading and processing the whole table data.
 
 **Examples**

From 96b2c054da05e9f496488af1431b897e677165cd Mon Sep 17 00:00:00 2001
From: George <gyuton@yandex-team.ru>
Date: Sun, 27 Jun 2021 02:37:34 +0300
Subject: [PATCH 090/183] Added translation

---
 docs/en/operations/settings/settings.md       |  4 +--
 docs/en/sql-reference/data-types/map.md       |  2 +-
 docs/ru/operations/settings/settings.md       | 22 +++++++++++++
 .../aggregate-functions/reference/count.md    |  2 +-
 docs/ru/sql-reference/data-types/map.md       | 31 +++++++++++++++++++
 .../functions/array-functions.md              |  6 ++++
 .../functions/tuple-map-functions.md          |  4 +++
 docs/ru/sql-reference/operators/index.md      |  4 +++
 8 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 5cb10720cf9..52556c581a0 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -1739,8 +1739,8 @@ These functions can be transformed:
 - [isNull](../../sql-reference/operators/index.md#operator-is-null) to read the [null](../../sql-reference/data-types/nullable.md#finding-null) subcolumn.
 - [isNotNull](../../sql-reference/operators/index.md#is-not-null) to read the [null](../../sql-reference/data-types/nullable.md#finding-null) subcolumn.
 - [count](../../sql-reference/aggregate-functions/reference/count.md) to read the [null](../../sql-reference/data-types/nullable.md#finding-null) subcolumn.
-- [mapKeys](../../sql-reference/functions/tuple-map-functions.md#mapkeys) to read the [keys](../../sql-reference/data-types/map.md#subcolumn-keys) subcolumn.
-- [mapValues](../../sql-reference/functions/tuple-map-functions.md#mapvalues) to read the [values](../../sql-reference/data-types/map.md#subcolumn-values) subcolumn.
+- [mapKeys](../../sql-reference/functions/tuple-map-functions.md#mapkeys) to read the [keys](../../sql-reference/data-types/map.md#map-subcolumns) subcolumn.
+- [mapValues](../../sql-reference/functions/tuple-map-functions.md#mapvalues) to read the [values](../../sql-reference/data-types/map.md#map-subcolumns) subcolumn.
 
 Possible values:
 
diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md
index dff74b0cef4..86ea55004fd 100644
--- a/docs/en/sql-reference/data-types/map.md
+++ b/docs/en/sql-reference/data-types/map.md
@@ -78,7 +78,7 @@ SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map
 
 ## Map.keys and Map.values Subcolumns {#map-subcolumns}
 
-To optimize `Map` column processing, in some cases you can use the `keys` and 'values' subcolumns instead of reading the whole column.
+To optimize `Map` column processing, in some cases you can use the `keys` and `values` subcolumns instead of reading the whole column.
 
 **Example**
 
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index fd5c9dba43a..d1904dc8617 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -1606,6 +1606,28 @@ ClickHouse генерирует исключение
 
 Значение по умолчанию: 0.
 
+## optimize_functions_to_subcolumns {#optimize-functions-to-subcolumns}
+
+Включает или отключает оптимизацию путём преобразования некоторых функций к чтению подстолбцов, таким образом уменьшая количество данных для чтения.
+
+Могут быть преобразованы следующие функции:
+
+- [length](../../sql-reference/functions/array-functions.md#array_functions-length) к чтению подстолбца [size0](../../sql-reference/data-types/array.md#array-size).
+- [empty](../../sql-reference/functions/array-functions.md#function-empty) к чтению подстолбца [size0](../../sql-reference/data-types/array.md#array-size).
+- [notEmpty](../../sql-reference/functions/array-functions.md#function-notempty) к чтению подстолбца [size0](../../sql-reference/data-types/array.md#array-size).
+- [isNull](../../sql-reference/operators/index.md#operator-is-null) к чтению подстолбца [null](../../sql-reference/data-types/nullable.md#finding-null).
+- [isNotNull](../../sql-reference/operators/index.md#is-not-null) к чтению подстолбца [null](../../sql-reference/data-types/nullable.md#finding-null).
+- [count](../../sql-reference/aggregate-functions/reference/count.md) к чтению подстолбца [null](../../sql-reference/data-types/nullable.md#finding-null).
+- [mapKeys](../../sql-reference/functions/tuple-map-functions.md#mapkeys) к чтению подстолбца [keys](../../sql-reference/data-types/map.md#map-subcolumns).
+- [mapValues](../../sql-reference/functions/tuple-map-functions.md#mapvalues) к чтению подстолбца [values](../../sql-reference/data-types/map.md#map-subcolumns).
+
+Возможные значения:
+
+- 0 — оптимизация отключена.
+- 1 — оптимизация включена.
+
+Значение по умолчанию: `0`.
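+
+Например, преобразование можно увидеть с помощью `EXPLAIN SYNTAX` (набросок — таблица `t_arr` и её столбец `arr` взяты условно, только для иллюстрации; точный вывод может отличаться в зависимости от версии):
+
+``` sql
+-- t_arr — гипотетическая таблица, используется только для иллюстрации
+CREATE TABLE t_arr (`arr` Array(UInt32)) ENGINE = MergeTree ORDER BY tuple();
+
+EXPLAIN SYNTAX SELECT length(arr) FROM t_arr SETTINGS optimize_functions_to_subcolumns = 1;
+```
+
+``` text
+SELECT arr.size0
+FROM t_arr
+SETTINGS optimize_functions_to_subcolumns = 1
+```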
+ ## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life} - Тип: секунды diff --git a/docs/ru/sql-reference/aggregate-functions/reference/count.md b/docs/ru/sql-reference/aggregate-functions/reference/count.md index 06cf66bd8bd..da882621085 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/count.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/count.md @@ -29,7 +29,7 @@ ClickHouse поддерживает следующие виды синтакси ClickHouse поддерживает синтаксис `COUNT(DISTINCT ...)`. Поведение этой конструкции зависит от настройки [count_distinct_implementation](../../../operations/settings/settings.md#settings-count_distinct_implementation). Она определяет, какая из функций [uniq\*](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) используется для выполнения операции. По умолчанию — функция [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact). -Запрос `SELECT count() FROM table` не оптимизирован, поскольку количество записей в таблице не хранится отдельно. Он выбирает небольшой столбец из таблицы и подсчитывает количество значений в нём. +Запрос `SELECT count() FROM table` может быть оптимизирован включением настройки optimize_functions_to_subcolumns](../../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [null](../../../sql-reference/data-types/nullable.md#finding-null) вместо чтения всех данных таблицы. **Примеры** diff --git a/docs/ru/sql-reference/data-types/map.md b/docs/ru/sql-reference/data-types/map.md index 6cb8ccf1143..a703eb1b0ac 100644 --- a/docs/ru/sql-reference/data-types/map.md +++ b/docs/ru/sql-reference/data-types/map.md @@ -8,6 +8,7 @@ toc_title: Map(key, value) Тип данных `Map(key, value)` хранит пары `ключ:значение`. **Параметры** + - `key` — ключ. [String](../../sql-reference/data-types/string.md) или [Integer](../../sql-reference/data-types/int-uint.md). - `value` — значение. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) или [Array](../../sql-reference/data-types/array.md). @@ -61,6 +62,36 @@ SELECT a['key3'] FROM table_map; └─────────────────────────┘ ``` +## Подстолбцы Map.keys и Map.values {#map-subcolumns} + +Для оптимизации обработки столбцов `Map` в некоторых случаях можно использовать подстолбцы `keys` и `values` вместо чтения всего столбца. + +**Пример** + +Запрос: + +``` sql +CREATE TABLE t_map (`a` Map(String, UInt64)) ENGINE = Memory; + +INSERT INTO t_map VALUES (map('key1', 1, 'key2', 2, 'key3', 3)); + +SELECT a.keys FROM t_map; + +SELECT a.values FROM t_map; +``` + +Результат: + +``` text +┌─a.keys─────────────────┐ +│ ['key1','key2','key3'] │ +└────────────────────────┘ + +┌─a.values─┐ +│ [1,2,3] │ +└──────────┘ +``` + **См. также** - функция [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 0dfad45605a..cdf1a1a9bbf 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -11,18 +11,24 @@ toc_title: "Массивы" Тип результата - UInt8. Функция также работает для строк. +Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). 
При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [size0](../../sql-reference/data-types/array.md#array-size) вместо чтения и обработки всего столбца массива.
+
 ## notEmpty {#function-notempty}
 
 Возвращает 0 для пустого массива, и 1 для непустого массива.
 Тип результата - UInt8.
 Функция также работает для строк.
 
+Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [size0](../../sql-reference/data-types/array.md#array-size) вместо чтения и обработки всего столбца массива.
+
 ## length {#array_functions-length}
 
 Возвращает количество элементов в массиве.
 Тип результата - UInt64.
 Функция также работает для строк.
 
+Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [size0](../../sql-reference/data-types/array.md#array-size) вместо чтения и обработки всего столбца массива.
+
 ## emptyArrayUInt8, emptyArrayUInt16, emptyArrayUInt32, emptyArrayUInt64 {#emptyarrayuint8-emptyarrayuint16-emptyarrayuint32-emptyarrayuint64}
 
 ## emptyArrayInt8, emptyArrayInt16, emptyArrayInt32, emptyArrayInt64 {#emptyarrayint8-emptyarrayint16-emptyarrayint32-emptyarrayint64}
diff --git a/docs/ru/sql-reference/functions/tuple-map-functions.md b/docs/ru/sql-reference/functions/tuple-map-functions.md
index c385dbd8f87..7c3cd706d3c 100644
--- a/docs/ru/sql-reference/functions/tuple-map-functions.md
+++ b/docs/ru/sql-reference/functions/tuple-map-functions.md
@@ -224,6 +224,8 @@ SELECT mapContains(a, 'name') FROM test;
 
 Возвращает все ключи контейнера `map`.
 
+Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [keys](../../sql-reference/data-types/map.md#map-subcolumns) вместо чтения и обработки данных всего столбца.
+
 **Синтаксис**
 
 ```sql
@@ -265,6 +267,8 @@ SELECT mapKeys(a) FROM test;
 
 Возвращает все значения контейнера `map`.
 
+Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [values](../../sql-reference/data-types/map.md#map-subcolumns) вместо чтения и обработки данных всего столбца.
+
 **Синтаксис**
 
 ```sql
diff --git a/docs/ru/sql-reference/operators/index.md b/docs/ru/sql-reference/operators/index.md
index 5cf21b64079..80507f7529b 100644
--- a/docs/ru/sql-reference/operators/index.md
+++ b/docs/ru/sql-reference/operators/index.md
@@ -283,6 +283,8 @@ ClickHouse поддерживает операторы `IS NULL` и `IS NOT NULL
 - `0` в обратном случае.
 - Для прочих значений оператор `IS NULL` всегда возвращает `0`.
 
+Оператор можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` читается только подстолбец [null](../../sql-reference/data-types/nullable.md#finding-null) вместо чтения и обработки данных всего столбца.
+
 
 
 ``` sql
@@ -302,6 +304,8 @@ SELECT x+100 FROM t_null WHERE y IS NULL
 
 - `1`, в обратном случае.
- Для прочих значений оператор `IS NOT NULL` всегда возвращает `1`.
 
+Оператор можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` читается только подстолбец [null](../../sql-reference/data-types/nullable.md#finding-null) вместо чтения и обработки данных всего столбца.
+
 
 
 ``` sql

From 0e001329363284c1eb004fe5de2ee9dd654611df Mon Sep 17 00:00:00 2001
From: George
Date: Sun, 27 Jun 2021 03:44:26 +0300
Subject: [PATCH 091/183] Small fix

---
 docs/ru/operations/settings/settings.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index d1904dc8617..346971e2a11 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -1608,7 +1608,7 @@ ClickHouse генерирует исключение
 
 ## optimize_functions_to_subcolumns {#optimize-functions-to-subcolumns}
 
-Включает или отключает оптимизацию преобразованием некоторых функций к чтению подстобцов, таким образом уменьшая количество данных для чтения.
+Включает или отключает оптимизацию путем преобразования некоторых функций к чтению подстолбцов, таким образом уменьшая количество данных для чтения.
 
 Могут быть преобразованы следующие функции:
 
From ed262d423f5f8f387d565a1288a9cf7d1dd793ff Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Sun, 27 Jun 2021 13:22:49 +0300
Subject: [PATCH 092/183] Update
 docs/ru/sql-reference/aggregate-functions/reference/count.md

Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com>
---
 docs/ru/sql-reference/aggregate-functions/reference/count.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/ru/sql-reference/aggregate-functions/reference/count.md b/docs/ru/sql-reference/aggregate-functions/reference/count.md
index da882621085..9b753c03d24 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/count.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/count.md
@@ -29,7 +29,7 @@ ClickHouse поддерживает следующие виды синтакси
 
 ClickHouse поддерживает синтаксис `COUNT(DISTINCT ...)`. Поведение этой конструкции зависит от настройки [count_distinct_implementation](../../../operations/settings/settings.md#settings-count_distinct_implementation). Она определяет, какая из функций [uniq\*](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) используется для выполнения операции. По умолчанию — функция [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact).
 
-Запрос `SELECT count() FROM table` может быть оптимизирован включением настройки optimize_functions_to_subcolumns](../../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [null](../../../sql-reference/data-types/nullable.md#finding-null) вместо чтения всех данных таблицы.
+Запрос `SELECT count() FROM table` может быть оптимизирован включением настройки [optimize_functions_to_subcolumns](../../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [null](../../../sql-reference/data-types/nullable.md#finding-null) вместо чтения всех данных таблицы.
**Примеры** @@ -68,4 +68,3 @@ SELECT count(DISTINCT num) FROM t ``` Этот пример показывает, что `count(DISTINCT num)` выполняется с помощью функции `uniqExact` в соответствии со значением настройки `count_distinct_implementation`. - From 9674669726d9dfa9159368e74fd5de4c8f131d24 Mon Sep 17 00:00:00 2001 From: George Date: Sun, 27 Jun 2021 13:25:30 +0300 Subject: [PATCH 093/183] typo --- docs/ru/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 346971e2a11..eb78f37d87f 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1608,7 +1608,7 @@ ClickHouse генерирует исключение ## optimize_functions_to_subcolumns {#optimize-functions-to-subcolumns} -Включает или отключает оптимизацию путем преобразования некоторых функций к чтению подстобцов, таким образом уменьшая количество данных для чтения. +Включает или отключает оптимизацию путем преобразования некоторых функций к чтению подстолбцов, таким образом уменьшая объем данных для чтения. Могут быть преобразованы следующие функции: From f6fa720b4fea2fc1b3d4263f691f7a094c18793b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 27 Jun 2021 19:06:24 +0300 Subject: [PATCH 094/183] Better test --- .../0_stateless/01923_network_receive_time_metric_insert.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh index 8d66cfddb3e..e8b7cda0dff 100755 --- a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh +++ b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh @@ -11,7 +11,8 @@ seq 1 1000 | pv --quiet --rate-limit 3893 | ${CLICKHOUSE_CLIENT} --query "INSERT # We check that the value of NetworkReceiveElapsedMicroseconds correctly includes the time spent waiting data from the client. ${CLICKHOUSE_CLIENT} --multiquery --query "SYSTEM FLUSH LOGS; - SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'NetworkReceiveElapsedMicroseconds')] >= 1000000 FROM system.query_log + WITH ProfileEvents.Values[indexOf(ProfileEvents.Names, 'NetworkReceiveElapsedMicroseconds')] AS time + SELECT time >= 1000000 ? 
1 : time FROM system.query_log WHERE current_database = currentDatabase() AND query_kind = 'Insert' AND event_date >= yesterday() AND type = 2 ORDER BY event_time DESC LIMIT 1;" ${CLICKHOUSE_CLIENT} --query "DROP TABLE t" From e6adc405d823a1a9be8edd0bb6107836d1c190cc Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 14 May 2021 23:07:08 +0300 Subject: [PATCH 095/183] DateTime timezone fix --- src/DataTypes/getLeastSupertype.cpp | 15 ++++++++++++++- src/Functions/DateTimeTransforms.h | 18 +++++++++++------- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index a04d16ef7cd..4614d65ed8a 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -288,9 +288,18 @@ DataTypePtr getLeastSupertype(const DataTypes & types) ErrorCodes::NO_COMMON_TYPE); if (have_datetime64 == 0) + { + for (const auto & t : types) + { + if (const auto * data_type = typeid_cast(t.get())) + return std::make_shared(data_type->getTimeZone().getTimeZone()); + } + return std::make_shared(); + } UInt8 max_scale = 0; + const DataTypeDateTime64 * max_scale_date_time = nullptr; for (const auto & t : types) { @@ -298,11 +307,15 @@ DataTypePtr getLeastSupertype(const DataTypes & types) { const auto scale = dt64->getScale(); if (scale > max_scale) + { + max_scale_date_time = dt64; max_scale = scale; + } } } - return std::make_shared(max_scale); + assert(max_scale_date_time); + return std::make_shared(max_scale, max_scale_date_time->getTimeZone().getTimeZone()); } } diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 03f35333150..d4e1ad25084 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -863,19 +864,22 @@ struct DateTimeTransformImpl { using Op = Transformer; - size_t time_zone_argument_position = 1; - if constexpr (std::is_same_v) - time_zone_argument_position = 2; - - const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_argument_position, 0); - const ColumnPtr source_col = arguments[0].column; if (const auto * sources = checkAndGetColumn(source_col.get())) { auto mutable_result_col = result_type->createColumn(); auto * col_to = assert_cast(mutable_result_col.get()); - Op::vector(sources->getData(), col_to->getData(), time_zone, transform); + WhichDataType result_data_type(result_type); + if (result_data_type.isDateOrDateTime()) + { + const auto & time_zone = dynamic_cast(*result_type).getTimeZone(); + Op::vector(sources->getData(), col_to->getData(), time_zone, transform); + } + else + { + Op::vector(sources->getData(), col_to->getData(), DateLUT::instance(), transform); + } return mutable_result_col; } From 61b116332d44839523f41bd8dedd40151b5a80c0 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sat, 15 May 2021 00:46:17 +0300 Subject: [PATCH 096/183] Fixed tests --- src/Functions/DateTimeTransforms.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index d4e1ad25084..70035cdda30 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -871,7 +871,7 @@ struct DateTimeTransformImpl auto * col_to = assert_cast(mutable_result_col.get()); WhichDataType result_data_type(result_type); - if (result_data_type.isDateOrDateTime()) + if (result_data_type.isDateTime() || 
result_data_type.isDateTime64()) { const auto & time_zone = dynamic_cast(*result_type).getTimeZone(); Op::vector(sources->getData(), col_to->getData(), time_zone, transform); From 55dd0fbfc2aa0663d005f1675178c63bdff27a98 Mon Sep 17 00:00:00 2001 From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com> Date: Mon, 28 Jun 2021 13:16:41 +0300 Subject: [PATCH 097/183] Update docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md Co-authored-by: tavplubix --- .../external-dictionaries/external-dicts-dict-lifetime.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md index 9d4205ab1d1..ea1b62c6cef 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md @@ -51,7 +51,7 @@ LIFETIME(300) LIFETIME(MIN 300 MAX 360) ``` -Если `0` и `0`, ClickHouse не перегружает словарь по истечении времени. +Если `0` и `0`, ClickHouse не перезагружает словарь по истечении времени. В этом случае ClickHouse может перезагрузить данные словаря, если изменился XML файл с конфигурацией словаря или если была выполнена команда `SYSTEM RELOAD DICTIONARY`. При обновлении словарей сервер ClickHouse применяет различную логику в зависимости от типа [источника](external-dicts-dict-sources.md): From d20feeb42c0feea879b4167d04daa92bc3fc6f7a Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 28 Jun 2021 15:05:22 +0300 Subject: [PATCH 098/183] Relax check during DROP PART --- src/Storages/MergeTree/MergeTreeData.cpp | 29 +++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c115f7f593d..2f4c2efc7ba 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2332,10 +2332,33 @@ MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet(c throw Exception("Unexpected partition_id of part " + part->name + ". This is a bug.", ErrorCodes::LOGICAL_ERROR); /// It's a DROP PART and it's already executed by fetching some covering part - if (part->info != drop_range && part->info.contains(drop_range)) + bool is_drop_part = !drop_range.isFakeDropRangePart(); + + if (is_drop_part && part->info != drop_range) { - LOG_INFO(log, "Skipping drop range for part {} because covering part {} already exists", drop_range.getPartName(), part->name); - return {}; + /// Why we check only min and max blocks here without checking merge + /// level? It's a tricky situation which can happen on a stale + /// replica. For example, we have parts all_1_1_0, all_2_2_0 and + /// all_3_3_0. Fast replica assign some merges (OPTIMIZE FINAL or + /// TTL) all_2_2_0 -> all_2_2_1 -> all_2_2_2. So it has set of parts + /// all_1_1_0, all_2_2_2 and all_3_3_0. After that it decides to + /// drop part all_2_2_2. Now set of parts is all_1_1_0 and + /// all_3_3_0. Now fast replica assign merge all_1_1_0 + all_3_3_0 + /// to all_1_3_1 and finishes it. Slow replica pulls the queue and + /// have two contradictory tasks -- drop all_2_2_2 and merge/fetch + /// all_1_3_1. If this replica will fetch all_1_3_1 first and then tries + /// to drop all_2_2_2 after that it will receive the LOGICAL ERROR. 
+ /// So here we just check that all_1_3_1 covers blocks from drop + /// all_2_2_2. + /// + /// NOTE: this helps only to avoid logical error during drop part. + /// We still get intersecting "parts" in queue. + bool is_covered_by_min_max_block = part->info.min_block <= drop_range.min_block && part->info.max_block >= drop_range.max_block; + if (is_covered_by_min_max_block) + { + LOG_INFO(log, "Skipping drop range for part {} because covering part {} already exists", drop_range.getPartName(), part->name); + return {}; + } } if (part->info.min_block < drop_range.min_block) From b64eb0ff070c9de165868229d72b80052ea070f1 Mon Sep 17 00:00:00 2001 From: Slach Date: Mon, 28 Jun 2021 19:21:53 +0500 Subject: [PATCH 099/183] add DELETE IN PARTITION and UPDATE IN PARTITION into ALTER syntax TOC Signed-off-by: Slach --- docs/en/sql-reference/statements/alter/partition.md | 2 ++ docs/ru/sql-reference/statements/alter/partition.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 86381d3c6a4..090cbe93c54 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -19,6 +19,8 @@ The following operations with [partitions](../../../engines/table-engines/merget - [UNFREEZE PARTITION](#alter_unfreeze-partition) — Removes a backup of a partition. - [FETCH PARTITION\|PART](#alter_fetch-partition) — Downloads a part or partition from another server. - [MOVE PARTITION\|PART](#alter_move-partition) — Move partition/data part to another disk or volume. +- [UPDATE IN PARTITION](#update-in-partition) — Update data inside the partition by condition. +- [DELETE IN PARTITION](#delete-in-partition) — Delete data inside the partition by condition. diff --git a/docs/ru/sql-reference/statements/alter/partition.md b/docs/ru/sql-reference/statements/alter/partition.md index 79242e7bbf3..0a485c7b591 100644 --- a/docs/ru/sql-reference/statements/alter/partition.md +++ b/docs/ru/sql-reference/statements/alter/partition.md @@ -19,6 +19,8 @@ toc_title: PARTITION - [UNFREEZE PARTITION](#alter_unfreeze-partition) — удалить резервную копию партиции; - [FETCH PARTITION](#alter_fetch-partition) — скачать партицию с другого сервера; - [MOVE PARTITION\|PART](#alter_move-partition) — переместить партицию/кускок на другой диск или том. +- [UPDATE IN PARTITION](#update-in-partition) — обновить данные внутри партиции по условию. +- [DELETE IN PARTITION](#delete-in-partition) — удалить данные внутри партиции по условию. 
## DETACH PARTITION\|PART {#alter_detach-partition} From 5803fb9f04e9e3a53009a6f849d99a737ced4add Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 28 Jun 2021 17:23:47 +0300 Subject: [PATCH 100/183] Update docs/zh/engines/table-engines/special/file.md Co-authored-by: Amos Bird --- docs/zh/engines/table-engines/special/file.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/engines/table-engines/special/file.md b/docs/zh/engines/table-engines/special/file.md index 503d6d7e7f5..4464dcf198c 100644 --- a/docs/zh/engines/table-engines/special/file.md +++ b/docs/zh/engines/table-engines/special/file.md @@ -54,7 +54,7 @@ SELECT * FROM file_engine_table ## 在 Clickhouse-local 中的使用 {#zai-clickhouse-local-zhong-de-shi-yong} -使用 [clickhouse-local](../../../operations/utilities/clickhouse-local.md) 时,File 引擎除了 `Format` 之外,还可以接受文件路径参数。可以使用数字或人类可读的名称来指定标准输入/输出流,例如 `0` 或 `stdin`,`1` 或 `stdout`。 +使用 [clickhouse-local](../../../operations/utilities/clickhouse-local.md) 时,File 引擎除了 `Format` 之外,还可以接收文件路径参数。可以使用数字或名称来指定标准输入/输出流,例如 `0` 或 `stdin`,`1` 或 `stdout`。 **例如:** ``` bash From c2c78929cbe1c61baa3693a56babcd7414cd85cb Mon Sep 17 00:00:00 2001 From: Kostiantyn Storozhuk Date: Mon, 28 Jun 2021 21:50:43 +0800 Subject: [PATCH 101/183] Implemented MySQL column comments support --- src/Databases/MySQL/DatabaseMySQL.cpp | 4 +-- src/Databases/MySQL/DatabaseMySQL.h | 3 +- .../MySQL/FetchTablesColumnsList.cpp | 33 +++++++++++-------- src/Databases/MySQL/FetchTablesColumnsList.h | 4 +-- .../MySQL/InterpretersMySQLDDLQuery.cpp | 7 +--- src/Storages/ColumnsDescription.cpp | 4 +-- src/Storages/ColumnsDescription.h | 2 +- src/TableFunctions/TableFunctionMySQL.cpp | 2 +- .../test_mysql_database_engine/test.py | 19 +++++++++++ 9 files changed, 49 insertions(+), 29 deletions(-) diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index b3b1c95ef7c..5f356348829 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -232,7 +232,7 @@ void DatabaseMySQL::fetchLatestTablesStructureIntoCache( wait_update_tables_name.emplace_back(table_modification_time.first); } - std::map tables_and_columns = fetchTablesColumnsList(wait_update_tables_name, local_context); + std::map tables_and_columns = fetchTablesColumnsList(wait_update_tables_name, local_context); for (const auto & table_and_columns : tables_and_columns) { @@ -296,7 +296,7 @@ std::map DatabaseMySQL::fetchTablesWithModificationTime(ContextP return tables_with_modification_time; } -std::map +std::map DatabaseMySQL::fetchTablesColumnsList(const std::vector & tables_name, ContextPtr local_context) const { const auto & settings = local_context->getSettingsRef(); diff --git a/src/Databases/MySQL/DatabaseMySQL.h b/src/Databases/MySQL/DatabaseMySQL.h index 04246ddcbf5..0b364f0d8d3 100644 --- a/src/Databases/MySQL/DatabaseMySQL.h +++ b/src/Databases/MySQL/DatabaseMySQL.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -111,7 +112,7 @@ private: std::map fetchTablesWithModificationTime(ContextPtr local_context) const; - std::map fetchTablesColumnsList(const std::vector & tables_name, ContextPtr context) const; + std::map fetchTablesColumnsList(const std::vector & tables_name, ContextPtr context) const; void destroyLocalCacheExtraTables(const std::map & tables_with_modification_time) const; diff --git a/src/Databases/MySQL/FetchTablesColumnsList.cpp b/src/Databases/MySQL/FetchTablesColumnsList.cpp index cfd01d4ddc4..e792385d12f 100644 --- 
a/src/Databases/MySQL/FetchTablesColumnsList.cpp +++ b/src/Databases/MySQL/FetchTablesColumnsList.cpp @@ -40,14 +40,14 @@ String toQueryStringWithQuote(const std::vector & quote_list) namespace DB { -std::map fetchTablesColumnsList( +std::map fetchTablesColumnsList( mysqlxx::PoolWithFailover & pool, const String & database_name, const std::vector & tables_name, const Settings & settings, MultiEnum type_support) { - std::map tables_and_columns; + std::map tables_and_columns; if (tables_name.empty()) return tables_and_columns; @@ -62,6 +62,7 @@ std::map fetchTablesColumnsList( { std::make_shared(), "length" }, { std::make_shared(), "precision" }, { std::make_shared(), "scale" }, + { std::make_shared(), "column_comment" }, }; WriteBufferFromOwnString query; @@ -72,8 +73,9 @@ std::map fetchTablesColumnsList( " IS_NULLABLE = 'YES' AS is_nullable," " COLUMN_TYPE LIKE '%unsigned' AS is_unsigned," " CHARACTER_MAXIMUM_LENGTH AS length," - " NUMERIC_PRECISION as numeric_precision," - " IF(ISNULL(NUMERIC_SCALE), DATETIME_PRECISION, NUMERIC_SCALE) AS scale" // we know DATETIME_PRECISION as a scale in CH + " NUMERIC_PRECISION AS numeric_precision," + " IF(ISNULL(NUMERIC_SCALE), DATETIME_PRECISION, NUMERIC_SCALE) AS scale," // we know DATETIME_PRECISION as a scale in CH + " COLUMN_COMMENT AS column_comment" " FROM INFORMATION_SCHEMA.COLUMNS" " WHERE "; @@ -94,21 +96,24 @@ std::map fetchTablesColumnsList( const auto & char_max_length_col = *block.getByPosition(5).column; const auto & precision_col = *block.getByPosition(6).column; const auto & scale_col = *block.getByPosition(7).column; + const auto & column_comment_col = *block.getByPosition(8).column; size_t rows = block.rows(); for (size_t i = 0; i < rows; ++i) { String table_name = table_name_col[i].safeGet(); - tables_and_columns[table_name].emplace_back( - column_name_col[i].safeGet(), - convertMySQLDataType( - type_support, - column_type_col[i].safeGet(), - settings.external_table_functions_use_nulls && is_nullable_col[i].safeGet(), - is_unsigned_col[i].safeGet(), - char_max_length_col[i].safeGet(), - precision_col[i].safeGet(), - scale_col[i].safeGet())); + tables_and_columns[table_name].add( + ColumnDescription( + column_name_col[i].safeGet(), + convertMySQLDataType( + type_support, + column_type_col[i].safeGet(), + settings.external_table_functions_use_nulls && is_nullable_col[i].safeGet(), + is_unsigned_col[i].safeGet(), + char_max_length_col[i].safeGet(), + precision_col[i].safeGet(), + scale_col[i].safeGet()), + column_comment_col[i].safeGet())); } } return tables_and_columns; diff --git a/src/Databases/MySQL/FetchTablesColumnsList.h b/src/Databases/MySQL/FetchTablesColumnsList.h index 55f18e0115f..4b49fea864e 100644 --- a/src/Databases/MySQL/FetchTablesColumnsList.h +++ b/src/Databases/MySQL/FetchTablesColumnsList.h @@ -7,8 +7,8 @@ #include #include -#include #include +#include #include #include @@ -17,7 +17,7 @@ namespace DB { -std::map fetchTablesColumnsList( +std::map fetchTablesColumnsList( mysqlxx::PoolWithFailover & pool, const String & database_name, const std::vector & tables_name, diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index fbd537781de..7ebc4f1feb9 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -123,7 +123,6 @@ static ColumnsDescription createColumnsDescription(const NamesAndTypesList & col throw Exception("Columns of different size provided.", 
ErrorCodes::LOGICAL_ERROR); ColumnsDescription columns_description; - ColumnDescription column_description; for ( auto [column_name_and_type, declare_column_ast] = std::tuple{columns_name_and_type.begin(), columns_definition->children.begin()}; @@ -139,11 +138,7 @@ static ColumnsDescription createColumnsDescription(const NamesAndTypesList & col if (options->changes.count("comment")) comment = options->changes.at("comment")->as()->value.safeGet(); - column_description.name = column_name_and_type->name; - column_description.type = column_name_and_type->type; - if (!comment.empty()) - column_description.comment = std::move(comment); - columns_description.add(column_description); + columns_description.add(ColumnDescription(column_name_and_type->name, column_name_and_type->type, comment)); } return columns_description; diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 4a904c96432..cd7afae8a78 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -43,8 +43,8 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -ColumnDescription::ColumnDescription(String name_, DataTypePtr type_) - : name(std::move(name_)), type(std::move(type_)) +ColumnDescription::ColumnDescription(String name_, DataTypePtr type_, String comment_) + : name(std::move(name_)), type(std::move(type_)), comment(std::move(comment_)) { } diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 7fff22abf71..338b519cee6 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -39,7 +39,7 @@ struct ColumnDescription ColumnDescription() = default; ColumnDescription(ColumnDescription &&) = default; ColumnDescription(const ColumnDescription &) = default; - ColumnDescription(String name_, DataTypePtr type_); + ColumnDescription(String name_, DataTypePtr type_, String comment_ = ""); bool operator==(const ColumnDescription & other) const; bool operator!=(const ColumnDescription & other) const { return !(*this == other); } diff --git a/src/TableFunctions/TableFunctionMySQL.cpp b/src/TableFunctions/TableFunctionMySQL.cpp index eb310ef3696..f8e0c41634b 100644 --- a/src/TableFunctions/TableFunctionMySQL.cpp +++ b/src/TableFunctions/TableFunctionMySQL.cpp @@ -87,7 +87,7 @@ ColumnsDescription TableFunctionMySQL::getActualTableStructure(ContextPtr contex throw Exception("MySQL table " + (remote_database_name.empty() ? "" : (backQuote(remote_database_name) + ".")) + backQuote(remote_table_name) + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE); - return ColumnsDescription{columns->second}; + return columns->second; } StoragePtr TableFunctionMySQL::executeImpl( diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index 22f790e39c3..e1891aebf05 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -167,6 +167,25 @@ def test_bad_arguments_for_mysql_database_engine(started_cluster): assert 'Database engine MySQL requested literal argument.' 
in str(exception.value) mysql_node.query("DROP DATABASE test_bad_arguments") +def test_column_comments_for_mysql_database_engine(started_cluster): + with contextlib.closing(MySQLNodeInstance('root', 'clickhouse', started_cluster.mysql_ip, started_cluster.mysql_port)) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test_database") + mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") + + clickhouse_node.query( + "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse')") + assert 'test_database' in clickhouse_node.query('SHOW DATABASES') + + mysql_node.query( + "CREATE TABLE `test_database`.`test_table` ( `id` int(11) NOT NULL, PRIMARY KEY (`id`), `test` int COMMENT 'test comment') ENGINE=InnoDB;") + assert 'test comment' in clickhouse_node.query('DESCRIBE TABLE `test_database`.`test_table`') + + mysql_node.query("ALTER TABLE `test_database`.`test_table` ADD COLUMN `add_column` int(11) COMMENT 'add_column comment'") + assert 'add_column comment' in clickhouse_node.query( + "SELECT comment FROM system.columns WHERE table = 'test_table' AND database = 'test_database'") + + mysql_node.query("DROP DATABASE test_database") + def test_data_types_support_level_for_mysql_database_engine(started_cluster): with contextlib.closing(MySQLNodeInstance('root', 'clickhouse', started_cluster.mysql_ip, started_cluster.mysql_port)) as mysql_node: From 764380f7015e944f6b51898e0b472b3eb40037cd Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 28 Jun 2021 19:56:15 +0300 Subject: [PATCH 102/183] Updated tests --- src/DataTypes/getLeastSupertype.cpp | 21 ++++++++-------- src/Functions/DateTimeTransforms.h | 7 +++++- .../01925_date_date_time_comparison.reference | 2 ++ .../01925_date_date_time_comparison.sql | 2 ++ .../01926_date_date_time_supertype.reference | 12 ++++++++++ .../01926_date_date_time_supertype.sql | 24 +++++++++++++++++++ 6 files changed, 57 insertions(+), 11 deletions(-) create mode 100644 tests/queries/0_stateless/01925_date_date_time_comparison.reference create mode 100644 tests/queries/0_stateless/01925_date_date_time_comparison.sql create mode 100644 tests/queries/0_stateless/01926_date_date_time_supertype.reference create mode 100644 tests/queries/0_stateless/01926_date_date_time_supertype.sql diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 4614d65ed8a..33b40abdd47 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -289,33 +289,34 @@ DataTypePtr getLeastSupertype(const DataTypes & types) if (have_datetime64 == 0) { - for (const auto & t : types) + for (const auto & type : types) { - if (const auto * data_type = typeid_cast(t.get())) - return std::make_shared(data_type->getTimeZone().getTimeZone()); + if (isDateTime(type)) + return type; } return std::make_shared(); } UInt8 max_scale = 0; - const DataTypeDateTime64 * max_scale_date_time = nullptr; + size_t max_scale_date_time_index = 0; - for (const auto & t : types) + for (size_t i = 0; i < types.size(); ++i) { - if (const auto * dt64 = typeid_cast(t.get())) + const auto & type = types[i]; + + if (const auto * date_time64_type = typeid_cast(type.get())) { - const auto scale = dt64->getScale(); + const auto scale = date_time64_type->getScale(); if (scale > max_scale) { - max_scale_date_time = dt64; + max_scale_date_time_index = i; max_scale = scale; } } } - assert(max_scale_date_time); - return std::make_shared(max_scale, max_scale_date_time->getTimeZone().getTimeZone()); + 
return types[max_scale_date_time_index]; } } diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 70035cdda30..0f36fe52465 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -878,7 +878,12 @@ struct DateTimeTransformImpl } else { - Op::vector(sources->getData(), col_to->getData(), DateLUT::instance(), transform); + size_t time_zone_argument_position = 1; + if constexpr (std::is_same_v) + time_zone_argument_position = 2; + + const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_argument_position, 0); + Op::vector(sources->getData(), col_to->getData(), time_zone, transform); } return mutable_result_col; diff --git a/tests/queries/0_stateless/01925_date_date_time_comparison.reference b/tests/queries/0_stateless/01925_date_date_time_comparison.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/01925_date_date_time_comparison.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/01925_date_date_time_comparison.sql b/tests/queries/0_stateless/01925_date_date_time_comparison.sql new file mode 100644 index 00000000000..99c67816a42 --- /dev/null +++ b/tests/queries/0_stateless/01925_date_date_time_comparison.sql @@ -0,0 +1,2 @@ +SELECT toDate('2000-01-01') < toDateTime('2000-01-01 00:00:01', 'Europe/Moscow'); +SELECT toDate('2000-01-01') < toDateTime64('2000-01-01 00:00:01', 5, 'Europe/Moscow'); diff --git a/tests/queries/0_stateless/01926_date_date_time_supertype.reference b/tests/queries/0_stateless/01926_date_date_time_supertype.reference new file mode 100644 index 00000000000..ec9933dfbd2 --- /dev/null +++ b/tests/queries/0_stateless/01926_date_date_time_supertype.reference @@ -0,0 +1,12 @@ +Array +Array(DateTime(\'Europe/Moscow\')) +Array(DateTime64(5, \'Europe/Moscow\')) +Array(DateTime64(6, \'Europe/Moscow\')) +If +2000-01-01 00:00:00 DateTime(\'Europe/Moscow\') +2000-01-01 00:00:00 DateTime(\'Europe/Moscow\') +2000-01-01 00:00:00.00000 DateTime64(5, \'Europe/Moscow\') +2000-01-01 00:00:00.00000 DateTime64(5, \'Europe/Moscow\') +Cast +2000-01-01 00:00:00 DateTime(\'UTC\') +2000-01-01 00:00:00.00000 DateTime64(5, \'UTC\') diff --git a/tests/queries/0_stateless/01926_date_date_time_supertype.sql b/tests/queries/0_stateless/01926_date_date_time_supertype.sql new file mode 100644 index 00000000000..559cd465ebb --- /dev/null +++ b/tests/queries/0_stateless/01926_date_date_time_supertype.sql @@ -0,0 +1,24 @@ +SELECT 'Array'; + +SELECT toTypeName([toDate('2000-01-01'), toDateTime('2000-01-01', 'Europe/Moscow')]); +SELECT toTypeName([toDate('2000-01-01'), toDateTime('2000-01-01', 'Europe/Moscow'), toDateTime64('2000-01-01', 5, 'Europe/Moscow')]); +SELECT toTypeName([toDate('2000-01-01'), toDateTime('2000-01-01', 'Europe/Moscow'), toDateTime64('2000-01-01', 5, 'Europe/Moscow'), toDateTime64('2000-01-01', 6, 'Europe/Moscow')]); + +DROP TABLE IF EXISTS predicate_table; +CREATE TABLE predicate_table (value UInt8) ENGINE=TinyLog; + +INSERT INTO predicate_table VALUES (0), (1); + +SELECT 'If'; + +WITH toDate('2000-01-01') as a, toDateTime('2000-01-01', 'Europe/Moscow') as b +SELECT if(value, b, a) as result, toTypeName(result) +FROM predicate_table; + +WITH toDateTime('2000-01-01') as a, toDateTime64('2000-01-01', 5, 'Europe/Moscow') as b +SELECT if(value, b, a) as result, toTypeName(result) +FROM predicate_table; + +SELECT 'Cast'; +SELECT CAST(toDate('2000-01-01') AS DateTime('UTC')) AS x, toTypeName(x); +SELECT 
CAST(toDate('2000-01-01') AS DateTime64(5, 'UTC')) AS x, toTypeName(x); From 60530b4dae2049d22d80b9756652e45d1c3e1ab5 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 29 Jun 2021 00:54:22 +0300 Subject: [PATCH 103/183] Fixed tests --- .../queries/0_stateless/00735_long_conditional.reference | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/00735_long_conditional.reference b/tests/queries/0_stateless/00735_long_conditional.reference index 6308a48218b..082c2d49de9 100644 --- a/tests/queries/0_stateless/00735_long_conditional.reference +++ b/tests/queries/0_stateless/00735_long_conditional.reference @@ -92,8 +92,8 @@ value vs value 0 1 1 UInt64 Decimal(18, 0) Decimal(38, 0) 0 1 1 UInt64 Decimal(38, 0) Decimal(38, 0) 1970-01-01 1970-01-02 1970-01-02 Date Date Date -2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Europe/Moscow\') DateTime -2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Europe/Moscow\') Date DateTime +2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Europe/Moscow\') Date DateTime(\'Europe/Moscow\') 1970-01-01 03:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') column vs value 0 1 1 Int8 Int8 Int8 @@ -189,6 +189,6 @@ column vs value 0 1 1 UInt64 Decimal(18, 0) Decimal(38, 0) 0 1 1 UInt64 Decimal(38, 0) Decimal(38, 0) 1970-01-01 1970-01-02 1970-01-02 Date Date Date -2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Europe/Moscow\') DateTime -2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Europe/Moscow\') Date DateTime +2000-01-01 2000-01-01 00:00:01 2000-01-01 00:00:01 Date DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') +2000-01-01 00:00:00 2000-01-02 2000-01-02 00:00:00 DateTime(\'Europe/Moscow\') Date DateTime(\'Europe/Moscow\') 1970-01-01 03:00:00 1970-01-01 03:00:01 1970-01-01 03:00:01 DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') DateTime(\'Europe/Moscow\') From 5d89907ae9b938e544da8161d16da9cab38e76ad Mon Sep 17 00:00:00 2001 From: Ildus Kurbangaliev Date: Tue, 29 Jun 2021 00:34:45 +0200 Subject: [PATCH 104/183] Fix style errors --- src/Functions/array/mapOp.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Functions/array/mapOp.cpp b/src/Functions/array/mapOp.cpp index 5c2637270d5..ac4ac565546 100644 --- a/src/Functions/array/mapOp.cpp +++ b/src/Functions/array/mapOp.cpp @@ -68,7 +68,7 @@ private: if (which_ch_val.isFloat() != which_val.isFloat()) throw Exception( - "All value types in " + getName() + " should be ether or float or integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + "All value types in " + getName() + " should be either or float or integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); if (!(check_val_type->equals(*promoted_val_type))) { @@ -386,9 +386,9 @@ private: } } else - throw Exception{ + throw Exception( "Illegal column type " + arguments[0].type->getName() + " in arguments of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } // we can check const columns before any processing @@ -439,9 +439,9 @@ private: case TypeIndex::String: return execute1(row_count, res_type, res_value_type, args); default: - throw Exception{ + throw Exception( "Illegal column type " + key_type->getName() + " for keys in arguments of function " + 
getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } } }; From 7cdde9ed360ef7fed6078209420a8aeae184b9f2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 29 Jun 2021 01:48:54 +0300 Subject: [PATCH 105/183] Add links to builds on the front page --- docs/_includes/install/freebsd.sh | 3 +++ docs/_includes/install/mac-arm.sh | 3 +++ docs/_includes/install/mac-x86.sh | 3 +++ docs/en/getting-started/install.md | 7 ++++--- website/templates/index/quickstart.html | 22 +++++++++++++++++++--- 5 files changed, 32 insertions(+), 6 deletions(-) create mode 100644 docs/_includes/install/freebsd.sh create mode 100644 docs/_includes/install/mac-arm.sh create mode 100644 docs/_includes/install/mac-x86.sh diff --git a/docs/_includes/install/freebsd.sh b/docs/_includes/install/freebsd.sh new file mode 100644 index 00000000000..d664ea19a18 --- /dev/null +++ b/docs/_includes/install/freebsd.sh @@ -0,0 +1,3 @@ +wget 'https://builds.clickhouse.tech/master/freebsd/clickhouse' +chmod a+x ./clickhouse +sudo ./clickhouse install diff --git a/docs/_includes/install/mac-arm.sh b/docs/_includes/install/mac-arm.sh new file mode 100644 index 00000000000..9fc5c0cef22 --- /dev/null +++ b/docs/_includes/install/mac-arm.sh @@ -0,0 +1,3 @@ +wget 'https://builds.clickhouse.tech/master/macos-aarch64/clickhouse' +chmod a+x ./clickhouse +./clickhouse diff --git a/docs/_includes/install/mac-x86.sh b/docs/_includes/install/mac-x86.sh new file mode 100644 index 00000000000..1423769b6d5 --- /dev/null +++ b/docs/_includes/install/mac-x86.sh @@ -0,0 +1,3 @@ +wget 'https://builds.clickhouse.tech/master/macos/clickhouse' +chmod a+x ./clickhouse +./clickhouse diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 4256de49e4a..af4061ad484 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -107,9 +107,10 @@ sudo ./clickhouse install For non-Linux operating systems and for AArch64 CPU arhitecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay). -- [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse` -- [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse` -- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse` +- [MacOS x86_64](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse` +- [MacOS Aarch64 (Apple Silicon)](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos-aarch64/clickhouse' && chmod a+x ./clickhouse` +- [FreeBSD x86_64](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse` +- [Linux AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse` After downloading, you can use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data. 
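As a quick sanity check of a freshly downloaded binary, any trivial query run through `clickhouse local` will do; a minimal sketch (the exact query is arbitrary):

```sql
-- Invoked as: ./clickhouse local --query "SELECT version()"
-- Prints the version compiled into the standalone binary; no server needed.
SELECT version();
```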
diff --git a/website/templates/index/quickstart.html b/website/templates/index/quickstart.html index 0d967e7b96c..b74e52905ff 100644 --- a/website/templates/index/quickstart.html +++ b/website/templates/index/quickstart.html @@ -2,9 +2,7 @@

 [markup lost in extraction — kept heading: "Quick start"]
 
-[markup lost in extraction — removed line: "System requirements for pre-built packages: Linux, x86_64 with SSE 4.2."]
-
 [markup lost in extraction — kept paragraph:]
For other operating systems the easiest way to get started is using From ffdd5c67ae5f188f71e97b74051c50c3b64c3eff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 29 Jun 2021 01:55:04 +0300 Subject: [PATCH 106/183] Fix error in docs --- docs/en/getting-started/install.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index af4061ad484..8331870d775 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -108,7 +108,7 @@ sudo ./clickhouse install For non-Linux operating systems and for AArch64 CPU arhitecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay). - [MacOS x86_64](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse` -- [MacOS Aarch64 (Apple Silicon)](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos-aarch64/clickhouse' && chmod a+x ./clickhouse` +- [MacOS Aarch64 (Apple Silicon)](https://builds.clickhouse.tech/master/macos-aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos-aarch64/clickhouse' && chmod a+x ./clickhouse` - [FreeBSD x86_64](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse` - [Linux AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse` From 9ebbdb19d5effb6bf546a1fc58cce7e89a7dd85b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 29 Jun 2021 03:17:09 +0300 Subject: [PATCH 107/183] Fix obsolete scripts --- docs/tools/build.py | 4 ---- docs/tools/test.py | 30 ------------------------------ website/README.md | 6 +++--- 3 files changed, 3 insertions(+), 37 deletions(-) diff --git a/docs/tools/build.py b/docs/tools/build.py index 39e91f59cc4..dae61eec87e 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -154,9 +154,6 @@ def build(args): if not args.skip_website: website.build_website(args) - if not args.skip_test_templates: - test.test_templates(args.website_dir) - if not args.skip_docs: generate_cmake_flags_files() @@ -197,7 +194,6 @@ if __name__ == '__main__': arg_parser.add_argument('--skip-blog', action='store_true') arg_parser.add_argument('--skip-git-log', action='store_true') arg_parser.add_argument('--skip-docs', action='store_true') - arg_parser.add_argument('--skip-test-templates', action='store_true') arg_parser.add_argument('--test-only', action='store_true') arg_parser.add_argument('--minify', action='store_true') arg_parser.add_argument('--htmlproofer', action='store_true') diff --git a/docs/tools/test.py b/docs/tools/test.py index ada4df29644..526294dbe21 100755 --- a/docs/tools/test.py +++ b/docs/tools/test.py @@ -7,36 +7,6 @@ import bs4 import subprocess -def test_template(template_path): - if template_path.endswith('amp.html'): - # Inline CSS/JS is ok for AMP pages - return - - logging.debug(f'Running tests for {template_path} template') - with open(template_path, 'r') as f: - soup = bs4.BeautifulSoup( - f, - features='html.parser' - ) - for tag in soup.find_all(): - style_attr = tag.attrs.get('style') - assert not style_attr, f'Inline CSS is prohibited, found {style_attr} in {template_path}' - - if tag.name == 'script': - if 
tag.attrs.get('type') == 'application/ld+json': - continue - for content in tag.contents: - assert not content, f'Inline JavaScript is prohibited, found "{content}" in {template_path}' - - -def test_templates(base_dir): - logging.info('Running tests for templates') - for root, _, filenames in os.walk(base_dir): - for filename in filenames: - if filename.endswith('.html'): - test_template(os.path.join(root, filename)) - - def test_single_page(input_path, lang): with open(input_path) as f: soup = bs4.BeautifulSoup( diff --git a/website/README.md b/website/README.md index a09a00379d1..9f808c6f658 100644 --- a/website/README.md +++ b/website/README.md @@ -12,7 +12,7 @@ sudo npm install -g purify-css amphtml-validator sudo apt install wkhtmltopdf virtualenv build -./build.py --skip-multi-page --skip-single-page --skip-amp --skip-pdf --skip-blog --skip-git-log --skip-docs --skip-test-templates --livereload 8080 +./build.py --skip-multi-page --skip-single-page --skip-amp --skip-pdf --skip-blog --skip-git-log --skip-docs --livereload 8080 # Open the web browser and go to http://localhost:8080/ ``` @@ -20,11 +20,11 @@ virtualenv build # How to quickly test the blog ``` -./build.py --skip-multi-page --skip-single-page --skip-amp --skip-pdf --skip-git-log --skip-docs --skip-test-templates --livereload 8080 +./build.py --skip-multi-page --skip-single-page --skip-amp --skip-pdf --skip-git-log --skip-docs --livereload 8080 ``` # How to quickly test the ugly annoying broken links in docs ``` -./build.py --skip-multi-page --skip-amp --skip-pdf --skip-blog --skip-git-log --skip-test-templates --lang en --livereload 8080 +./build.py --skip-multi-page --skip-amp --skip-pdf --skip-blog --skip-git-log --lang en --livereload 8080 ``` From 296593716dc7905f4f25538a105236b3717d9877 Mon Sep 17 00:00:00 2001 From: Kostiantyn Storozhuk Date: Tue, 29 Jun 2021 14:37:53 +0800 Subject: [PATCH 108/183] Reverted constructor change --- src/Databases/MySQL/FetchTablesColumnsList.cpp | 10 ++++++---- src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp | 8 ++++++-- src/Storages/ColumnsDescription.cpp | 4 ++-- src/Storages/ColumnsDescription.h | 2 +- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/Databases/MySQL/FetchTablesColumnsList.cpp b/src/Databases/MySQL/FetchTablesColumnsList.cpp index e792385d12f..d668e79198c 100644 --- a/src/Databases/MySQL/FetchTablesColumnsList.cpp +++ b/src/Databases/MySQL/FetchTablesColumnsList.cpp @@ -102,8 +102,7 @@ std::map fetchTablesColumnsList( for (size_t i = 0; i < rows; ++i) { String table_name = table_name_col[i].safeGet(); - tables_and_columns[table_name].add( - ColumnDescription( + ColumnDescription column_description( column_name_col[i].safeGet(), convertMySQLDataType( type_support, @@ -112,8 +111,11 @@ std::map fetchTablesColumnsList( is_unsigned_col[i].safeGet(), char_max_length_col[i].safeGet(), precision_col[i].safeGet(), - scale_col[i].safeGet()), - column_comment_col[i].safeGet())); + scale_col[i].safeGet()) + ); + column_description.comment = column_comment_col[i].safeGet(); + + tables_and_columns[table_name].add(column_description); } } return tables_and_columns; diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index 7ebc4f1feb9..bf4dfee2780 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -137,8 +137,12 @@ static ColumnsDescription createColumnsDescription(const NamesAndTypesList & col if (const auto * 
options = declare_column->column_options->as()) if (options->changes.count("comment")) comment = options->changes.at("comment")->as()->value.safeGet(); - - columns_description.add(ColumnDescription(column_name_and_type->name, column_name_and_type->type, comment)); + + ColumnDescription column_description(column_name_and_type->name, column_name_and_type->type); + if(!comment.empty()) + column_description.comment = std::move(comment); + + columns_description.add(column_description); } return columns_description; diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index cd7afae8a78..4a904c96432 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -43,8 +43,8 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -ColumnDescription::ColumnDescription(String name_, DataTypePtr type_, String comment_) - : name(std::move(name_)), type(std::move(type_)), comment(std::move(comment_)) +ColumnDescription::ColumnDescription(String name_, DataTypePtr type_) + : name(std::move(name_)), type(std::move(type_)) { } diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 338b519cee6..7fff22abf71 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -39,7 +39,7 @@ struct ColumnDescription ColumnDescription() = default; ColumnDescription(ColumnDescription &&) = default; ColumnDescription(const ColumnDescription &) = default; - ColumnDescription(String name_, DataTypePtr type_, String comment_ = ""); + ColumnDescription(String name_, DataTypePtr type_); bool operator==(const ColumnDescription & other) const; bool operator!=(const ColumnDescription & other) const { return !(*this == other); } From 090d1360808d86b1651a1071952156e9398a85e3 Mon Sep 17 00:00:00 2001 From: hcz Date: Tue, 29 Jun 2021 15:18:40 +0800 Subject: [PATCH 109/183] Fix links in median.md --- .../aggregate-functions/reference/median.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/median.md b/docs/en/sql-reference/aggregate-functions/reference/median.md index b4f38a9b562..5d681389eb0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/median.md +++ b/docs/en/sql-reference/aggregate-functions/reference/median.md @@ -4,14 +4,14 @@ The `median*` functions are the aliases for the corresponding `quantile*` functi Functions: -- `median` — Alias for [quantile](#quantile). -- `medianDeterministic` — Alias for [quantileDeterministic](#quantiledeterministic). -- `medianExact` — Alias for [quantileExact](#quantileexact). -- `medianExactWeighted` — Alias for [quantileExactWeighted](#quantileexactweighted). -- `medianTiming` — Alias for [quantileTiming](#quantiletiming). -- `medianTimingWeighted` — Alias for [quantileTimingWeighted](#quantiletimingweighted). -- `medianTDigest` — Alias for [quantileTDigest](#quantiletdigest). -- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](#quantiletdigestweighted). +- `median` — Alias for [quantile](../../../sql-reference/aggregate-functions/reference/quantile#quantile). +- `medianDeterministic` — Alias for [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic#quantiledeterministic). +- `medianExact` — Alias for [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact#quantileexact). 
+- `medianExactWeighted` — Alias for [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted#quantileexactweighted). +- `medianTiming` — Alias for [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming#quantiletiming). +- `medianTimingWeighted` — Alias for [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted#quantiletimingweighted). +- `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest#quantiletdigest). +- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted#quantiletdigestweighted). **Example** From a9510f25f9e9c6bdc0e143346a3f829d68164c3b Mon Sep 17 00:00:00 2001 From: Kostiantyn Storozhuk Date: Tue, 29 Jun 2021 15:22:08 +0800 Subject: [PATCH 110/183] Style fix --- src/Databases/MySQL/FetchTablesColumnsList.cpp | 2 +- src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp | 6 +++--- tests/integration/test_mysql_database_engine/test.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Databases/MySQL/FetchTablesColumnsList.cpp b/src/Databases/MySQL/FetchTablesColumnsList.cpp index d668e79198c..bbd187090df 100644 --- a/src/Databases/MySQL/FetchTablesColumnsList.cpp +++ b/src/Databases/MySQL/FetchTablesColumnsList.cpp @@ -114,7 +114,7 @@ std::map fetchTablesColumnsList( scale_col[i].safeGet()) ); column_description.comment = column_comment_col[i].safeGet(); - + tables_and_columns[table_name].add(column_description); } } diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index bf4dfee2780..503e5c0e707 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -137,11 +137,11 @@ static ColumnsDescription createColumnsDescription(const NamesAndTypesList & col if (const auto * options = declare_column->column_options->as()) if (options->changes.count("comment")) comment = options->changes.at("comment")->as()->value.safeGet(); - + ColumnDescription column_description(column_name_and_type->name, column_name_and_type->type); - if(!comment.empty()) + if (!comment.empty()) column_description.comment = std::move(comment); - + columns_description.add(column_description); } diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index e1891aebf05..39198f6d56d 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -183,9 +183,9 @@ def test_column_comments_for_mysql_database_engine(started_cluster): mysql_node.query("ALTER TABLE `test_database`.`test_table` ADD COLUMN `add_column` int(11) COMMENT 'add_column comment'") assert 'add_column comment' in clickhouse_node.query( "SELECT comment FROM system.columns WHERE table = 'test_table' AND database = 'test_database'") - + mysql_node.query("DROP DATABASE test_database") - + def test_data_types_support_level_for_mysql_database_engine(started_cluster): with contextlib.closing(MySQLNodeInstance('root', 'clickhouse', started_cluster.mysql_ip, started_cluster.mysql_port)) as mysql_node: From 941a6e539fcd3d169b7a425731db07da31afb591 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 29 Jun 2021 10:36:02 +0300 Subject: [PATCH 111/183] Fix --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 2f4c2efc7ba..104bf8fb9c9 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2334,7 +2334,7 @@ MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet(c /// It's a DROP PART and it's already executed by fetching some covering part bool is_drop_part = !drop_range.isFakeDropRangePart(); - if (is_drop_part && part->info != drop_range) + if (is_drop_part && part->info.min_block != drop_range.min_block && part->info.max_block != drop_range.max_block) { /// Why we check only min and max blocks here without checking merge /// level? It's a tricky situation which can happen on a stale From 41620a5661335f0c5d89a81c2b795180b976bf8e Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 29 Jun 2021 11:05:07 +0300 Subject: [PATCH 112/183] Fixed tests --- src/DataTypes/getLeastSupertype.cpp | 2 +- tests/queries/0_stateless/01925_date_date_time_comparison.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 33b40abdd47..a950d18b50a 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -308,7 +308,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types) if (const auto * date_time64_type = typeid_cast(type.get())) { const auto scale = date_time64_type->getScale(); - if (scale > max_scale) + if (scale >= max_scale) { max_scale_date_time_index = i; max_scale = scale; diff --git a/tests/queries/0_stateless/01925_date_date_time_comparison.sql b/tests/queries/0_stateless/01925_date_date_time_comparison.sql index 99c67816a42..13e856384d2 100644 --- a/tests/queries/0_stateless/01925_date_date_time_comparison.sql +++ b/tests/queries/0_stateless/01925_date_date_time_comparison.sql @@ -1,2 +1,2 @@ SELECT toDate('2000-01-01') < toDateTime('2000-01-01 00:00:01', 'Europe/Moscow'); -SELECT toDate('2000-01-01') < toDateTime64('2000-01-01 00:00:01', 5, 'Europe/Moscow'); +SELECT toDate('2000-01-01') < toDateTime64('2000-01-01 00:00:01', 0, 'Europe/Moscow'); From bcbfbae2a94c14e9c2819c8c849063390aa54ec6 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 29 Jun 2021 11:25:27 +0300 Subject: [PATCH 113/183] Update median.md --- .../aggregate-functions/reference/median.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/median.md b/docs/en/sql-reference/aggregate-functions/reference/median.md index 5d681389eb0..8ab2273b32e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/median.md +++ b/docs/en/sql-reference/aggregate-functions/reference/median.md @@ -4,14 +4,14 @@ The `median*` functions are the aliases for the corresponding `quantile*` functi Functions: -- `median` — Alias for [quantile](../../../sql-reference/aggregate-functions/reference/quantile#quantile). -- `medianDeterministic` — Alias for [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic#quantiledeterministic). -- `medianExact` — Alias for [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact#quantileexact). -- `medianExactWeighted` — Alias for [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted#quantileexactweighted). 
-- `medianTiming` — Alias for [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming#quantiletiming). -- `medianTimingWeighted` — Alias for [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted#quantiletimingweighted). -- `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest#quantiletdigest). -- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted#quantiletdigestweighted). +- `median` — Alias for [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile). +- `medianDeterministic` — Alias for [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md#quantiledeterministic). +- `medianExact` — Alias for [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexact). +- `medianExactWeighted` — Alias for [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md#quantileexactweighted). +- `medianTiming` — Alias for [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md#quantiletiming). +- `medianTimingWeighted` — Alias for [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md#quantiletimingweighted). +- `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest). +- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted). 
**Example** From 44f1a96ad6eab155e7841e7e0f920d46b55819dd Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 29 Jun 2021 12:49:25 +0300 Subject: [PATCH 114/183] Better test_version_update_after_mutation --- .../integration/test_version_update_after_mutation/test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index a1ae17b8451..4f8a61a5bf0 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -39,8 +39,8 @@ def test_mutate_and_upgrade(start_cluster): node2.restart_with_latest_version(signal=9) # After hard restart table can be in readonly mode - exec_query_with_retry(node2, "INSERT INTO mt VALUES ('2020-02-13', 3)") - exec_query_with_retry(node1, "SYSTEM SYNC REPLICA mt") + exec_query_with_retry(node2, "INSERT INTO mt VALUES ('2020-02-13', 3)", retry_count=60) + exec_query_with_retry(node1, "SYSTEM SYNC REPLICA mt", retry_count=60) assert node1.query("SELECT COUNT() FROM mt") == "2\n" assert node2.query("SELECT COUNT() FROM mt") == "2\n" @@ -79,7 +79,8 @@ def test_upgrade_while_mutation(start_cluster): node3.restart_with_latest_version(signal=9) - exec_query_with_retry(node3, "SYSTEM RESTART REPLICA mt1") + # checks for readonly + exec_query_with_retry(node3, "OPTIMIZE TABLE mt1", retry_count=60) node3.query("ALTER TABLE mt1 DELETE WHERE id > 100000", settings={"mutations_sync": "2"}) # will delete nothing, but previous async mutation will finish with this query From 7680eab0e4c122aafe7f90c5a7239c6bc127fefe Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 29 Jun 2021 13:03:00 +0300 Subject: [PATCH 115/183] one more fix --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 104bf8fb9c9..f897d833096 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2334,7 +2334,7 @@ MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet(c /// It's a DROP PART and it's already executed by fetching some covering part bool is_drop_part = !drop_range.isFakeDropRangePart(); - if (is_drop_part && part->info.min_block != drop_range.min_block && part->info.max_block != drop_range.max_block) + if (is_drop_part && (part->info.min_block != drop_range.min_block || part->info.max_block != drop_range.max_block)) { /// Why we check only min and max blocks here without checking merge /// level? 
It's a tricky situation which can happen on a stale From a9fad56b7a794e52f8e2b530cd9db5338ac17fe4 Mon Sep 17 00:00:00 2001 From: Kostiantyn Storozhuk Date: Tue, 29 Jun 2021 18:07:18 +0800 Subject: [PATCH 116/183] Flaky test fixed --- tests/integration/test_mysql_database_engine/test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index 39198f6d56d..b3c057e5a93 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -180,6 +180,8 @@ def test_column_comments_for_mysql_database_engine(started_cluster): "CREATE TABLE `test_database`.`test_table` ( `id` int(11) NOT NULL, PRIMARY KEY (`id`), `test` int COMMENT 'test comment') ENGINE=InnoDB;") assert 'test comment' in clickhouse_node.query('DESCRIBE TABLE `test_database`.`test_table`') + time.sleep( + 3) # Because the unit of MySQL modification time is seconds, modifications made in the same second cannot be obtained mysql_node.query("ALTER TABLE `test_database`.`test_table` ADD COLUMN `add_column` int(11) COMMENT 'add_column comment'") assert 'add_column comment' in clickhouse_node.query( "SELECT comment FROM system.columns WHERE table = 'test_table' AND database = 'test_database'") From 27d3251aa277636f0750730e4844d4229c76d901 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 29 Jun 2021 14:51:11 +0400 Subject: [PATCH 117/183] Update s3Cluster.md --- docs/ru/sql-reference/table-functions/s3Cluster.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/table-functions/s3Cluster.md b/docs/ru/sql-reference/table-functions/s3Cluster.md index 32916a2b122..826f1a5b25b 100644 --- a/docs/ru/sql-reference/table-functions/s3Cluster.md +++ b/docs/ru/sql-reference/table-functions/s3Cluster.md @@ -16,7 +16,7 @@ s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, stru **Аргументы** - `cluster_name` — имя кластера, используемое для создания набора адресов и параметров подключения к удаленным и локальным серверам. -- `source` — URL-адрес бакета с указанием пути к файлу. Поддерживает следующие подстановочные символы в режиме "только чтение": `*`, `?`, `{'abc','def'}` и `{N..M}`, где `N`, `M` — числа, `abc`, `def` — строки. Подробнее смотрите в разделе [Символы подстановки](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `source` — URL файла или нескольких файлов. Поддерживает следующие символы подстановки: `*`, `?`, `{'abc','def'}` и `{N..M}`, где `N`, `M` — числа, `abc`, `def` — строки. Подробнее смотрите в разделе [Символы подстановки](../../engines/table-engines/integrations/s3.md#wildcards-in-path). - `access_key_id` и `secret_access_key` — ключи, указывающие на учетные данные для использования с точкой приема запроса. Необязательные параметры. - `format` — [формат](../../interfaces/formats.md#formats) файла. - `structure` — структура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`. 
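The Russian patch above and the English patch below document the same `s3Cluster` signature. As a minimal, hedged sketch of how the documented arguments fit together — the cluster name, endpoint URL, and table structure are hypothetical placeholders rather than values taken from these patches, and the optional credential arguments are omitted:

```sql
-- Count rows across several CSV objects, fanned out over a hypothetical cluster.
-- 'my_cluster' is assumed to be defined in remote_servers; the URL uses the
-- {N..M} wildcard described in the patched docs.
SELECT count()
FROM s3Cluster(
    'my_cluster',
    'https://s3.example.com/bucket/data/file-{1..9}.csv',
    'CSV',
    'id UInt32, value String');
```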
From 504db73840de89f806659b41a744210ec6745353 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 29 Jun 2021 14:51:36 +0400 Subject: [PATCH 118/183] Update docs/en/sql-reference/table-functions/s3Cluster.md --- docs/en/sql-reference/table-functions/s3Cluster.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index 794d009f644..65565aa92cb 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -16,7 +16,7 @@ s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, stru **Arguments** - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. -- `source` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). - `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. From bf827936b78a9db2594c6f99975a20dd29c92db7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 29 Jun 2021 14:53:34 +0300 Subject: [PATCH 119/183] Rename PrewhereActions --- src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp | 2 +- src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h | 4 ++-- src/Storages/MergeTree/MergeTreeRangeReader.cpp | 2 +- src/Storages/MergeTree/MergeTreeRangeReader.h | 7 ++++--- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 68f754b08fb..9334baef964 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -53,7 +53,7 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( if (prewhere_info) { - prewhere_actions = std::make_unique(); + prewhere_actions = std::make_unique(); if (prewhere_info->alias_actions) prewhere_actions->alias_actions = std::make_shared(prewhere_info->alias_actions, actions_settings); diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index 2ae39dbb058..8da9b002e16 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -13,7 +13,7 @@ namespace DB class IMergeTreeReader; class UncompressedCache; class MarkCache; -struct PrewhereActions; +struct PrewhereExprInfo; /// Base class for MergeTreeThreadSelectProcessor and MergeTreeSelectProcessor class MergeTreeBaseSelectProcessor : public SourceWithProgress @@ -60,7 +60,7 @@ protected: StorageMetadataPtr metadata_snapshot; PrewhereInfoPtr prewhere_info; - std::unique_ptr prewhere_actions; + std::unique_ptr prewhere_actions; UInt64 
max_block_size_rows; UInt64 preferred_block_size_bytes; diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 8072aa6a3dc..2347280a4a0 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -520,7 +520,7 @@ size_t MergeTreeRangeReader::ReadResult::countBytesInResultFilter(const IColumn: MergeTreeRangeReader::MergeTreeRangeReader( IMergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, - const PrewhereActions * prewhere_info_, + const PrewhereExprInfo * prewhere_info_, bool last_reader_in_chain_) : merge_tree_reader(merge_tree_reader_) , index_granularity(&(merge_tree_reader->data_part->index_granularity)) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 7c36ca49c99..8cdf485ff1e 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -18,7 +18,8 @@ using PrewhereInfoPtr = std::shared_ptr; class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; -struct PrewhereActions +/// The same as PrewhereInfo, but with ExpressionActions instead of ActionsDAG +struct PrewhereExprInfo { /// Actions which are executed in order to alias columns are used for prewhere actions. ExpressionActionsPtr alias_actions; @@ -42,7 +43,7 @@ public: MergeTreeRangeReader( IMergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, - const PrewhereActions * prewhere_info_, + const PrewhereExprInfo * prewhere_info_, bool last_reader_in_chain_); MergeTreeRangeReader() = default; @@ -235,7 +236,7 @@ private: IMergeTreeReader * merge_tree_reader = nullptr; const MergeTreeIndexGranularity * index_granularity = nullptr; MergeTreeRangeReader * prev_reader = nullptr; /// If not nullptr, read from prev_reader firstly. - const PrewhereActions * prewhere_info; + const PrewhereExprInfo * prewhere_info; Stream stream; From 5a746b61f30607bb74b9dbee153154cb63c1b6c1 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 29 Jun 2021 15:14:12 +0300 Subject: [PATCH 120/183] Update website/templates/index/quickstart.html Co-authored-by: Ivan Blinkov --- website/templates/index/quickstart.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/templates/index/quickstart.html b/website/templates/index/quickstart.html index b74e52905ff..1c99de63354 100644 --- a/website/templates/index/quickstart.html +++ b/website/templates/index/quickstart.html @@ -2,7 +2,7 @@

Quick start
Date: Thu, 1 Jul 2021 16:59:16 +0100 Subject: [PATCH 160/183] CLICKHOUSE-1194: add skipping index to the beginning of the list add the FIRST keyword to the ADD INDEX command to be able to add index in the beginning of the list. Signed-off-by: Aleksei Semiglazov --- .../statements/alter/index/index.md | 2 +- docs/ja/sql-reference/statements/alter.md | 2 +- .../statements/alter/index/index.md | 2 +- docs/zh/sql-reference/statements/alter.md | 2 +- src/Parsers/ASTAlterQuery.cpp | 5 ++-- src/Parsers/ParserAlterQuery.cpp | 4 ++- src/Storages/AlterCommands.cpp | 5 ++++ src/Storages/AlterCommands.h | 2 +- .../01932_alter_index_with_order.reference | 9 ++++++ .../01932_alter_index_with_order.sql | 28 +++++++++++++++++++ 10 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 tests/queries/0_stateless/01932_alter_index_with_order.reference create mode 100644 tests/queries/0_stateless/01932_alter_index_with_order.sql diff --git a/docs/en/sql-reference/statements/alter/index/index.md b/docs/en/sql-reference/statements/alter/index/index.md index 56d81aaf52f..fd5657c3666 100644 --- a/docs/en/sql-reference/statements/alter/index/index.md +++ b/docs/en/sql-reference/statements/alter/index/index.md @@ -8,7 +8,7 @@ toc_title: INDEX The following operations are available: -- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value AFTER name [AFTER name2]` - Adds index description to tables metadata. +- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - Adds index description to tables metadata. - `ALTER TABLE [db].name DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk. diff --git a/docs/ja/sql-reference/statements/alter.md b/docs/ja/sql-reference/statements/alter.md index 226565dd226..0967f60e06a 100644 --- a/docs/ja/sql-reference/statements/alter.md +++ b/docs/ja/sql-reference/statements/alter.md @@ -175,7 +175,7 @@ MODIFY ORDER BY new_expression [複製](../../engines/table-engines/mergetree-family/replication.md) テーブル)。 次の操作 利用できます: -- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value AFTER name [AFTER name2]` -付加価指数の説明をテーブルメタデータを指すものとします。 +- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` -付加価指数の説明をテーブルメタデータを指すものとします。 - `ALTER TABLE [db].name DROP INDEX name` -除去す指標の説明からテーブルメタデータを削除を行指数のファイルからディスク。 diff --git a/docs/ru/sql-reference/statements/alter/index/index.md b/docs/ru/sql-reference/statements/alter/index/index.md index 632f11ed906..1f6bbea5c4b 100644 --- a/docs/ru/sql-reference/statements/alter/index/index.md +++ b/docs/ru/sql-reference/statements/alter/index/index.md @@ -9,7 +9,7 @@ toc_title: "Манипуляции с индексами" Добавить или удалить индекс можно с помощью операций ``` sql -ALTER TABLE [db.]name ADD INDEX name expression TYPE type GRANULARITY value [AFTER name] +ALTER TABLE [db.]name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name] ALTER TABLE [db.]name DROP INDEX name ALTER TABLE [db.]table MATERIALIZE INDEX name IN PARTITION partition_name ``` diff --git a/docs/zh/sql-reference/statements/alter.md b/docs/zh/sql-reference/statements/alter.md index 446feac96ce..4d1cdca71e5 100644 --- a/docs/zh/sql-reference/statements/alter.md +++ b/docs/zh/sql-reference/statements/alter.md @@ -174,7 +174,7 @@ MODIFY ORDER BY new_expression 该操作仅支持 [`MergeTree`](../../engines/table-engines/mergetree-family/mergetree.md) 系列表 (含 
[replicated](../../engines/table-engines/mergetree-family/replication.md) 表)。 下列操作是允许的: -- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value AFTER name [AFTER name2]` - 在表的元数据中增加索引说明 +- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - 在表的元数据中增加索引说明 - `ALTER TABLE [db].name DROP INDEX name` - 从表的元数据中删除索引描述,并从磁盘上删除索引文件 diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 918abc39037..7e60d1175e2 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -137,8 +137,9 @@ void ASTAlterCommand::formatImpl( settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD INDEX " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); index_decl->formatImpl(settings, state, frame); - /// AFTER - if (index) + if (first) + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " FIRST " << (settings.hilite ? hilite_none : ""); + else if (index) /// AFTER { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : ""); index->formatImpl(settings, state, frame); diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index d659db64b83..2908b171ca6 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -231,7 +231,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_idx_decl.parse(pos, command->index_decl, expected)) return false; - if (s_after.ignore(pos, expected)) + if (s_first.ignore(pos, expected)) + command->first = true; + else if (s_after.ignore(pos, expected)) { if (!parser_name.parse(pos, command->index, expected)) return false; diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 1cb936cbb84..3ac457e52d6 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -211,6 +211,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.after_index_name = command_ast->index->as().name(); command.if_not_exists = command_ast->if_not_exists; + command.first = command_ast->first; return command; } @@ -454,6 +455,10 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) auto insert_it = metadata.secondary_indices.end(); + /// insert the index in the beginning of the indices list + if (first) + insert_it = metadata.secondary_indices.begin(); + if (!after_index_name.empty()) { insert_it = std::find_if( diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index 4e9c9764753..d523bb2783e 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -77,7 +77,7 @@ struct AlterCommand /// For ADD or MODIFY - after which column to add a new one. If an empty string, add to the end. String after_column; - /// For ADD_COLUMN, MODIFY_COLUMN - Add to the begin if it is true. + /// For ADD_COLUMN, MODIFY_COLUMN, ADD_INDEX - Add to the begin if it is true. 
bool first = false; /// For DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN diff --git a/tests/queries/0_stateless/01932_alter_index_with_order.reference b/tests/queries/0_stateless/01932_alter_index_with_order.reference new file mode 100644 index 00000000000..07e1aab3df9 --- /dev/null +++ b/tests/queries/0_stateless/01932_alter_index_with_order.reference @@ -0,0 +1,9 @@ +default alter_index_test index_a set a 1 +default alter_index_test index_b minmax b 1 +default alter_index_test index_c set c 2 +default alter_index_test index_a set a 1 +default alter_index_test index_d set d 1 +default alter_index_test index_b minmax b 1 +default alter_index_test index_c set c 2 +default alter_index_test index_a set a 1 +default alter_index_test index_d set d 1 diff --git a/tests/queries/0_stateless/01932_alter_index_with_order.sql b/tests/queries/0_stateless/01932_alter_index_with_order.sql new file mode 100644 index 00000000000..0f2953b53f9 --- /dev/null +++ b/tests/queries/0_stateless/01932_alter_index_with_order.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS alter_index_test; + +CREATE TABLE alter_index_test ( + a UInt32, + b Date, + c UInt32, + d UInt32, + INDEX index_a a TYPE set(0) GRANULARITY 1 +) +ENGINE = MergeTree() +ORDER BY tuple(); + +SELECT * FROM system.data_skipping_indices WHERE table = 'alter_index_test' AND database = currentDatabase(); + +ALTER TABLE alter_index_test ADD INDEX index_b b type minmax granularity 1 FIRST; + +ALTER TABLE alter_index_test ADD INDEX index_c c type set(0) granularity 2 AFTER index_b; + +ALTER TABLE alter_index_test ADD INDEX index_d d type set(0) granularity 1; + +SELECT * FROM system.data_skipping_indices WHERE table = 'alter_index_test' AND database = currentDatabase(); + +DETACH TABLE alter_index_test; +ATTACH TABLE alter_index_test; + +SELECT * FROM system.data_skipping_indices WHERE table = 'alter_index_test' AND database = currentDatabase(); + +DROP TABLE IF EXISTS alter_index_test; From 012f67e6f865f81ec15c08fde846885e6eb63885 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 1 Jul 2021 23:29:00 +0300 Subject: [PATCH 161/183] Update libpq --- contrib/libpq | 2 +- contrib/libpq-cmake/CMakeLists.txt | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/contrib/libpq b/contrib/libpq index c7624588ddd..69e8a80e98f 160000 --- a/contrib/libpq +++ b/contrib/libpq @@ -1 +1 @@ -Subproject commit c7624588ddd84f153dd5990e81b886e4568bddde +Subproject commit 69e8a80e98f27e3a5deec617334e31db2b9ed7d7 diff --git a/contrib/libpq-cmake/CMakeLists.txt b/contrib/libpq-cmake/CMakeLists.txt index 028fabe52b8..4f6a1554d10 100644 --- a/contrib/libpq-cmake/CMakeLists.txt +++ b/contrib/libpq-cmake/CMakeLists.txt @@ -8,7 +8,7 @@ set(SRCS "${LIBPQ_SOURCE_DIR}/fe-lobj.c" "${LIBPQ_SOURCE_DIR}/fe-misc.c" "${LIBPQ_SOURCE_DIR}/fe-print.c" - "${LIBPQ_SOURCE_DIR}/fe-protocol2.c" + "${LIBPQ_SOURCE_DIR}/fe-trace.c" "${LIBPQ_SOURCE_DIR}/fe-protocol3.c" "${LIBPQ_SOURCE_DIR}/fe-secure.c" "${LIBPQ_SOURCE_DIR}/fe-secure-common.c" @@ -18,8 +18,12 @@ set(SRCS "${LIBPQ_SOURCE_DIR}/pqexpbuffer.c" "${LIBPQ_SOURCE_DIR}/common/scram-common.c" - "${LIBPQ_SOURCE_DIR}/common/sha2_openssl.c" + "${LIBPQ_SOURCE_DIR}/common/sha2.c" + "${LIBPQ_SOURCE_DIR}/common/sha1.c" "${LIBPQ_SOURCE_DIR}/common/md5.c" + "${LIBPQ_SOURCE_DIR}/common/md5_common.c" + "${LIBPQ_SOURCE_DIR}/common/hmac_openssl.c" + "${LIBPQ_SOURCE_DIR}/common/cryptohash.c" "${LIBPQ_SOURCE_DIR}/common/saslprep.c" "${LIBPQ_SOURCE_DIR}/common/unicode_norm.c" "${LIBPQ_SOURCE_DIR}/common/ip.c" From 
531e48afa34409fdb942b22aaa939816f17ef346 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 2 Jul 2021 00:39:41 +0300 Subject: [PATCH 162/183] Update 01923_network_receive_time_metric_insert.sh --- .../0_stateless/01923_network_receive_time_metric_insert.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh index 2a9807af10d..bcb3775f86a 100755 --- a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh +++ b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh @@ -11,7 +11,7 @@ seq 1 1000 | pv --quiet --rate-limit 1000 | ${CLICKHOUSE_CLIENT} --query "INSERT # We check that the value of NetworkReceiveElapsedMicroseconds correctly includes the time spent waiting data from the client. ${CLICKHOUSE_CLIENT} --multiquery --query "SYSTEM FLUSH LOGS; - WITH ProfileEvents.Values[indexOf(ProfileEvents.Names, 'NetworkReceiveElapsedMicroseconds')] AS time + WITH ProfileEvents['NetworkReceiveElapsedMicroseconds'] AS time SELECT time >= 1000000 ? 1 : time FROM system.query_log WHERE current_database = currentDatabase() AND query_kind = 'Insert' AND event_date >= yesterday() AND type = 2 ORDER BY event_time DESC LIMIT 1;" From 2866d45681e6dd3b9429d5fd4347b1fb2216da98 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Jul 2021 02:01:13 +0300 Subject: [PATCH 163/183] Add support for queries with `null` quoted identifier and ON CLUSTER --- src/Common/StringUtils/StringUtils.h | 6 +++++- .../0_stateless/01932_null_valid_identifier.reference | 3 +++ tests/queries/0_stateless/01932_null_valid_identifier.sql | 3 +++ 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01932_null_valid_identifier.reference create mode 100644 tests/queries/0_stateless/01932_null_valid_identifier.sql diff --git a/src/Common/StringUtils/StringUtils.h b/src/Common/StringUtils/StringUtils.h index 20c0a5ca380..f6ad61f8fd9 100644 --- a/src/Common/StringUtils/StringUtils.h +++ b/src/Common/StringUtils/StringUtils.h @@ -149,7 +149,11 @@ inline bool isPunctuationASCII(char c) inline bool isValidIdentifier(const std::string_view & str) { - return !str.empty() && isValidIdentifierBegin(str[0]) && std::all_of(str.begin() + 1, str.end(), isWordCharASCII); + return !str.empty() + && isValidIdentifierBegin(str[0]) + && std::all_of(str.begin() + 1, str.end(), isWordCharASCII) + /// NULL is not a valid identifier in SQL, any case. + && !(str.size() == strlen("null") && 0 == strncasecmp(str.data(), "null", strlen("null"))); } /// Works assuming isAlphaASCII. 
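The check added above is what forces backquoting: once `isValidIdentifier` returns false for `null`, the formatter can no longer emit such a column name bare, where it would re-parse as the NULL keyword — for instance when a query is reformatted and forwarded by `remote()` or `ON CLUSTER` DDL, as the commit subject suggests. A hedged sketch of the query shape this enables (the cluster name is a hypothetical placeholder, and the pre-patch failure mode is an assumption inferred from the comment above):

```sql
-- A column whose name collides with the NULL keyword has to survive a
-- format-and-reparse round trip with its backquotes intact.
CREATE TABLE null_col ON CLUSTER my_cluster (`null` UInt8) ENGINE = Memory;
SELECT `null` FROM null_col;
```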
diff --git a/tests/queries/0_stateless/01932_null_valid_identifier.reference b/tests/queries/0_stateless/01932_null_valid_identifier.reference new file mode 100644 index 00000000000..8600160f48c --- /dev/null +++ b/tests/queries/0_stateless/01932_null_valid_identifier.reference @@ -0,0 +1,3 @@ +1 +1 +1 \N diff --git a/tests/queries/0_stateless/01932_null_valid_identifier.sql b/tests/queries/0_stateless/01932_null_valid_identifier.sql new file mode 100644 index 00000000000..31f1a771675 --- /dev/null +++ b/tests/queries/0_stateless/01932_null_valid_identifier.sql @@ -0,0 +1,3 @@ +SELECT `null` FROM remote('127.0.0.2', view(SELECT 1 AS `null`)); +SELECT `NULL` FROM remote('127.0.0.2', view(SELECT 1 AS `NULL`)); +SELECT `nULl`, null FROM remote('127.0.0.2', view(SELECT 1 AS `nULl`)); From e6f0997924cfd3f80cb842550d160eec793f4d57 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Jul 2021 02:01:52 +0300 Subject: [PATCH 164/183] Remove Arcadia --- tests/queries/0_stateless/arcadia_skip_list.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index afd11cb5a7d..8453094cc65 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -251,3 +251,4 @@ 01924_argmax_bitmap_state 01914_exchange_dictionaries 01923_different_expression_name_alias +01932_null_valid_identifier From 63c71a7b4b330dc18ca57ded95364be6e5c758de Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Fri, 2 Jul 2021 02:12:31 +0300 Subject: [PATCH 165/183] kerberized HDFS test fix if run in parallel --- tests/integration/helpers/cluster.py | 9 ++++++--- .../hdfs_configs/bootstrap.sh | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index bd2f7d2bd8a..0799c8ed0e8 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -1188,15 +1188,18 @@ class ClickHouseCluster: time.sleep(1) - def wait_hdfs_to_start(self, timeout=300): + def wait_hdfs_to_start(self, timeout=300, check_marker=False): start = time.time() while time.time() - start < timeout: try: self.hdfs_api.write_data("/somefilewithrandomname222", "1") logging.debug("Connected to HDFS and SafeMode disabled! 
") + if check_marker: + self.hdfs_api.read_data("/preparations_done_marker") + return except Exception as ex: - logging.exception("Can't connect to HDFS " + str(ex)) + logging.exception("Can't connect to HDFS or preparations are not done yet " + str(ex)) time.sleep(1) raise Exception("Can't wait HDFS to start") @@ -1443,7 +1446,7 @@ class ClickHouseCluster: os.chmod(self.hdfs_kerberized_logs_dir, stat.S_IRWXO) run_and_check(self.base_kerberized_hdfs_cmd + common_opts) self.make_hdfs_api(kerberized=True) - self.wait_hdfs_to_start() + self.wait_hdfs_to_start(check_marker=True) if self.with_mongo and self.base_mongo_cmd: logging.debug('Setup Mongo') diff --git a/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh b/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh index 971491d4053..769056d70b3 100755 --- a/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh +++ b/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh @@ -242,6 +242,7 @@ sleep 3 /usr/local/hadoop/bin/hdfs dfs -mkdir /user/specuser /usr/local/hadoop/bin/hdfs dfs -chown specuser /user/specuser +echo "chown_completed" | /usr/local/hadoop/bin/hdfs dfs -appendToFile - /preparations_done_marker kdestroy From 7a993404b4d222884bedf4a933f999213be48b5c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Jul 2021 02:30:18 +0300 Subject: [PATCH 166/183] Whitespace --- programs/benchmark/Benchmark.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index c8f1a4eef47..859222c236e 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -37,6 +37,7 @@ #include #include + namespace fs = std::filesystem; /** A tool for evaluating ClickHouse performance. From b33d91412db32a9352c481f2e7d6b10f79bdb6c9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Jul 2021 03:07:23 +0300 Subject: [PATCH 167/183] Correctly throw exception on invalid dates --- src/IO/ReadHelpers.cpp | 2 +- src/IO/ReadHelpers.h | 40 +++++++++++++------ .../0_stateless/01933_invalid_date.reference | 1 + .../0_stateless/01933_invalid_date.sql | 10 +++++ 4 files changed, 40 insertions(+), 13 deletions(-) create mode 100644 tests/queries/0_stateless/01933_invalid_date.reference create mode 100644 tests/queries/0_stateless/01933_invalid_date.sql diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 8e9a14a20fb..2a5594a6866 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -765,7 +765,7 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf) auto ignore_delimiter = [&] { - if (!buf.eof()) + if (!buf.eof() && !isNumericASCII(*buf.position())) { ++buf.position(); return true; diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index d4e2db0b553..4e101aaaf63 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -572,27 +572,43 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) /// Optimistic path, when whole value is in buffer. 
if (!buf.eof() && buf.position() + 10 <= buf.buffer().end()) { - UInt16 year = (buf.position()[0] - '0') * 1000 + (buf.position()[1] - '0') * 100 + (buf.position()[2] - '0') * 10 + (buf.position()[3] - '0'); - buf.position() += 5; + char * pos = buf.position(); - UInt8 month = buf.position()[0] - '0'; - if (isNumericASCII(buf.position()[1])) + /// YYYY-MM-DD + /// YYYY-MM-D + /// YYYY-M-DD + /// YYYY-M-D + + /// The delimiters can be arbitrary characters, like YYYY/MM!DD, but obviously not digits. + + UInt16 year = (pos[0] - '0') * 1000 + (pos[1] - '0') * 100 + (pos[2] - '0') * 10 + (pos[3] - '0'); + pos += 5; + + if (isNumericASCII(pos[-1])) + return ReturnType(false); + + UInt8 month = pos[0] - '0'; + if (isNumericASCII(pos[1])) { - month = month * 10 + buf.position()[1] - '0'; - buf.position() += 3; + month = month * 10 + pos[1] - '0'; + pos += 3; } else - buf.position() += 2; + pos += 2; - UInt8 day = buf.position()[0] - '0'; - if (isNumericASCII(buf.position()[1])) + if (isNumericASCII(pos[-1])) + return ReturnType(false); + + UInt8 day = pos[0] - '0'; + if (isNumericASCII(pos[1])) { - day = day * 10 + buf.position()[1] - '0'; - buf.position() += 2; + day = day * 10 + pos[1] - '0'; + pos += 2; } else - buf.position() += 1; + pos += 1; + buf.position() = pos; date = LocalDate(year, month, day); return ReturnType(true); } diff --git a/tests/queries/0_stateless/01933_invalid_date.reference b/tests/queries/0_stateless/01933_invalid_date.reference new file mode 100644 index 00000000000..829e7e8c420 --- /dev/null +++ b/tests/queries/0_stateless/01933_invalid_date.reference @@ -0,0 +1 @@ +2019-07-08 diff --git a/tests/queries/0_stateless/01933_invalid_date.sql b/tests/queries/0_stateless/01933_invalid_date.sql new file mode 100644 index 00000000000..aac09c99e60 --- /dev/null +++ b/tests/queries/0_stateless/01933_invalid_date.sql @@ -0,0 +1,10 @@ +SELECT toDate('07-08-2019'); -- { serverError 6 } +SELECT toDate('2019-0708'); -- { serverError 38 } +SELECT toDate('201907-08'); -- { serverError 38 } +SELECT toDate('2019^7^8'); + +CREATE TEMPORARY TABLE test (d Date); +INSERT INTO test VALUES ('2018-01-01'); + +SELECT * FROM test WHERE d >= '07-08-2019'; -- { serverError 53 } +SELECT * FROM test WHERE d >= '2019-07-08'; From 0e621788c7f8821b6a2b3fffb1885f15ba3e5bcb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Jul 2021 03:53:08 +0300 Subject: [PATCH 168/183] Allow constexpr parameters for aggregate functions --- .../parseAggregateFunctionParameters.cpp | 41 ++++++++++--------- .../parseAggregateFunctionParameters.h | 12 ++++-- src/Functions/array/arrayReduce.cpp | 7 ++-- src/Functions/array/arrayReduceInRanges.cpp | 7 ++-- src/Functions/initializeAggregation.cpp | 7 ++-- src/Interpreters/ExpressionAnalyzer.cpp | 4 +- .../evaluateConstantExpression.cpp | 9 ++-- .../MergeTree/registerStorageMergeTree.cpp | 15 ++++--- ...pr_aggregate_function_parameters.reference | 2 + ...onstexpr_aggregate_function_parameters.sql | 11 +++++ 10 files changed, 72 insertions(+), 43 deletions(-) create mode 100644 tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.reference create mode 100644 tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.sql diff --git a/src/AggregateFunctions/parseAggregateFunctionParameters.cpp b/src/AggregateFunctions/parseAggregateFunctionParameters.cpp index 3826d993c4a..64eb0932de9 100644 --- a/src/AggregateFunctions/parseAggregateFunctionParameters.cpp +++ b/src/AggregateFunctions/parseAggregateFunctionParameters.cpp @@ -4,6 +4,8 @@ 
#include #include +#include + namespace DB { @@ -15,7 +17,7 @@ namespace ErrorCodes extern const int PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS; } -Array getAggregateFunctionParametersArray(const ASTPtr & expression_list, const std::string & error_context) +Array getAggregateFunctionParametersArray(const ASTPtr & expression_list, const std::string & error_context, ContextPtr context) { const ASTs & parameters = expression_list->children; if (parameters.empty()) @@ -25,25 +27,25 @@ Array getAggregateFunctionParametersArray(const ASTPtr & expression_list, const for (size_t i = 0; i < parameters.size(); ++i) { - const auto * literal = parameters[i]->as(); - - ASTPtr func_literal; - if (!literal) - if (const auto * func = parameters[i]->as()) - if ((func_literal = func->toLiteral())) - literal = func_literal->as(); - - if (!literal) + ASTPtr literal; + try { - throw Exception( - ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS, - "Parameters to aggregate functions must be literals. " - "Got parameter '{}'{}", - parameters[i]->formatForErrorMessage(), - (error_context.empty() ? "" : " (in " + error_context +")")); + literal = evaluateConstantExpressionAsLiteral(parameters[i], context); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::BAD_ARGUMENTS) + throw Exception( + ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS, + "Parameters to aggregate functions must be literals. " + "Got parameter '{}'{}", + parameters[i]->formatForErrorMessage(), + (error_context.empty() ? "" : " (in " + error_context +")")); + + throw; } - params_row[i] = literal->value; + params_row[i] = literal->as()->value; } return params_row; @@ -54,7 +56,8 @@ void getAggregateFunctionNameAndParametersArray( const std::string & aggregate_function_name_with_params, std::string & aggregate_function_name, Array & aggregate_function_parameters, - const std::string & error_context) + const std::string & error_context, + ContextPtr context) { if (aggregate_function_name_with_params.back() != ')') { @@ -84,7 +87,7 @@ void getAggregateFunctionNameAndParametersArray( throw Exception("Incorrect list of parameters to aggregate function " + aggregate_function_name, ErrorCodes::BAD_ARGUMENTS); - aggregate_function_parameters = getAggregateFunctionParametersArray(args_ast); + aggregate_function_parameters = getAggregateFunctionParametersArray(args_ast, error_context, context); } } diff --git a/src/AggregateFunctions/parseAggregateFunctionParameters.h b/src/AggregateFunctions/parseAggregateFunctionParameters.h index 37f1f1d5097..033e92714dd 100644 --- a/src/AggregateFunctions/parseAggregateFunctionParameters.h +++ b/src/AggregateFunctions/parseAggregateFunctionParameters.h @@ -1,19 +1,23 @@ #pragma once + #include #include +#include namespace DB { -struct Settings; - -Array getAggregateFunctionParametersArray(const ASTPtr & expression_list, const std::string & error_context = ""); +Array getAggregateFunctionParametersArray( + const ASTPtr & expression_list, + const std::string & error_context, + ContextPtr context); void getAggregateFunctionNameAndParametersArray( const std::string & aggregate_function_name_with_params, std::string & aggregate_function_name, Array & aggregate_function_parameters, - const std::string & error_context); + const std::string & error_context, + ContextPtr context); } diff --git a/src/Functions/array/arrayReduce.cpp b/src/Functions/array/arrayReduce.cpp index 9a4b5aafdb9..3387d50a1f6 100644 --- a/src/Functions/array/arrayReduce.cpp +++ 
b/src/Functions/array/arrayReduce.cpp @@ -33,11 +33,12 @@ namespace ErrorCodes * arrayReduce('agg', arr1, ...) - apply the aggregate function `agg` to arrays `arr1...` * If multiple arrays passed, then elements on corresponding positions are passed as multiple arguments to the aggregate function. */ -class FunctionArrayReduce : public IFunction +class FunctionArrayReduce : public IFunction, private WithContext { public: static constexpr auto name = "arrayReduce"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } + FunctionArrayReduce(ContextPtr context_) : WithContext(context_) {} String getName() const override { return name; } @@ -95,7 +96,7 @@ DataTypePtr FunctionArrayReduce::getReturnTypeImpl(const ColumnsWithTypeAndName String aggregate_function_name; Array params_row; getAggregateFunctionNameAndParametersArray(aggregate_function_name_with_params, - aggregate_function_name, params_row, "function " + getName()); + aggregate_function_name, params_row, "function " + getName(), getContext()); AggregateFunctionProperties properties; aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, params_row, properties); diff --git a/src/Functions/array/arrayReduceInRanges.cpp b/src/Functions/array/arrayReduceInRanges.cpp index 9a2e8e1ca95..ffb047f2231 100644 --- a/src/Functions/array/arrayReduceInRanges.cpp +++ b/src/Functions/array/arrayReduceInRanges.cpp @@ -35,12 +35,13 @@ namespace ErrorCodes * * arrayReduceInRanges('agg', indices, lengths, arr1, ...) */ -class FunctionArrayReduceInRanges : public IFunction +class FunctionArrayReduceInRanges : public IFunction, private WithContext { public: static const size_t minimum_step = 64; static constexpr auto name = "arrayReduceInRanges"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } + FunctionArrayReduceInRanges(ContextPtr context_) : WithContext(context_) {} String getName() const override { return name; } @@ -113,7 +114,7 @@ DataTypePtr FunctionArrayReduceInRanges::getReturnTypeImpl(const ColumnsWithType String aggregate_function_name; Array params_row; getAggregateFunctionNameAndParametersArray(aggregate_function_name_with_params, - aggregate_function_name, params_row, "function " + getName()); + aggregate_function_name, params_row, "function " + getName(), getContext()); AggregateFunctionProperties properties; aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, params_row, properties); diff --git a/src/Functions/initializeAggregation.cpp b/src/Functions/initializeAggregation.cpp index b9b3d219551..060788773b6 100644 --- a/src/Functions/initializeAggregation.cpp +++ b/src/Functions/initializeAggregation.cpp @@ -25,11 +25,12 @@ namespace ErrorCodes namespace { -class FunctionInitializeAggregation : public IFunction +class FunctionInitializeAggregation : public IFunction, private WithContext { public: static constexpr auto name = "initializeAggregation"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } + FunctionInitializeAggregation(ContextPtr context_) : WithContext(context_) {} String getName() const override { return name; } @@ -78,7 +79,7 @@ DataTypePtr FunctionInitializeAggregation::getReturnTypeImpl(const ColumnsWithTy 
String aggregate_function_name; Array params_row; getAggregateFunctionNameAndParametersArray(aggregate_function_name_with_params, - aggregate_function_name, params_row, "function " + getName()); + aggregate_function_name, params_row, "function " + getName(), getContext()); AggregateFunctionProperties properties; aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, params_row, properties); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 5b2339975c1..e693d4ba988 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -468,7 +468,7 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions) } AggregateFunctionProperties properties; - aggregate.parameters = (node->parameters) ? getAggregateFunctionParametersArray(node->parameters) : Array(); + aggregate.parameters = (node->parameters) ? getAggregateFunctionParametersArray(node->parameters, "", getContext()) : Array(); aggregate.function = AggregateFunctionFactory::instance().get(node->name, types, aggregate.parameters, properties); aggregate_descriptions.push_back(aggregate); @@ -651,7 +651,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) window_function.function_parameters = window_function.function_node->parameters ? getAggregateFunctionParametersArray( - window_function.function_node->parameters) + window_function.function_node->parameters, "", getContext()) : Array(); // Requiring a constant reference to a shared pointer to non-const AST diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index 90f6ac84afc..d91ea9208e4 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -49,17 +49,20 @@ std::pair> evaluateConstantExpression(co expr_for_constant_folding->execute(block_with_constants); if (!block_with_constants || block_with_constants.rows() == 0) - throw Exception("Logical error: empty block after evaluation of constant expression for IN, VALUES or LIMIT", ErrorCodes::LOGICAL_ERROR); + throw Exception("Logical error: empty block after evaluation of constant expression for IN, VALUES or LIMIT or aggregate function parameter", + ErrorCodes::LOGICAL_ERROR); if (!block_with_constants.has(name)) - throw Exception("Element of set in IN, VALUES or LIMIT is not a constant expression (result column not found): " + name, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Element of set in IN, VALUES or LIMIT or aggregate function parameter is not a constant expression (result column not found): {}", name); const ColumnWithTypeAndName & result = block_with_constants.getByName(name); const IColumn & result_column = *result.column; /// Expressions like rand() or now() are not constant if (!isColumnConst(result_column)) - throw Exception("Element of set in IN, VALUES or LIMIT is not a constant expression (result column is not const): " + name, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Element of set in IN, VALUES or LIMIT or aggregate function parameter is not a constant expression (result column is not const): {}", name); return std::make_pair(result_column[0], result.type); } diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index b3b9ce31ff5..539f7713320 100644 --- 
a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -116,8 +116,11 @@ static bool compareRetentions(const Graphite::Retention & a, const Graphite::Ret * * */ -static void -appendGraphitePattern(const Poco::Util::AbstractConfiguration & config, const String & config_element, Graphite::Patterns & patterns) +static void appendGraphitePattern( + const Poco::Util::AbstractConfiguration & config, + const String & config_element, + Graphite::Patterns & out_patterns, + ContextPtr context) { Graphite::Pattern pattern; @@ -137,7 +140,7 @@ appendGraphitePattern(const Poco::Util::AbstractConfiguration & config, const St String aggregate_function_name; Array params_row; getAggregateFunctionNameAndParametersArray( - aggregate_function_name_with_params, aggregate_function_name, params_row, "GraphiteMergeTree storage initialization"); + aggregate_function_name_with_params, aggregate_function_name, params_row, "GraphiteMergeTree storage initialization", context); /// TODO Not only Float64 AggregateFunctionProperties properties; @@ -181,7 +184,7 @@ appendGraphitePattern(const Poco::Util::AbstractConfiguration & config, const St if (pattern.type & pattern.TypeRetention) /// TypeRetention or TypeAll std::sort(pattern.retentions.begin(), pattern.retentions.end(), compareRetentions); - patterns.emplace_back(pattern); + out_patterns.emplace_back(pattern); } static void setGraphitePatternsFromConfig(ContextPtr context, const String & config_element, Graphite::Params & params) @@ -204,7 +207,7 @@ static void setGraphitePatternsFromConfig(ContextPtr context, const String & con { if (startsWith(key, "pattern")) { - appendGraphitePattern(config, config_element + "." + key, params.patterns); + appendGraphitePattern(config, config_element + "." + key, params.patterns, context); } else if (key == "default") { @@ -219,7 +222,7 @@ static void setGraphitePatternsFromConfig(ContextPtr context, const String & con } if (config.has(config_element + ".default")) - appendGraphitePattern(config, config_element + "." + ".default", params.patterns); + appendGraphitePattern(config, config_element + "." + ".default", params.patterns, context); } diff --git a/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.reference b/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.reference new file mode 100644 index 00000000000..61be3e78ae7 --- /dev/null +++ b/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.reference @@ -0,0 +1,2 @@ +[0,1,2,3,4] +[0,1,2,3,4] diff --git a/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.sql b/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.sql new file mode 100644 index 00000000000..3ab969ca256 --- /dev/null +++ b/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.sql @@ -0,0 +1,11 @@ +SELECT groupArray(2 + 3)(number) FROM numbers(10); +SELECT groupArray('5'::UInt8)(number) FROM numbers(10); + +SELECT groupArray()(number) FROM numbers(10); -- { serverError 36 } +SELECT groupArray(NULL)(number) FROM numbers(10); -- { serverError 36 } +SELECT groupArray(NULL + NULL)(number) FROM numbers(10); -- { serverError 36 } +SELECT groupArray([])(number) FROM numbers(10); -- { serverError 36 } +SELECT groupArray(throwIf(1))(number) FROM numbers(10); -- { serverError 395 } + +-- Not the best error message, can be improved. 
+SELECT groupArray(number)(number) FROM numbers(10); -- { serverError 47 } From b8a0b4caf48654aa92d47ce2b2e6e9abafcedd4e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Jul 2021 03:57:46 +0300 Subject: [PATCH 169/183] One more test --- ...ametrized_query_parametric_aggregate_function.reference | 1 + ...935_parametrized_query_parametric_aggregate_function.sh | 7 +++++++ 2 files changed, 8 insertions(+) create mode 100644 tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.reference create mode 100755 tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.sh diff --git a/tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.reference b/tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.sh b/tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.sh new file mode 100755 index 00000000000..bbc24af1214 --- /dev/null +++ b/tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CURL} -sS -XPOST "${CLICKHOUSE_URL}¶m_lim=2" --data-binary 'select length(topKArray({lim:UInt32})([1,1,2,3,4,5,6,7,7,7]))' From 23912c606609e257fa1ecb282b294674999ee8a2 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 2 Jul 2021 04:05:28 +0300 Subject: [PATCH 170/183] Update adopters.md --- docs/en/introduction/adopters.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index fdced7f354c..47927cd306a 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -155,5 +155,6 @@ toc_title: Adopters | Argedor | ClickHouse support | — | — | — | [Official website](https://www.argedor.com/en/clickhouse/) | | SigNoz | Observability Platform | Main Product | — | — | [Source code](https://github.com/SigNoz/signoz) | | ChelPipe Group | Analytics | — | — | — | [Blog post, June 2021](https://vc.ru/trade/253172-tyazhelomu-proizvodstvu-user-friendly-sayt-internet-magazin-trub-dlya-chtpz) | +| Zagrava Trading | — | — | — | — | [Job offer, May 2021](https://twitter.com/datastackjobs/status/1394707267082063874) | [Original article](https://clickhouse.tech/docs/en/introduction/adopters/) From a41a1b7c74d7eb1a15bc731f902174d628754b07 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 2 Jul 2021 04:25:07 +0300 Subject: [PATCH 171/183] Update ReplxxLineReader.cpp --- base/common/ReplxxLineReader.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/base/common/ReplxxLineReader.cpp b/base/common/ReplxxLineReader.cpp index 3d665744223..9c65b1dfe4c 100644 --- a/base/common/ReplxxLineReader.cpp +++ b/base/common/ReplxxLineReader.cpp @@ -25,7 +25,10 @@ void trim(String & s) s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end()); } -/// Copied from replxx::src/util.cxx::now_ms_str() +/// Copied from replxx::src/util.cxx::now_ms_str() under the terms of 3-clause BSD license of Replxx. 
From 23912c606609e257fa1ecb282b294674999ee8a2 Mon Sep 17 00:00:00 2001
From: alexey-milovidov
Date: Fri, 2 Jul 2021 04:05:28 +0300
Subject: [PATCH 170/183] Update adopters.md

---
 docs/en/introduction/adopters.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md
index fdced7f354c..47927cd306a 100644
--- a/docs/en/introduction/adopters.md
+++ b/docs/en/introduction/adopters.md
@@ -155,5 +155,6 @@ toc_title: Adopters
 | Argedor | ClickHouse support | — | — | — | [Official website](https://www.argedor.com/en/clickhouse/) |
 | SigNoz | Observability Platform | Main Product | — | — | [Source code](https://github.com/SigNoz/signoz) |
 | ChelPipe Group | Analytics | — | — | — | [Blog post, June 2021](https://vc.ru/trade/253172-tyazhelomu-proizvodstvu-user-friendly-sayt-internet-magazin-trub-dlya-chtpz) |
+| Zagrava Trading | — | — | — | — | [Job offer, May 2021](https://twitter.com/datastackjobs/status/1394707267082063874) |
 
 [Original article](https://clickhouse.tech/docs/en/introduction/adopters/)

From a41a1b7c74d7eb1a15bc731f902174d628754b07 Mon Sep 17 00:00:00 2001
From: alexey-milovidov
Date: Fri, 2 Jul 2021 04:25:07 +0300
Subject: [PATCH 171/183] Update ReplxxLineReader.cpp

---
 base/common/ReplxxLineReader.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/base/common/ReplxxLineReader.cpp b/base/common/ReplxxLineReader.cpp
index 3d665744223..9c65b1dfe4c 100644
--- a/base/common/ReplxxLineReader.cpp
+++ b/base/common/ReplxxLineReader.cpp
@@ -25,7 +25,10 @@ void trim(String & s)
     s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end());
 }
 
-/// Copied from replxx::src/util.cxx::now_ms_str()
+/// Copied from replxx::src/util.cxx::now_ms_str() under the terms of 3-clause BSD license of Replxx.
+/// Copyright (c) 2017-2018, Marcin Konarski (amok at codestation.org)
+/// Copyright (c) 2010, Salvatore Sanfilippo (antirez at gmail dot com)
+/// Copyright (c) 2010, Pieter Noordhuis (pcnoordhuis at gmail dot com)
 std::string replxx_now_ms_str()
 {
     std::chrono::milliseconds ms(std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()));

From 24759a9b67f5e002349f0161546dce3aa98fcd54 Mon Sep 17 00:00:00 2001
From: alexey-milovidov
Date: Fri, 2 Jul 2021 04:26:49 +0300
Subject: [PATCH 172/183] Update ReplxxLineReader.cpp

---
 base/common/ReplxxLineReader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/common/ReplxxLineReader.cpp b/base/common/ReplxxLineReader.cpp
index 9c65b1dfe4c..de169b1581b 100644
--- a/base/common/ReplxxLineReader.cpp
+++ b/base/common/ReplxxLineReader.cpp
@@ -29,7 +29,7 @@ void trim(String & s)
 /// Copyright (c) 2017-2018, Marcin Konarski (amok at codestation.org)
 /// Copyright (c) 2010, Salvatore Sanfilippo (antirez at gmail dot com)
 /// Copyright (c) 2010, Pieter Noordhuis (pcnoordhuis at gmail dot com)
-std::string replxx_now_ms_str()
+static std::string replxx_now_ms_str()
 {
     std::chrono::milliseconds ms(std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()));
     time_t t = ms.count() / 1000;

From 1d332da0ed8f5e9f042da8584ab8ce21f38cf7b9 Mon Sep 17 00:00:00 2001
From: feng lv
Date: Fri, 2 Jul 2021 05:51:53 +0000
Subject: [PATCH 173/183] fix special build on clang 11

---
 base/common/ReplxxLineReader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/common/ReplxxLineReader.cpp b/base/common/ReplxxLineReader.cpp
index de169b1581b..9c65b1dfe4c 100644
--- a/base/common/ReplxxLineReader.cpp
+++ b/base/common/ReplxxLineReader.cpp
@@ -29,7 +29,7 @@ void trim(String & s)
 /// Copyright (c) 2017-2018, Marcin Konarski (amok at codestation.org)
 /// Copyright (c) 2010, Salvatore Sanfilippo (antirez at gmail dot com)
 /// Copyright (c) 2010, Pieter Noordhuis (pcnoordhuis at gmail dot com)
-static std::string replxx_now_ms_str()
+std::string replxx_now_ms_str()
 {
     std::chrono::milliseconds ms(std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()));
     time_t t = ms.count() / 1000;

From 3ae127839188a1da088c5c1681831f02183ee098 Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Fri, 2 Jul 2021 11:22:30 +0300
Subject: [PATCH 174/183] Fixed tests

---
 src/Functions/array/arrayReduce.cpp         | 2 +-
 src/Functions/array/arrayReduceInRanges.cpp | 2 +-
 tests/queries/skip_list.json                | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/Functions/array/arrayReduce.cpp b/src/Functions/array/arrayReduce.cpp
index 3387d50a1f6..e070596e5ee 100644
--- a/src/Functions/array/arrayReduce.cpp
+++ b/src/Functions/array/arrayReduce.cpp
@@ -38,7 +38,7 @@ class FunctionArrayReduce : public IFunction, private WithContext
 public:
     static constexpr auto name = "arrayReduce";
     static FunctionPtr create(ContextPtr context_) { return std::make_shared<FunctionArrayReduce>(context_); }
-    FunctionArrayReduce(ContextPtr context_) : WithContext(context_) {}
+    explicit FunctionArrayReduce(ContextPtr context_) : WithContext(context_) {}
 
     String getName() const override { return name; }
 
diff --git a/src/Functions/array/arrayReduceInRanges.cpp b/src/Functions/array/arrayReduceInRanges.cpp
index ffb047f2231..18140fe504d 100644
--- a/src/Functions/array/arrayReduceInRanges.cpp
+++ b/src/Functions/array/arrayReduceInRanges.cpp
@@ -41,7 +41,7 @@ public:
     static const size_t minimum_step = 64;
     static constexpr auto name = "arrayReduceInRanges";
     static FunctionPtr create(ContextPtr context_) { return std::make_shared<FunctionArrayReduceInRanges>(context_); }
-    FunctionArrayReduceInRanges(ContextPtr context_) : WithContext(context_) {}
+    explicit FunctionArrayReduceInRanges(ContextPtr context_) : WithContext(context_) {}
 
     String getName() const override { return name; }
 
diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index 7c1f998e91d..64aef86ec9c 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -520,7 +520,8 @@
         "01914_exchange_dictionaries",
         "01915_create_or_replace_dictionary",
         "01913_names_of_tuple_literal",
-        "01925_merge_prewhere_table"
+        "01925_merge_prewhere_table",
+        "01934_constexpr_aggregate_function_parameters"
     ],
 
     "parallel": [
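
For readers unfamiliar with the two functions whose constructors were just marked `explicit`: both apply an aggregate function to array contents without a `GROUP BY`. A usage sketch follows (an editorial annotation, not part of the patch; the behavior shown follows the documentation of this era and may differ slightly between versions):

```sql
-- arrayReduce aggregates over the whole array:
SELECT arrayReduce('max', [1, 2, 3]); -- 3

-- arrayReduceInRanges aggregates over 1-based (index, length) ranges:
SELECT arrayReduceInRanges('sum', [(1, 3), (2, 2)], [1, 2, 3, 4]); -- [6, 5]
```
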
"arrayReduceInRanges"; static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } - FunctionArrayReduceInRanges(ContextPtr context_) : WithContext(context_) {} + explicit FunctionArrayReduceInRanges(ContextPtr context_) : WithContext(context_) {} String getName() const override { return name; } diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 7c1f998e91d..64aef86ec9c 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -520,7 +520,8 @@ "01914_exchange_dictionaries", "01915_create_or_replace_dictionary", "01913_names_of_tuple_literal", - "01925_merge_prewhere_table" + "01925_merge_prewhere_table", + "01934_constexpr_aggregate_function_parameters" ], "parallel": [ From 55889eacf507f3173c851fc83543d961b59967d8 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 2 Jul 2021 11:24:45 +0300 Subject: [PATCH 175/183] Add test to ANTLR skip list --- tests/queries/skip_list.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 7c1f998e91d..803199b9121 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -520,7 +520,8 @@ "01914_exchange_dictionaries", "01915_create_or_replace_dictionary", "01913_names_of_tuple_literal", - "01925_merge_prewhere_table" + "01925_merge_prewhere_table", + "01932_null_valid_identifier" ], "parallel": [ From e992ed780a23af724113c9f2f619de2e61a8a06f Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 2 Jul 2021 11:30:57 +0300 Subject: [PATCH 176/183] Update rabbitmq.md --- docs/zh/engines/table-engines/integrations/rabbitmq.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/engines/table-engines/integrations/rabbitmq.md b/docs/zh/engines/table-engines/integrations/rabbitmq.md index c43218da14f..a4a5be5f685 100644 --- a/docs/zh/engines/table-engines/integrations/rabbitmq.md +++ b/docs/zh/engines/table-engines/integrations/rabbitmq.md @@ -96,7 +96,7 @@ RabbitMQ 服务器配置应使用 ClickHouse 配置文件添加。 ## 描述 {#description} -`SELECT`对于读取消息不是特别有用(除了调试),因为每个消息只能读取一次。使用[物化视图](../../../sql-reference/statements/create/view.md)创建实时线程更为实用。要做到这一点: +`SELECT`对于读取消息不是特别有用(除了调试),因为每个消息只能读取一次。使用[物化视图](../../../sql-reference/statements/create.md#create-view)创建实时线程更为实用。要做到这一点: 1. 使用引擎创建一个 RabbitMQ 消费者,并将其视为一个数据流。 2. 创建一个具有所需结构的表。 From e4a0e831f0e3a51ddc787475fb7ae7a7b0e7f415 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 2 Jul 2021 14:08:11 +0300 Subject: [PATCH 177/183] Update skip_list.json --- tests/queries/skip_list.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 0cd57ed39fb..be52bee71b1 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -520,7 +520,7 @@ "01914_exchange_dictionaries", "01915_create_or_replace_dictionary", "01913_names_of_tuple_literal", - "01925_merge_prewhere_table" + "01925_merge_prewhere_table", "01932_null_valid_identifier", "01934_constexpr_aggregate_function_parameters" ], From 8b4fabe60ce6aa3c5e62c2bb799ff76a36a71181 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 2 Jul 2021 14:20:41 +0300 Subject: [PATCH 178/183] Fix crash on call dictGet() with bad arguments. 
From 23dd7544922fdc62369a8271169702203419b6e0 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Fri, 2 Jul 2021 22:26:33 +0300
Subject: [PATCH 179/183] Update libpq

---
 contrib/libpq | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/libpq b/contrib/libpq
index 69e8a80e98f..e071ea570f8 160000
--- a/contrib/libpq
+++ b/contrib/libpq
@@ -1 +1 @@
-Subproject commit 69e8a80e98f27e3a5deec617334e31db2b9ed7d7
+Subproject commit e071ea570f8985aa00e34f5b9d50a3cfe666327e

From 132edc9e2217ae99e7936560779ff6b2daefa327 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 2 Jul 2021 22:47:43 +0300
Subject: [PATCH 180/183] Allow quantiles* functions to work with
 `aggregate_functions_null_for_empty`

---
 .../AggregateFunctionQuantile.cpp             | 29 ++++++++++---------
 ...936_quantiles_cannot_return_null.reference |  4 +++
 .../01936_quantiles_cannot_return_null.sql    |  9 ++++++
 3 files changed, 29 insertions(+), 13 deletions(-)
 create mode 100644 tests/queries/0_stateless/01936_quantiles_cannot_return_null.reference
 create mode 100644 tests/queries/0_stateless/01936_quantiles_cannot_return_null.sql

diff --git a/src/AggregateFunctions/AggregateFunctionQuantile.cpp b/src/AggregateFunctions/AggregateFunctionQuantile.cpp
index cae0021082f..11b14585653 100644
--- a/src/AggregateFunctions/AggregateFunctionQuantile.cpp
+++ b/src/AggregateFunctions/AggregateFunctionQuantile.cpp
@@ -125,44 +125,47 @@ AggregateFunctionPtr createAggregateFunctionQuantile(
 
 void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory)
 {
+    /// For aggregate functions returning array we cannot return NULL on empty set.
+    AggregateFunctionProperties properties = { .returns_default_when_only_null = true };
+
     factory.registerFunction(NameQuantile::name, createAggregateFunctionQuantile<FuncQuantile>);
-    factory.registerFunction(NameQuantiles::name, createAggregateFunctionQuantile<FuncQuantiles>);
+    factory.registerFunction(NameQuantiles::name, { createAggregateFunctionQuantile<FuncQuantiles>, properties });
 
     factory.registerFunction(NameQuantileDeterministic::name, createAggregateFunctionQuantile<FuncQuantileDeterministic>);
-    factory.registerFunction(NameQuantilesDeterministic::name, createAggregateFunctionQuantile<FuncQuantilesDeterministic>);
+    factory.registerFunction(NameQuantilesDeterministic::name, { createAggregateFunctionQuantile<FuncQuantilesDeterministic>, properties });
 
     factory.registerFunction(NameQuantileExact::name, createAggregateFunctionQuantile<FuncQuantileExact>);
-    factory.registerFunction(NameQuantilesExact::name, createAggregateFunctionQuantile<FuncQuantilesExact>);
+    factory.registerFunction(NameQuantilesExact::name, { createAggregateFunctionQuantile<FuncQuantilesExact>, properties });
 
     factory.registerFunction(NameQuantileExactLow::name, createAggregateFunctionQuantile<FuncQuantileExactLow>);
-    factory.registerFunction(NameQuantilesExactLow::name, createAggregateFunctionQuantile<FuncQuantilesExactLow>);
+    factory.registerFunction(NameQuantilesExactLow::name, { createAggregateFunctionQuantile<FuncQuantilesExactLow>, properties });
 
     factory.registerFunction(NameQuantileExactHigh::name, createAggregateFunctionQuantile<FuncQuantileExactHigh>);
-    factory.registerFunction(NameQuantilesExactHigh::name, createAggregateFunctionQuantile<FuncQuantilesExactHigh>);
+    factory.registerFunction(NameQuantilesExactHigh::name, { createAggregateFunctionQuantile<FuncQuantilesExactHigh>, properties });
 
     factory.registerFunction(NameQuantileExactExclusive::name, createAggregateFunctionQuantile<FuncQuantileExactExclusive>);
-    factory.registerFunction(NameQuantilesExactExclusive::name, createAggregateFunctionQuantile<FuncQuantilesExactExclusive>);
+    factory.registerFunction(NameQuantilesExactExclusive::name, { createAggregateFunctionQuantile<FuncQuantilesExactExclusive>, properties });
 
     factory.registerFunction(NameQuantileExactInclusive::name, createAggregateFunctionQuantile<FuncQuantileExactInclusive>);
-    factory.registerFunction(NameQuantilesExactInclusive::name, createAggregateFunctionQuantile<FuncQuantilesExactInclusive>);
+    factory.registerFunction(NameQuantilesExactInclusive::name, { createAggregateFunctionQuantile<FuncQuantilesExactInclusive>, properties });
 
     factory.registerFunction(NameQuantileExactWeighted::name, createAggregateFunctionQuantile<FuncQuantileExactWeighted>);
-    factory.registerFunction(NameQuantilesExactWeighted::name, createAggregateFunctionQuantile<FuncQuantilesExactWeighted>);
+    factory.registerFunction(NameQuantilesExactWeighted::name, { createAggregateFunctionQuantile<FuncQuantilesExactWeighted>, properties });
 
     factory.registerFunction(NameQuantileTiming::name, createAggregateFunctionQuantile<FuncQuantileTiming>);
-    factory.registerFunction(NameQuantilesTiming::name, createAggregateFunctionQuantile<FuncQuantilesTiming>);
+    factory.registerFunction(NameQuantilesTiming::name, { createAggregateFunctionQuantile<FuncQuantilesTiming>, properties });
 
     factory.registerFunction(NameQuantileTimingWeighted::name, createAggregateFunctionQuantile<FuncQuantileTimingWeighted>);
-    factory.registerFunction(NameQuantilesTimingWeighted::name, createAggregateFunctionQuantile<FuncQuantilesTimingWeighted>);
+    factory.registerFunction(NameQuantilesTimingWeighted::name, { createAggregateFunctionQuantile<FuncQuantilesTimingWeighted>, properties });
 
     factory.registerFunction(NameQuantileTDigest::name, createAggregateFunctionQuantile<FuncQuantileTDigest>);
-    factory.registerFunction(NameQuantilesTDigest::name, createAggregateFunctionQuantile<FuncQuantilesTDigest>);
+    factory.registerFunction(NameQuantilesTDigest::name, { createAggregateFunctionQuantile<FuncQuantilesTDigest>, properties });
 
     factory.registerFunction(NameQuantileTDigestWeighted::name, createAggregateFunctionQuantile<FuncQuantileTDigestWeighted>);
-    factory.registerFunction(NameQuantilesTDigestWeighted::name, createAggregateFunctionQuantile<FuncQuantilesTDigestWeighted>);
+    factory.registerFunction(NameQuantilesTDigestWeighted::name, { createAggregateFunctionQuantile<FuncQuantilesTDigestWeighted>, properties });
 
     factory.registerFunction(NameQuantileBFloat16::name, createAggregateFunctionQuantile<FuncQuantileBFloat16>);
-    factory.registerFunction(NameQuantilesBFloat16::name, createAggregateFunctionQuantile<FuncQuantilesBFloat16>);
+    factory.registerFunction(NameQuantilesBFloat16::name, { createAggregateFunctionQuantile<FuncQuantilesBFloat16>, properties });
 
     /// 'median' is an alias for 'quantile'
     factory.registerAlias("median", NameQuantile::name);
diff --git a/tests/queries/0_stateless/01936_quantiles_cannot_return_null.reference b/tests/queries/0_stateless/01936_quantiles_cannot_return_null.reference
new file mode 100644
index 00000000000..f9b4a3157f7
--- /dev/null
+++ b/tests/queries/0_stateless/01936_quantiles_cannot_return_null.reference
@@ -0,0 +1,4 @@
+[nan]
+[nan]
+[nan]
+[nan]
diff --git a/tests/queries/0_stateless/01936_quantiles_cannot_return_null.sql b/tests/queries/0_stateless/01936_quantiles_cannot_return_null.sql
new file mode 100644
index 00000000000..81ac6224268
--- /dev/null
+++ b/tests/queries/0_stateless/01936_quantiles_cannot_return_null.sql
@@ -0,0 +1,9 @@
+set aggregate_functions_null_for_empty=0;
+
+SELECT quantiles(0.95)(x) FROM (SELECT 1 x WHERE 0);
+SELECT quantiles(0.95)(number) FROM (SELECT number FROM numbers(10) WHERE number > 10);
+
+set aggregate_functions_null_for_empty=1;
+
+SELECT quantiles(0.95)(x) FROM (SELECT 1 x WHERE 0);
+SELECT quantiles(0.95)(number) FROM (SELECT number FROM numbers(10) WHERE number > 10);
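
Background for this change: `aggregate_functions_null_for_empty` rewrites aggregates into their `-OrNull` variants, but an array-returning function such as `quantiles` has no NULL representation for an empty set, so the `returns_default_when_only_null` property makes it return its default value instead. A sketch of the visible behavior, mirroring the new test (an editorial annotation, not part of the patch):

```sql
SET aggregate_functions_null_for_empty = 1;

-- Rather than an impossible NULL, the array-returning function yields its
-- default value, [nan], on an empty set:
SELECT quantiles(0.95)(x) FROM (SELECT 1 AS x WHERE 0);
```
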
From 7d6e08c6adaf34d9d772ac504f9804ddd0f169d6 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 2 Jul 2021 23:01:26 +0300
Subject: [PATCH 181/183] Remove obsolete code from init script

---
 debian/clickhouse-server.init | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/debian/clickhouse-server.init b/debian/clickhouse-server.init
index d7d87c6d53c..89f97697c61 100755
--- a/debian/clickhouse-server.init
+++ b/debian/clickhouse-server.init
@@ -43,29 +43,6 @@ command -v flock >/dev/null && FLOCK=flock
 # Override defaults from optional config file
 test -f /etc/default/clickhouse && . /etc/default/clickhouse
 
-# On x86_64, check for required instruction set.
-if uname -mpi | grep -q 'x86_64'; then
-    if ! grep -q 'sse4_2' /proc/cpuinfo; then
-        # On KVM, cpuinfo could falsely not report SSE 4.2 support, so skip the check.
-        if ! grep -q 'Common KVM processor' /proc/cpuinfo; then
-
-            # Some other VMs also report wrong flags in cpuinfo.
-            # Tricky way to test for instruction set:
-            # create temporary binary and run it;
-            # if it get caught illegal instruction signal,
-            # then required instruction set is not supported really.
-            #
-            # Generated this way:
-            # gcc -xc -Os -static -nostdlib - <<< 'void _start() { __asm__("pcmpgtq %%xmm0, %%xmm1; mov $0x3c, %%rax; xor %%rdi, %%rdi; syscall":::"memory"); }' && strip -R .note.gnu.build-id -R .comment -R .eh_frame -s ./a.out && gzip -c -9 ./a.out | base64 -w0; echo
-
-            if ! (echo -n 'H4sICAwAW1cCA2Eub3V0AKt39XFjYmRkgAEmBjsGEI+H0QHMd4CKGyCUAMUsGJiBJDNQNUiYlQEZOKDQclB9cnD9CmCSBYqJBRxQOvBpSQobGfqIAWn8FuYnPI4fsAGyPQz/87MeZtArziguKSpJTGLQK0mtKGGgGHADMSgoYH6AhTMPNHyE0NQzYuEzYzEXFr6CBPQDANAsXKTwAQAA' | base64 -d | gzip -d > /tmp/clickhouse_test_sse42 && chmod a+x /tmp/clickhouse_test_sse42 && /tmp/clickhouse_test_sse42); then
-                echo 'Warning! SSE 4.2 instruction set is not supported'
-                #exit 3
-            fi
-        fi
-    fi
-fi
-
 
 die()
 {
From 9f52e64805c6c4dd832b54c48543c4183e3a167e Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Sat, 3 Jul 2021 01:22:04 +0300
Subject: [PATCH 182/183] FunctionInitializeAggregation build fix

---
 src/Functions/initializeAggregation.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Functions/initializeAggregation.cpp b/src/Functions/initializeAggregation.cpp
index 060788773b6..b097d81e385 100644
--- a/src/Functions/initializeAggregation.cpp
+++ b/src/Functions/initializeAggregation.cpp
@@ -30,7 +30,7 @@ class FunctionInitializeAggregation : public IFunction, private WithContext
 public:
     static constexpr auto name = "initializeAggregation";
     static FunctionPtr create(ContextPtr context_) { return std::make_shared<FunctionInitializeAggregation>(context_); }
-    FunctionInitializeAggregation(ContextPtr context_) : WithContext(context_) {}
+    explicit FunctionInitializeAggregation(ContextPtr context_) : WithContext(context_) {}
 
     String getName() const override { return name; }
 
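
For context, `initializeAggregation` builds an aggregate function state directly from values, which is why its constructor takes a context like `arrayReduce` and needed the same `explicit` treatment. A usage sketch (an editorial annotation; the example follows the function's documentation of this era):

```sql
-- Each row produces a uniqState; states combine like any AggregateFunction value.
SELECT uniqMerge(state)
FROM (SELECT initializeAggregation('uniqState', number % 3) AS state FROM numbers(100));
-- 3
```
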
From acd1342df8444f136df814ac5c4d5df3b93cc6ca Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 3 Jul 2021 04:54:43 +0300
Subject: [PATCH 183/183] Skip test for ANTLR #25904

---
 tests/queries/skip_list.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json
index be52bee71b1..e0a96ef8ded 100644
--- a/tests/queries/skip_list.json
+++ b/tests/queries/skip_list.json
@@ -522,7 +522,8 @@
         "01913_names_of_tuple_literal",
         "01925_merge_prewhere_table",
         "01932_null_valid_identifier",
-        "01934_constexpr_aggregate_function_parameters"
+        "01934_constexpr_aggregate_function_parameters",
+        "01932_alter_index_with_order"
     ],
 
     "parallel": [