From 534e199c43651507216f912f86dbc59510edcc6e Mon Sep 17 00:00:00 2001 From: Daria Mozhaeva Date: Wed, 30 Sep 2020 11:32:57 +0400 Subject: [PATCH 01/94] Edit and translate to Russian. --- .../settings.md | 8 +- docs/en/operations/settings/settings.md | 98 +++---- docs/en/sql-reference/statements/system.md | 6 +- .../settings.md | 6 +- docs/ru/operations/settings/settings.md | 242 +++++++++++++----- 5 files changed, 235 insertions(+), 125 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index c1ac1d0d92d..d89f74f6bdc 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -351,15 +351,15 @@ Keys for syslog: ## send\_crash\_reports {#server_configuration_parameters-logger} Settings for opt-in sending crash reports to the ClickHouse core developers team via [Sentry](https://sentry.io). -Enabling it, especially in pre-production environments, is greatly appreciated. +Enabling it, especially in pre-production environments, is highly appreciated. -The server will need an access to public Internet via IPv4 (at the time of writing IPv6 is not supported by Sentry) for this feature to be functioning properly. +The server will need access to the public Internet via IPv4 (at the time of writing IPv6 is not supported by Sentry) for this feature to be functioning properly. Keys: - `enabled` – Boolean flag to enable the feature, `false` by default. Set to `true` to allow sending crash reports. -- `endpoint` – You can override the Sentry endpoint URL for sending crash reports. It can be either separate Sentry account or your self-hosted Sentry instance. Use the [Sentry DSN](https://docs.sentry.io/error-reporting/quickstart/?platform=native#configure-the-sdk) syntax. -- `anonymize` - Avoid attaching the server hostname to crash report. +- `endpoint` – You can override the Sentry endpoint URL for sending crash reports. It can be either a separate Sentry account or your self-hosted Sentry instance. Use the [Sentry DSN](https://docs.sentry.io/error-reporting/quickstart/?platform=native#configure-the-sdk) syntax. +- `anonymize` - Avoid attaching the server hostname to the crash report. - `http_proxy` - Configure HTTP proxy for sending crash reports. - `debug` - Sets the Sentry client into debug mode. - `tmp_path` - Filesystem path for temporary crash report state. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 4995c04f712..ee7eb1fd6be 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2,7 +2,7 @@ ## distributed\_product\_mode {#distributed-product-mode} -Changes the behavior of [distributed subqueries](../../sql-reference/operators/in.md). +Changes the behaviour of [distributed subqueries](../../sql-reference/operators/in.md). ClickHouse applies this setting when the query contains the product of distributed tables, i.e. when the query for a distributed table contains a non-GLOBAL subquery for the distributed table. @@ -42,7 +42,7 @@ Consider the following queries: If `enable_optimize_predicate_expression = 1`, then the execution time of these queries is equal because ClickHouse applies `WHERE` to the subquery when processing it. -If `enable_optimize_predicate_expression = 0`, then the execution time of the second query is much longer, because the `WHERE` clause applies to all the data after the subquery finishes. +If `enable_optimize_predicate_expression = 0`, then the execution time of the second query is much longer because the `WHERE` clause applies to all the data after the subquery finishes. ## fallback\_to\_stale\_replicas\_for\_distributed\_queries {#settings-fallback_to_stale_replicas_for_distributed_queries} @@ -215,7 +215,7 @@ Ok. ## input\_format\_values\_deduce\_templates\_of\_expressions {#settings-input_format_values_deduce_templates_of_expressions} -Enables or disables template deduction for SQL expressions in [Values](../../interfaces/formats.md#data-format-values) format. It allows parsing and interpreting expressions in `Values` much faster if expressions in consecutive rows have the same structure. ClickHouse tries to deduce template of an expression, parse the following rows using this template and evaluate the expression on a batch of successfully parsed rows. +Enables or disables template deduction for SQL expressions in [Values](../../interfaces/formats.md#data-format-values) format. It allows parsing and interpreting expressions in `Values` much faster if expressions in consecutive rows have the same structure. ClickHouse tries to deduce the template of an expression, parse the following rows using this template and evaluate the expression on a batch of successfully parsed rows. Possible values: @@ -236,7 +236,7 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), ( ## input\_format\_values\_accurate\_types\_of\_literals {#settings-input-format-values-accurate-types-of-literals} -This setting is used only when `input_format_values_deduce_templates_of_expressions = 1`. It can happen, that expressions for some column have the same structure, but contain numeric literals of different types, e.g. +This setting is used only when `input_format_values_deduce_templates_of_expressions = 1`. Expressions for some column may have the same structure, but contain numeric literals of different types, e.g. ``` sql (..., abs(0), ...), -- UInt64 literal @@ -278,7 +278,7 @@ Disabled by default. ## input\_format\_null\_as\_default {#settings-input-format-null-as-default} -Enables or disables using default values if input data contain `NULL`, but data type of the corresponding column in not `Nullable(T)` (for text input formats). +Enables or disables using default values if input data contain `NULL`, but the data type of the corresponding column in not `Nullable(T)` (for text input formats). ## input\_format\_skip\_unknown\_fields {#settings-input-format-skip-unknown-fields} @@ -395,7 +395,7 @@ See also: ## join\_use\_nulls {#join_use_nulls} -Sets the type of [JOIN](../../sql-reference/statements/select/join.md) behavior. When merging tables, empty cells may appear. ClickHouse fills them differently based on this setting. +Sets the type of [JOIN](../../sql-reference/statements/select/join.md) behaviour. When merging tables, empty cells may appear. ClickHouse fills them differently based on this setting. Possible values: @@ -424,8 +424,8 @@ Limits sizes of right-hand join data blocks in partial merge join algorithm for ClickHouse server: 1. Splits right-hand join data into blocks with up to the specified number of rows. -2. Indexes each block with their minimum and maximum values -3. Unloads prepared blocks to disk if possible. +2. Indexes each block with its minimum and maximum values. +3. Unloads prepared blocks to disk if it is possible. Possible values: @@ -447,25 +447,25 @@ Default value: 64. ## any\_join\_distinct\_right\_table\_keys {#any_join_distinct_right_table_keys} -Enables legacy ClickHouse server behavior in `ANY INNER|LEFT JOIN` operations. +Enables legacy ClickHouse server behaviour in `ANY INNER|LEFT JOIN` operations. !!! note "Warning" - Use this setting only for the purpose of backward compatibility if your use cases depend on legacy `JOIN` behavior. + Use this setting only for backward compatibility if your use cases depend on legacy `JOIN` behaviour. -When the legacy behavior enabled: +When the legacy behaviour enabled: - Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are not equal because ClickHouse uses the logic with many-to-one left-to-right table keys mapping. - Results of `ANY INNER JOIN` operations contain all rows from the left table like the `SEMI LEFT JOIN` operations do. -When the legacy behavior disabled: +When the legacy behaviour disabled: - Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are equal because ClickHouse uses the logic which provides one-to-many keys mapping in `ANY RIGHT JOIN` operations. -- Results of `ANY INNER JOIN` operations contain one row per key from both left and right tables. +- Results of `ANY INNER JOIN` operations contain one row per key from both the left and right tables. Possible values: -- 0 — Legacy behavior is disabled. -- 1 — Legacy behavior is enabled. +- 0 — Legacy behaviour is disabled. +- 1 — Legacy behaviour is enabled. Default value: 0. @@ -634,7 +634,7 @@ Possible values: Default value: `QUERY_START`. -Can be used to limit which entiries will goes to `query_log`, say you are interesting only in errors, then you can use `EXCEPTION_WHILE_PROCESSING`: +Can be used to limit which entities will go to `query_log`, say you are interested only in errors, then you can use `EXCEPTION_WHILE_PROCESSING`: ``` text log_queries_min_type='EXCEPTION_WHILE_PROCESSING' @@ -662,11 +662,11 @@ The setting also doesn’t have a purpose when using INSERT SELECT, since data i Default value: 1,048,576. -The default is slightly more than `max_block_size`. The reason for this is because certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion and a large enough block size allow sorting more data in RAM. +The default is slightly more than `max_block_size`. The reason for this is because certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion, and a large enough block size allow sorting more data in RAM. ## min\_insert\_block\_size\_rows {#min-insert-block-size-rows} -Sets minimum number of rows in block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. +Sets the minimum number of rows in the block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. Possible values: @@ -677,7 +677,7 @@ Default value: 1048576. ## min\_insert\_block\_size\_bytes {#min-insert-block-size-bytes} -Sets minimum number of bytes in block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. +Sets the minimum number of bytes in the block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. Possible values: @@ -754,7 +754,7 @@ Default value: 256 KiB. ## max\_parser\_depth {#max_parser_depth} -Limits maximum recursion depth in the recursive descent parser. Allows to control stack size. +Limits maximum recursion depth in the recursive descent parser. Allows controlling the stack size. Possible values: @@ -865,12 +865,12 @@ Yandex.Metrica uses this parameter set to 1 for implementing suggestions for seg ## replace\_running\_query\_max\_wait\_ms {#replace-running-query-max-wait-ms} -The wait time for running query with the same `query_id` to finish, when the [replace_running_query](#replace-running-query) setting is active. +The wait time for running the query with the same `query_id` to finish, when the [replace_running_query](#replace-running-query) setting is active. Possible values: - Positive integer. -- 0 — Throwing an exception that does not allow to run a new query if the server already executes a query with the same `query_id`. +- 0 — Throwing an exception that does not allow to run a new query if the server already executes a query with the same `query_id`. Default value: 5000. @@ -946,7 +946,7 @@ The `first_or_random` algorithm solves the problem of the `in_order` algorithm. load_balancing = round_robin ``` -This algorithm uses round robin policy across replicas with the same number of errors (only the queries with `round_robin` policy is accounted). +This algorithm uses a round-robin policy across replicas with the same number of errors (only the queries with `round_robin` policy is accounted). ## prefer\_localhost\_replica {#settings-prefer-localhost-replica} @@ -983,7 +983,7 @@ Replica lag is not controlled. Enable compilation of queries. By default, 0 (disabled). The compilation is only used for part of the query-processing pipeline: for the first stage of aggregation (GROUP BY). -If this portion of the pipeline was compiled, the query may run faster due to deployment of short cycles and inlining aggregate function calls. The maximum performance improvement (up to four times faster in rare cases) is seen for queries with multiple simple aggregate functions. Typically, the performance gain is insignificant. In very rare cases, it may slow down query execution. +If this portion of the pipeline was compiled, the query may run faster due to the deployment of short cycles and inlining aggregate function calls. The maximum performance improvement (up to four times faster in rare cases) is seen for queries with multiple simple aggregate functions. Typically, the performance gain is insignificant. In very rare cases, it may slow down query execution. ## min\_count\_to\_compile {#min-count-to-compile} @@ -1099,7 +1099,7 @@ When `output_format_json_quote_denormals = 1`, the query returns: ## format\_csv\_delimiter {#settings-format_csv_delimiter} -The character interpreted as a delimiter in the CSV data. By default, the delimiter is `,`. +The character is interpreted as a delimiter in the CSV data. By default, the delimiter is `,`. ## input\_format\_csv\_unquoted\_null\_literal\_as\_null {#settings-input_format_csv_unquoted_null_literal_as_null} @@ -1142,7 +1142,7 @@ See also: ## insert\_quorum\_timeout {#settings-insert_quorum_timeout} -Write to quorum timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica. +Write to a quorum timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica. Default value: 60 seconds. @@ -1198,8 +1198,8 @@ Default value: 0. Usage By default, deduplication is not performed for materialized views but is done upstream, in the source table. -If an INSERTed block is skipped due to deduplication in the source table, there will be no insertion into attached materialized views. This behaviour exists to enable insertion of highly aggregated data into materialized views, for cases where inserted blocks are the same after materialized view aggregation but derived from different INSERTs into the source table. -At the same time, this behaviour “breaks” `INSERT` idempotency. If an `INSERT` into the main table was successful and `INSERT` into a materialized view failed (e.g. because of communication failure with Zookeeper) a client will get an error and can retry the operation. However, the materialized view won’t receive the second insert because it will be discarded by deduplication in the main (source) table. The setting `deduplicate_blocks_in_dependent_materialized_views` allows for changing this behaviour. On retry, a materialized view will receive the repeat insert and will perform deduplication check by itself, +If an INSERTed block is skipped due to deduplication in the source table, there will be no insertion into attached materialized views. This behaviour exists to enable the insertion of highly aggregated data into materialized views, for cases where inserted blocks are the same after materialized view aggregation but derived from different INSERTs into the source table. +At the same time, this behaviour “breaks” `INSERT` idempotency. If an `INSERT` into the main table was successful and `INSERT` into a materialized view failed (e.g. because of communication failure with Zookeeper) a client will get an error and can retry the operation. However, the materialized view won’t receive the second insert because it will be discarded by deduplication in the main (source) table. The setting `deduplicate_blocks_in_dependent_materialized_views` allows for changing this behaviour. On retry, a materialized view will receive the repeat insert and will perform a deduplication check by itself, ignoring check result for the source table, and will insert rows lost because of the first failure. ## max\_network\_bytes {#settings-max-network-bytes} @@ -1355,7 +1355,7 @@ Default value: 0. - Type: seconds - Default value: 60 seconds -Controls how fast errors in distributed tables are zeroed. If a replica is unavailable for some time, accumulates 5 errors, and distributed\_replica\_error\_half\_life is set to 1 second, then the replica is considered normal 3 seconds after last error. +Controls how fast errors in distributed tables are zeroed. If a replica is unavailable for some time, accumulates 5 errors, and distributed\_replica\_error\_half\_life is set to 1 second, then the replica is considered normal 3 seconds after the last error. See also: @@ -1369,7 +1369,7 @@ See also: - Type: unsigned int - Default value: 1000 -Error count of each replica is capped at this value, preventing a single replica from accumulating too many errors. +The error count of each replica is capped at this value, preventing a single replica from accumulating too many errors. See also: @@ -1383,7 +1383,7 @@ See also: - Type: unsigned int - Default value: 0 -Number of errors that will be ignored while choosing replicas (according to `load_balancing` algorithm). +The number of errors that will be ignored while choosing replicas (according to `load_balancing` algorithm). See also: @@ -1414,7 +1414,7 @@ Default value: 30000 milliseconds (30 seconds). ## distributed\_directory\_monitor\_batch\_inserts {#distributed_directory_monitor_batch_inserts} -Enables/disables sending of inserted data in batches. +Enables/disables inserted data sending in batches. When batch sending is enabled, the [Distributed](../../engines/table-engines/special/distributed.md) table engine tries to send multiple files of inserted data in one operation instead of sending them separately. Batch sending improves cluster performance by better-utilizing server and network resources. @@ -1507,7 +1507,7 @@ Default value: 0. - Type: bool - Default value: True -Enable order-preserving parallel parsing of data formats. Supported only for TSV, TKSV, CSV and JSONEachRow formats. +Enable order-preserving parallel parsing of data formats. Supported only for TSV, TKSV, CSV, and JSONEachRow formats. ## min\_chunk\_bytes\_for\_parallel\_parsing {#min-chunk-bytes-for-parallel-parsing} @@ -1559,7 +1559,7 @@ Default value: 0. ## background\_pool\_size {#background_pool_size} -Sets the number of threads performing background operations in table engines (for example, merges in [MergeTree engine](../../engines/table-engines/mergetree-family/index.md) tables). This setting is applied from `default` profile at ClickHouse server start and can’t be changed in a user session. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance. +Sets the number of threads performing background operations in table engines (for example, merges in [MergeTree engine](../../engines/table-engines/mergetree-family/index.md) tables). This setting is applied from the `default` profile at the ClickHouse server start and can’t be changed in a user session. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance. Before changing it, please also take a look at related [MergeTree settings](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-merge_tree), such as `number_of_free_entries_in_pool_to_lower_max_size_of_merge` and `number_of_free_entries_in_pool_to_execute_mutation`. @@ -1578,8 +1578,8 @@ If we execute `INSERT INTO distributed_table_a SELECT ... FROM distributed_table Possible values: - 0 — Disabled. -- 1 — `SELECT` will be executed on each shard from underlying table of the distributed engine. -- 2 — `SELECT` and `INSERT` will be executed on each shard from/to underlying table of the distributed engine. +- 1 — `SELECT` will be executed on each shard from the underlying table of the distributed engine. +- 2 — `SELECT` and `INSERT` will be executed on each shard from/to the underlying table of the distributed engine. Default value: 0. @@ -1602,7 +1602,7 @@ Default value: `0`. - [Managing Distributed Tables](../../sql-reference/statements/system.md#query-language-system-distributed) ## background\_buffer\_flush\_schedule\_pool\_size {#background_buffer_flush_schedule_pool_size} -Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at ClickHouse server start and can’t be changed in a user session. +Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at the ClickHouse server start and can’t be changed in a user session. Possible values: @@ -1612,7 +1612,7 @@ Default value: 16. ## background\_move\_pool\_size {#background_move_pool_size} -Sets the number of threads performing background moves of data parts for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)-engine tables. This setting is applied at ClickHouse server start and can’t be changed in a user session. +Sets the number of threads performing background moves of data parts for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)-engine tables. This setting is applied at the ClickHouse server start and can’t be changed in a user session. Possible values: @@ -1634,7 +1634,7 @@ Default value: 16. Prohibits data parts merging in [Replicated\*MergeTree](../../engines/table-engines/mergetree-family/replication.md)-engine tables. -When merging is prohibited, the replica never merges parts and always downloads merged parts from other replicas. If there is no required data yet, the replica waits for it. CPU and disk load on the replica server decreases, but the network load on cluster increases. This setting can be useful on servers with relatively weak CPUs or slow disks, such as servers for backups storage. +When merging is prohibited, the replica never merges parts and always downloads merged parts from other replicas. If there is no required data yet, the replica waits for it. CPU and disk load on the replica server decreases, but the network load on the cluster increases. This setting can be useful on servers with relatively weak CPUs or slow disks, such as servers for backups storage. Possible values: @@ -1649,7 +1649,7 @@ Default value: 0. ## background\_distributed\_schedule\_pool\_size {#background_distributed_schedule_pool_size} -Sets the number of threads performing background tasks for [distributed](../../engines/table-engines/special/distributed.md) sends. This setting is applied at ClickHouse server start and can’t be changed in a user session. +Sets the number of threads performing background tasks for [distributed](../../engines/table-engines/special/distributed.md) sends. This setting is applied at the ClickHouse server start and can’t be changed in a user session. Possible values: @@ -1740,7 +1740,7 @@ Default value: 8192. Turns on or turns off using of single dictionary for the data part. -By default, ClickHouse server monitors the size of dictionaries and if a dictionary overflows then the server starts to write the next one. To prohibit creating several dictionaries set `low_cardinality_use_single_dictionary_for_part = 1`. +By default, the ClickHouse server monitors the size of dictionaries and if a dictionary overflows then the server starts to write the next one. To prohibit creating several dictionaries set `low_cardinality_use_single_dictionary_for_part = 1`. Possible values: @@ -1785,7 +1785,7 @@ Default value: 0. ## min\_insert\_block\_size\_rows\_for\_materialized\_views {#min-insert-block-size-rows-for-materialized-views} -Sets minimum number of rows in block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into [materialized view](../../sql-reference/statements/create/view.md). By adjusting this setting, you control blocks squashing while pushing to materialized view and avoid excessive memory usage. +Sets the minimum number of rows in the block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into [materialized view](../../sql-reference/statements/create/view.md). By adjusting this setting, you control blocks squashing while pushing to materialized view and avoid excessive memory usage. Possible values: @@ -1800,7 +1800,7 @@ Default value: 1048576. ## min\_insert\_block\_size\_bytes\_for\_materialized\_views {#min-insert-block-size-bytes-for-materialized-views} -Sets minimum number of bytes in block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into [materialized view](../../sql-reference/statements/create/view.md). By adjusting this setting, you control blocks squashing while pushing to materialized view and avoid excessive memory usage. +Sets the minimum number of bytes in the block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into [materialized view](../../sql-reference/statements/create/view.md). By adjusting this setting, you control blocks squashing while pushing to materialized view and avoid excessive memory usage. Possible values: @@ -1815,7 +1815,7 @@ Default value: 268435456. ## output\_format\_pretty\_grid\_charset {#output-format-pretty-grid-charset} -Allows to change a charset which is used for printing grids borders. Available charsets are following: UTF-8, ASCII. +Allows changing a charset which is used for printing grids borders. Available charsets are UTF-8, ASCII. **Example** @@ -1872,12 +1872,12 @@ When `ttl_only_drop_parts` is disabled (by default), the ClickHouse server only When `ttl_only_drop_parts` is enabled, the ClickHouse server drops a whole part when all rows in it are expired. -Dropping whole parts instead of partial cleaning TTL-d rows allows to have shorter `merge_with_ttl_timeout` times and lower impact on system performance. +Dropping whole parts instead of partial cleaning TTL-d rows allows having shorter `merge_with_ttl_timeout` times and lower impact on system performance. Possible values: -- 0 — Complete dropping of data parts is disabled. -- 1 — Complete dropping of data parts is enabled. +- 0 — The complete dropping of data parts is disabled. +- 1 — The complete dropping of data parts is enabled. Default value: `0`. @@ -1888,9 +1888,9 @@ Default value: `0`. ## lock_acquire_timeout {#lock_acquire_timeout} -Defines how many seconds locking request waits before failing. +Defines how many seconds a locking request waits before failing. -Locking timeout is used to protect from deadlocks while executing read/write operations with tables. When timeout expires and locking request fails, the ClickHouse server throws an exeption "Locking attempt timed out! Possible deadlock avoided. Client should retry." with error code `DEADLOCK_AVOIDED`. +Locking timeout is used to protect from deadlocks while executing read/write operations with tables. When the timeout expires and the locking request fails, the ClickHouse server throws an exception "Locking attempt timed out! Possible deadlock avoided. Client should retry." with error code `DEADLOCK_AVOIDED`. Possible values: diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index f6ff264e827..a9f9b718de6 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -81,12 +81,12 @@ SYSTEM DROP REPLICA 'replica_name'; SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk'; ``` -Queries will remove the replica path in ZooKeeper. It’s useful when replica is dead and its metadata cannot be removed from ZooKeeper by `DROP TABLE` because there is no such table anymore. It will only drop the inactive/stale replica, and it can’t drop local replica, please use `DROP TABLE` for that. `DROP REPLICA` does not drop any tables and does not remove any data or metadata from disk. +Queries will remove the replica path in ZooKeeper. It is useful when the replica is dead and its metadata cannot be removed from ZooKeeper by `DROP TABLE` because there is no such table anymore. It will only drop the inactive/stale replica, and it cannot drop local replica, please use `DROP TABLE` for that. `DROP REPLICA` does not drop any tables and does not remove any data or metadata from disk. The first one removes metadata of `'replica_name'` replica of `database.table` table. The second one does the same for all replicated tables in the database. -The third one does the same for all replicated tables on local server. -The forth one is useful to remove metadata of dead replica when all other replicas of a table were dropped. It requires the table path to be specified explicitly. It must be the same path as was passed to the first argument of `ReplicatedMergeTree` engine on table creation. +The third one does the same for all replicated tables on the local server. +The fourth one is useful to remove metadata of dead replica when all other replicas of a table were dropped. It requires the table path to be specified explicitly. It must be the same path as was passed to the first argument of `ReplicatedMergeTree` engine on table creation. ## DROP UNCOMPRESSED CACHE {#query_language-system-drop-uncompressed-cache} diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 795a9f5893a..0abb568ffc7 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -492,11 +492,11 @@ ClickHouse проверяет условия для `min_part_size` и `min_part ## max\_thread\_pool\_size {#max-thread-pool-size} -Максимальное кол-во потоков в глобальном пуле потоков. +Максимальное количество потоков в глобальном пуле потоков. -Default value: 10000. +Значение по умолчанию: 10000. -**Example** +**Пример** ``` xml 12000 diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 9a487b6c166..15c4139a3f3 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -281,6 +281,14 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), ( Значение по умолчанию: 1. +## input\_format\_tsv\_empty\_as\_default {#settings-input-format-tsv-empty-as-default} + +Если эта настройка включена, замените пустые поля ввода в TSV значениями по умолчанию. Для сложных выражений по умолчанию также должна быть включена настройка `input_format_defaults_for_omitted_fields`. + +По умолчанию отключена. + +Disabled by default. + ## input\_format\_null\_as\_default {#settings-input-format-null-as-default} Включает или отключает использование значений по умолчанию в случаях, когда во входных данных содержится `NULL`, но тип соответствующего столбца не `Nullable(T)` (для текстовых форматов). @@ -369,7 +377,7 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), ( Устанавливает строгость по умолчанию для [JOIN](../../sql-reference/statements/select/join.md#select-join). -Возможные значения +Возможные значения: - `ALL` — если в правой таблице несколько совпадающих строк, данные умножаются на количество этих строк. Это нормальное поведение `JOIN` как в стандартном SQL. - `ANY` — если в правой таблице несколько соответствующих строк, то соединяется только первая найденная. Если в «правой» таблице есть не более одной подходящей строки, то результаты `ANY` и `ALL` совпадают. @@ -520,6 +528,31 @@ ClickHouse использует этот параметр при чтении д Значение по умолчанию: 0. +## network_compression_method {#network_compression_method} + +Устанавливает метод сжатия данных, который используется для обмена данными между серверами и между сервером и [clickhouse-client](../../interfaces/cli.md). + +Возможные значения: + +- `LZ4` — устанавливает метод сжатия LZ4. +- `ZSTD` — устанавливает метод сжатия ZSTD. + +Значение по умолчанию: `LZ4`. + +**См. также** + +- [network_zstd_compression_level](#network_zstd_compression_level) + +## network_zstd_compression_level {#network_zstd_compression_level} + +Регулирует уровень сжатия ZSTD. Используется только тогда, когда [network_compression_method](#network_compression_method) установлен на `ZSTD`. + +Возможные значения: + +- Положительное целое число от 1 до 15. + +Значение по умолчанию: `1`. + ## log\_queries {#settings-log-queries} Установка логирования запроса. @@ -534,42 +567,6 @@ log_queries=1 ## log\_queries\_min\_type {#settings-log-queries-min-type} -`query_log` минимальный уровень логирования. - -Возможные значения: -- `QUERY_START` (`=1`) -- `QUERY_FINISH` (`=2`) -- `EXCEPTION_BEFORE_START` (`=3`) -- `EXCEPTION_WHILE_PROCESSING` (`=4`) - -Значение по умолчанию: `QUERY_START`. - -Можно использовать для ограничения того, какие объекты будут записаны в `query_log`, например, если вас интересуют ошибки, тогда вы можете использовать `EXCEPTION_WHILE_PROCESSING`: - -``` text -log_queries_min_type='EXCEPTION_WHILE_PROCESSING' -``` - -## log\_queries\_min\_type {#settings-log-queries-min-type} - -`query_log` минимальный уровень логирования. - -Возможные значения: -- `QUERY_START` (`=1`) -- `QUERY_FINISH` (`=2`) -- `EXCEPTION_BEFORE_START` (`=3`) -- `EXCEPTION_WHILE_PROCESSING` (`=4`) - -Значение по умолчанию: `QUERY_START`. - -Можно использовать для ограничения того, какие объекты будут записаны в `query_log`, например, если вас интересуют ошибки, тогда вы можете использовать `EXCEPTION_WHILE_PROCESSING`: - -``` text -log_queries_min_type='EXCEPTION_WHILE_PROCESSING' -``` - -## log\_queries\_min\_type {#settings-log-queries-min-type} - Задаёт минимальный уровень логирования в `query_log`. Возможные значения: @@ -839,6 +836,11 @@ ClickHouse поддерживает следующие алгоритмы выб - [Nearest hostname](#load_balancing-nearest_hostname) - [In order](#load_balancing-in_order) - [First or random](#load_balancing-first_or_random) +- [Round robin](#load_balancing-round_robin) + +См. также: + +- [distributed\_replica\_max\_ignored\_errors](#settings-distributed_replica_max_ignored_errors) ### Random (by Default) {#load_balancing-random} @@ -882,6 +884,14 @@ load_balancing = first_or_random Алгоритм `first or random` решает проблему алгоритма `in order`. При использовании `in order`, если одна реплика перестаёт отвечать, то следующая за ней принимает двойную нагрузку, в то время как все остальные обрабатываю свой обычный трафик. Алгоритм `first or random` равномерно распределяет нагрузку между репликами. +### Round Robin {#load_balancing-round_robin} + +``` sql +load_balancing = round_robin +``` + +Этот алгоритм использует циклический перебор реплик с одинаковым количеством ошибок (учитываются только запросы с алгоритмом `round_robin`). + ## prefer\_localhost\_replica {#settings-prefer-localhost-replica} Включает или выключает предпочтительное использование localhost реплики при обработке распределенных запросов. @@ -1292,6 +1302,48 @@ ClickHouse генерирует исключение Значение по умолчанию: 0. +## distributed\_replica\_error\_half\_life {#settings-distributed_replica_error_half_life} + +- Тип: секунды +- Значение по умолчанию: 60 секунд + +Управляет скоростью обнуления ошибок в распределенных таблицах. Если реплика недоступна в течение некоторого времени, накапливает 5 ошибок, а distributed\_replica\_error\_half\_life установлена на 1 секунду, то реплика считается нормальной через 3 секунды после последней ошибки. + +См. также: + +- [load\_balancing](#load_balancing-round_robin) +- [Table engine Distributed](../../engines/table-engines/special/distributed.md) +- [distributed\_replica\_error\_cap](#settings-distributed_replica_error_cap) +- [distributed\_replica\_max\_ignored\_errors](#settings-distributed_replica_max_ignored_errors) + +## distributed\_replica\_error\_cap {#settings-distributed_replica_error_cap} + +- Тип: unsigned int +- Значение по умолчанию: 1000 + +Счетчик ошибок каждой реплики ограничен этим значением, чтобы одна реплика не накапливала слишком много ошибок. + +См. также: + +- [load\_balancing](#load_balancing-round_robin) +- [Table engine Distributed](../../engines/table-engines/special/distributed.md) +- [distributed\_replica\_error\_half\_life](#settings-distributed_replica_error_half_life) +- [distributed\_replica\_max\_ignored\_errors](#settings-distributed_replica_max_ignored_errors) + +## distributed\_replica\_max\_ignored\_errors {#settings-distributed_replica_max_ignored_errors} + +- Тип: unsigned int +- Значение по умолчанию: 0 + +Количество ошибок, которые будут проигнорированы при выборе реплик (согласно алгоритму `load_balancing`). + +См. также: + +- [load\_balancing](#load_balancing-round_robin) +- [Table engine Distributed](../../engines/table-engines/special/distributed.md) +- [distributed\_replica\_error\_cap](#settings-distributed_replica_error_cap) +- [distributed\_replica\_error\_half\_life](#settings-distributed_replica_error_half_life) + ## distributed\_directory\_monitor\_sleep\_time\_ms {#distributed_directory_monitor_sleep_time_ms} Основной интервал отправки данных движком таблиц [Distributed](../../engines/table-engines/special/distributed.md). Фактический интервал растёт экспоненциально при возникновении ошибок. @@ -1342,65 +1394,103 @@ ClickHouse генерирует исключение ## query\_profiler\_real\_time\_period\_ns {#query_profiler_real_time_period_ns} -Sets the period for a real clock timer of the [query profiler](../../operations/optimizing-performance/sampling-query-profiler.md). Real clock timer counts wall-clock time. +Устанавливает период для таймера реального времени [профилировщика запросов](../../operations/optimizing-performance/sampling-query-profiler.md). Таймер реального времени считает wall-clock time. -Possible values: +Возможные значения: -- Positive integer number, in nanoseconds. +- Положительное целое число в наносекундах. - Recommended values: + Рекомендуемые значения: - - 10000000 (100 times a second) nanoseconds and less for single queries. - - 1000000000 (once a second) for cluster-wide profiling. + - 10000000 (100 раз в секунду) наносекунд и меньшее значение для одиночных запросов. + - 1000000000 (раз в секунду) для профилирования в масштабе кластера. -- 0 for turning off the timer. +- 0 для выключения таймера. -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +Тип: [UInt64](../../sql-reference/data-types/int-uint.md). -Default value: 1000000000 nanoseconds (once a second). +Значение по умолчанию: 1000000000 наносекунд (раз в секунду). -See also: +См. также: -- System table [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) +- Системная таблица [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) ## query\_profiler\_cpu\_time\_period\_ns {#query_profiler_cpu_time_period_ns} -Sets the period for a CPU clock timer of the [query profiler](../../operations/optimizing-performance/sampling-query-profiler.md). This timer counts only CPU time. +Устанавливает период для таймера CPU [query profiler](../../operations/optimizing-performance/sampling-query-profiler.md). Этот таймер считает только время CPU. -Possible values: +Возможные значения: -- Positive integer number of nanoseconds. +- Положительное целое число в наносекундах. - Recommended values: + Рекомендуемые значения: - - 10000000 (100 times a second) nanosecods and more for for single queries. - - 1000000000 (once a second) for cluster-wide profiling. + - 10000000 (100 раз в секунду) наносекунд и большее значение для одиночных запросов. + - 1000000000 (раз в секунду) для профилирования в масштабе кластера. -- 0 for turning off the timer. +- 0 для выключения таймера. -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +Тип: [UInt64](../../sql-reference/data-types/int-uint.md). -Default value: 1000000000 nanoseconds. +Значение по умолчанию: 1000000000 наносекунд. -See also: +См. также: -- System table [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) +- Системная таблица [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) ## allow_introspection_functions {#settings-allow_introspection_functions} -Enables of disables [introspections functions](../../sql-reference/functions/introspection.md) for query profiling. +Включает или отключает [функции самоанализа](../../sql-reference/functions/introspection.md) для профилирования запросов. -Possible values: +Возможные значения: -- 1 — Introspection functions enabled. -- 0 — Introspection functions disabled. +- 1 — включены функции самоанализа. +- 0 — функции самоанализа отключены. -Default value: 0. +Значение по умолчанию: 0. -**See Also** +**См. также** - [Sampling Query Profiler](../optimizing-performance/sampling-query-profiler.md) -- System table [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) +- Системная таблица [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) + +## input\_format\_parallel\_parsing {#input-format-parallel-parsing} + +- Тип: bool +- Значение по умолчанию: True + +Обеспечивает параллельный анализ форматов данных с сохранением порядка. Поддерживается только для форматов TSV, TKSV, CSV и JSONEachRow. + +## min\_chunk\_bytes\_for\_parallel\_parsing {#min-chunk-bytes-for-parallel-parsing} + +- Тип: unsigned int +- Значение по умолчанию: 1 MiB + +Минимальный размер блока в байтах, который каждый поток будет анализировать параллельно. + +## output\_format\_avro\_codec {#settings-output_format_avro_codec} + +Устанавливает кодек сжатия, используемый для вывода файла Avro. + +Тип: строка + +Возможные значения: + +- `null` — без сжатия +- `deflate` — сжать с помощью Deflate (zlib) +- `snappy` — сжать с помощью [Snappy](https://google.github.io/snappy/) + +Значение по умолчанию: `snappy` (если доступно) или `deflate`. + +## output\_format\_avro\_sync\_interval {#settings-output_format_avro_sync_interval} + +Устанавливает минимальный размер данных (в байтах) между маркерами синхронизации для выходного файла Avro. + +Тип: unsigned int + +озможные значения: 32 (32 байта) - 1073741824 (1 GiB) + +Значение по умолчанию: 32768 (32 KiB) ## background\_pool\_size {#background_pool_size} @@ -1624,6 +1714,26 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1; - [min_insert_block_size_bytes](#min-insert-block-size-bytes) +## output\_format\_pretty\_grid\_charset {#output-format-pretty-grid-charset} + +Позволяет изменить кодировку, которая используется для печати грид-границ. Доступны следующие кодировки: UTF-8, ASCII. + +**Пример** + +``` text +SET output_format_pretty_grid_charset = 'UTF-8'; +SELECT * FROM a; +┌─a─┐ +│ 1 │ +└───┘ + +SET output_format_pretty_grid_charset = 'ASCII'; +SELECT * FROM a; ++-a-+ +| 1 | ++---+ +``` + ## optimize_read_in_order {#optimize_read_in_order} Включает или отключает оптимизацию в запросах [SELECT](../../sql-reference/statements/select/index.md) с секцией [ORDER BY](../../sql-reference/statements/select/order-by.md#optimize_read_in_order) при работе с таблицами семейства [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). From 66e1072c2cac2bd6a716f4d5286244031863e2c2 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 14 Jan 2021 00:46:55 +0800 Subject: [PATCH 02/94] Add the function to read file as a String. --- src/Functions/FunctionFile.cpp | 121 ++++++++++++++++++++++++++ src/Functions/FunctionsConversion.cpp | 4 +- 2 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 src/Functions/FunctionFile.cpp diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp new file mode 100644 index 00000000000..8c29a9a39df --- /dev/null +++ b/src/Functions/FunctionFile.cpp @@ -0,0 +1,121 @@ +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int TOO_LARGE_STRING_SIZE; + extern const int NOT_IMPLEMENTED; +} + + +/** Conversion to fixed string is implemented only for strings. + */ +class FunctionFromFile : public IFunction +{ +public: + static constexpr auto name = "file"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create() { return std::make_shared(); } + //static FunctionPtr create(const Context & context) { return std::make_shared(context); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + //bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (!isStringOrFixedString(arguments[0].type)) + throw Exception(getName() + " is only implemented for types String and FixedString", ErrorCodes::NOT_IMPLEMENTED); + //??how to get accurate length here? or should we return normal string type? + //return std::make_shared(1); + return std::make_shared(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + //ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + + ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const auto & column = arguments[0].column; + const char * filename = nullptr; + // if (const auto * column_string = checkAndGetColumnConst(column.get())) + if (const auto * column_string = checkAndGetColumn(column.get())) + { + const auto & filename_chars = column_string->getChars(); + filename = reinterpret_cast(&filename_chars[0]); + + /* + //get file path + auto user_files_path = Context::getUserFilesPath(); + + + String user_files_absolute_path = Poco::Path(user_files_path).makeAbsolute().makeDirectory().toString(); + Poco::Path poco_path = Poco::Path(table_path); + if (poco_path.isRelative()) + poco_path = Poco::Path(user_files_absolute_path, poco_path); + else //need to judge if the absolute path is in userfilespath? + const String path = poco_path.absolute().toString(); + +*/ + auto fd = open(filename, O_RDONLY); + if (fd == -1) + {//arguments[0].column->getName() + throw Exception("Can't open " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN); //ErrorCode need to be rectify + } + struct stat file_stat; + if (fstat(fd, &file_stat) == -1) + { + throw Exception("Can't stat " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN); + } + auto file_length = static_cast(file_stat.st_size); + auto res = ColumnString::create(); + auto & res_chars = res->getChars(); + auto & res_offsets = res->getOffsets(); + //res_chars.resize_fill(file_length + 1); + //omit the copy op to only once. + res_chars.resize_exact(file_length + 1); + res_offsets.push_back(file_length + 1); + char * buf = reinterpret_cast(&res_chars[0]); + ssize_t bytes_read = pread(fd, buf, file_length, 0); + + if (bytes_read == -1) + { + throw Exception("Bad read of " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN); + } + if (static_cast(bytes_read) != file_length) + { + throw Exception("Short read of " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN); + } + buf[file_length] = '\0'; + close(fd); + return res; + } + else + { + throw Exception("Bad Function arguments for file() " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN); + } + } +}; + + + +void registerFunctionFromFile(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} \ No newline at end of file diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 257b852ecd8..a6866ce0939 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -6,6 +6,7 @@ namespace DB { void registerFunctionFixedString(FunctionFactory & factory); +void registerFunctionFromFile(FunctionFactory & factory); void registerFunctionsConversion(FunctionFactory & factory) { @@ -36,7 +37,8 @@ void registerFunctionsConversion(FunctionFactory & factory) factory.registerFunction(); registerFunctionFixedString(factory); - + registerFunctionFromFile(factory); + factory.registerFunction(); factory.registerFunction>(FunctionFactory::CaseInsensitive); From 701b61dcedef91f88808647cbcb141369a47bf24 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 14 Jan 2021 13:36:22 +0800 Subject: [PATCH 03/94] Function arguments declaration Upgrade with super class --- src/Functions/FunctionFile.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index 8c29a9a39df..2a524adde47 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -47,8 +47,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } //ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - - ColumnPtr executeImpl(ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const auto & column = arguments[0].column; const char * filename = nullptr; From e95b8089cd0384090b8808d98723a4ad4cd414be Mon Sep 17 00:00:00 2001 From: keenwolf Date: Thu, 14 Jan 2021 18:44:16 +0800 Subject: [PATCH 04/94] Make code clean including properly exception handle --- src/Functions/FunctionFile.cpp | 75 +++++++++++++--------------------- 1 file changed, 29 insertions(+), 46 deletions(-) diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index 2a524adde47..e856befa9d1 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -1,7 +1,5 @@ -//#include #include #include -#include #include #include #include @@ -18,88 +16,74 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int TOO_LARGE_STRING_SIZE; extern const int NOT_IMPLEMENTED; + extern const int FILE_DOESNT_EXIST; + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_CLOSE_FILE; + extern const int CANNOT_FSTAT; + extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; } -/** Conversion to fixed string is implemented only for strings. +/** A function to read file as a string. */ -class FunctionFromFile : public IFunction +class FunctionFile : public IFunction { public: static constexpr auto name = "file"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - static FunctionPtr create() { return std::make_shared(); } - //static FunctionPtr create(const Context & context) { return std::make_shared(context); } + static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create() { return std::make_shared(); } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } - //bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (!isStringOrFixedString(arguments[0].type)) throw Exception(getName() + " is only implemented for types String and FixedString", ErrorCodes::NOT_IMPLEMENTED); - //??how to get accurate length here? or should we return normal string type? - //return std::make_shared(1); return std::make_shared(); } bool useDefaultImplementationForConstants() const override { return true; } - //ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const auto & column = arguments[0].column; const char * filename = nullptr; - // if (const auto * column_string = checkAndGetColumnConst(column.get())) + if (const auto * column_string = checkAndGetColumn(column.get())) { const auto & filename_chars = column_string->getChars(); filename = reinterpret_cast(&filename_chars[0]); - /* - //get file path - auto user_files_path = Context::getUserFilesPath(); - - - String user_files_absolute_path = Poco::Path(user_files_path).makeAbsolute().makeDirectory().toString(); - Poco::Path poco_path = Poco::Path(table_path); - if (poco_path.isRelative()) - poco_path = Poco::Path(user_files_absolute_path, poco_path); - else //need to judge if the absolute path is in userfilespath? - const String path = poco_path.absolute().toString(); - -*/ auto fd = open(filename, O_RDONLY); - if (fd == -1) - {//arguments[0].column->getName() - throw Exception("Can't open " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN); //ErrorCode need to be rectify - } + if (-1 == fd) + throwFromErrnoWithPath("Cannot open file " + std::string(filename), std::string(filename), + errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); struct stat file_stat; - if (fstat(fd, &file_stat) == -1) - { - throw Exception("Can't stat " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN); - } + if (-1 == fstat(fd, &file_stat)) + throwFromErrnoWithPath("Cannot stat file " + std::string(filename), std::string(filename), + ErrorCodes::CANNOT_FSTAT); + auto file_length = static_cast(file_stat.st_size); auto res = ColumnString::create(); auto & res_chars = res->getChars(); auto & res_offsets = res->getOffsets(); - //res_chars.resize_fill(file_length + 1); - //omit the copy op to only once. res_chars.resize_exact(file_length + 1); res_offsets.push_back(file_length + 1); - char * buf = reinterpret_cast(&res_chars[0]); - ssize_t bytes_read = pread(fd, buf, file_length, 0); + char * res_buf = reinterpret_cast(&res_chars[0]); + //To read directly into the String buf, avoiding one redundant copy + ssize_t bytes_read = pread(fd, res_buf, file_length, 0); if (bytes_read == -1) - { - throw Exception("Bad read of " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN); - } + throwFromErrnoWithPath("Read failed for " + std::string(filename), std::string(filename), + errno == EBADF ? ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR : ErrorCodes::ILLEGAL_COLUMN); if (static_cast(bytes_read) != file_length) - { - throw Exception("Short read of " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN); - } - buf[file_length] = '\0'; + throwFromErrnoWithPath("Cannot read all bytes from " + std::string(filename), std::string(filename), ErrorCodes::ILLEGAL_COLUMN); + + res_buf[file_length] = '\0'; close(fd); return res; } @@ -111,10 +95,9 @@ public: }; - void registerFunctionFromFile(FunctionFactory & factory) { - factory.registerFunction(); + factory.registerFunction(); } } \ No newline at end of file From 791a4cfb52b27d511a24c9e74a479bef8a15f20d Mon Sep 17 00:00:00 2001 From: keenwolf Date: Thu, 14 Jan 2021 19:46:19 +0800 Subject: [PATCH 05/94] Small fix --- src/Functions/FunctionFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index e856befa9d1..f491ad54bf2 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -77,7 +77,7 @@ public: //To read directly into the String buf, avoiding one redundant copy ssize_t bytes_read = pread(fd, res_buf, file_length, 0); - if (bytes_read == -1) + if (-1 == bytes_read) throwFromErrnoWithPath("Read failed for " + std::string(filename), std::string(filename), errno == EBADF ? ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR : ErrorCodes::ILLEGAL_COLUMN); if (static_cast(bytes_read) != file_length) From 53e483d36c24c821e714d3c5224ea8b9d1e17670 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Thu, 14 Jan 2021 20:09:13 +0800 Subject: [PATCH 06/94] Small fix --- src/Functions/FunctionFile.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index f491ad54bf2..317bc46364a 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -21,6 +21,7 @@ namespace ErrorCodes extern const int CANNOT_CLOSE_FILE; extern const int CANNOT_FSTAT; extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; + extern const int CANNOT_CLOSE_FILE; } @@ -84,7 +85,10 @@ public: throwFromErrnoWithPath("Cannot read all bytes from " + std::string(filename), std::string(filename), ErrorCodes::ILLEGAL_COLUMN); res_buf[file_length] = '\0'; - close(fd); + if (0 != close(fd)) + throw Exception("Cannot close file " + std::string(filename), ErrorCodes::CANNOT_CLOSE_FILE); + fd = -1; + return res; } else From 4b6cc4ea4bf6ff293207f3fbbf91a53ff6ce4528 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Thu, 14 Jan 2021 23:48:38 +0800 Subject: [PATCH 07/94] Add Function to read file as a String, Using ReadBuffer. --- src/Functions/FunctionFile.cpp | 159 ++++++++++++++------------------- 1 file changed, 67 insertions(+), 92 deletions(-) diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index 317bc46364a..c2757798584 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -1,107 +1,82 @@ #include #include -#include #include -#include -#include -#include -#include -#include +#include +#include + namespace DB { -namespace ErrorCodes -{ - extern const int ILLEGAL_COLUMN; - extern const int TOO_LARGE_STRING_SIZE; - extern const int NOT_IMPLEMENTED; - extern const int FILE_DOESNT_EXIST; - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_CLOSE_FILE; - extern const int CANNOT_FSTAT; - extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; - extern const int CANNOT_CLOSE_FILE; -} + namespace ErrorCodes + { + extern const int ILLEGAL_COLUMN; + extern const int NOT_IMPLEMENTED; + } /** A function to read file as a string. */ -class FunctionFile : public IFunction -{ -public: - static constexpr auto name = "file"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - static FunctionPtr create() { return std::make_shared(); } - - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 1; } - bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + class FunctionFile : public IFunction { - if (!isStringOrFixedString(arguments[0].type)) - throw Exception(getName() + " is only implemented for types String and FixedString", ErrorCodes::NOT_IMPLEMENTED); - return std::make_shared(); + public: + static constexpr auto name = "file"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create() { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (!isStringOrFixedString(arguments[0].type)) + throw Exception(getName() + " is only implemented for types String and FixedString", ErrorCodes::NOT_IMPLEMENTED); + return std::make_shared(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const auto & column = arguments[0].column; + const char * filename = nullptr; + if (const auto * column_string = checkAndGetColumn(column.get())) + { + const auto & filename_chars = column_string->getChars(); + filename = reinterpret_cast(&filename_chars[0]); + auto res = ColumnString::create(); + auto & res_chars = res->getChars(); + auto & res_offsets = res->getOffsets(); + + ReadBufferFromFile in(filename); + char *res_buf; + size_t file_len = 0, rlen = 0; + while (0 == file_len || 4096 == rlen) + { + file_len += rlen; + res_chars.resize(4096 + file_len); + res_buf = reinterpret_cast(&res_chars[0]); + rlen = in.read(res_buf + file_len, 4096); + } + file_len += rlen; + res_offsets.push_back(file_len + 1); + res_buf[file_len] = '\0'; + + return res; + } + else + { + throw Exception("Bad Function arguments for file() " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN); + } + } + }; + + void registerFunctionFromFile(FunctionFactory & factory) + { + factory.registerFunction(); } - bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const auto & column = arguments[0].column; - const char * filename = nullptr; - - if (const auto * column_string = checkAndGetColumn(column.get())) - { - const auto & filename_chars = column_string->getChars(); - filename = reinterpret_cast(&filename_chars[0]); - - auto fd = open(filename, O_RDONLY); - if (-1 == fd) - throwFromErrnoWithPath("Cannot open file " + std::string(filename), std::string(filename), - errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); - struct stat file_stat; - if (-1 == fstat(fd, &file_stat)) - throwFromErrnoWithPath("Cannot stat file " + std::string(filename), std::string(filename), - ErrorCodes::CANNOT_FSTAT); - - auto file_length = static_cast(file_stat.st_size); - auto res = ColumnString::create(); - auto & res_chars = res->getChars(); - auto & res_offsets = res->getOffsets(); - res_chars.resize_exact(file_length + 1); - res_offsets.push_back(file_length + 1); - char * res_buf = reinterpret_cast(&res_chars[0]); - - //To read directly into the String buf, avoiding one redundant copy - ssize_t bytes_read = pread(fd, res_buf, file_length, 0); - if (-1 == bytes_read) - throwFromErrnoWithPath("Read failed for " + std::string(filename), std::string(filename), - errno == EBADF ? ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR : ErrorCodes::ILLEGAL_COLUMN); - if (static_cast(bytes_read) != file_length) - throwFromErrnoWithPath("Cannot read all bytes from " + std::string(filename), std::string(filename), ErrorCodes::ILLEGAL_COLUMN); - - res_buf[file_length] = '\0'; - if (0 != close(fd)) - throw Exception("Cannot close file " + std::string(filename), ErrorCodes::CANNOT_CLOSE_FILE); - fd = -1; - - return res; - } - else - { - throw Exception("Bad Function arguments for file() " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN); - } - } -}; - - -void registerFunctionFromFile(FunctionFactory & factory) -{ - factory.registerFunction(); } - -} \ No newline at end of file From d98cac0dd32b26e56ac0f40a3df074fafe0e1be4 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Fri, 15 Jan 2021 14:27:38 +0800 Subject: [PATCH 08/94] Add another method for reading file at once to avoid frequently realloc and mem move --- src/Functions/FunctionFile.cpp | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index c2757798584..1450b748955 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -50,18 +51,33 @@ namespace DB auto res = ColumnString::create(); auto & res_chars = res->getChars(); auto & res_offsets = res->getOffsets(); - + + //TBD: Here, need to restrict the access permission for only user_path... + ReadBufferFromFile in(filename); + + // Method-1: Read the whole file at once + size_t file_len = Poco::File(filename).getSize(); + res_chars.resize(file_len + 1); + char *res_buf = reinterpret_cast(&res_chars[0]); + in.readStrict(res_buf, file_len); + + /* + //Method-2: Read with loop + char *res_buf; - size_t file_len = 0, rlen = 0; - while (0 == file_len || 4096 == rlen) + size_t file_len = 0, rlen = 0, bsize = 4096; + while (0 == file_len || rlen == bsize) { file_len += rlen; - res_chars.resize(4096 + file_len); + res_chars.resize(1 + bsize + file_len); res_buf = reinterpret_cast(&res_chars[0]); - rlen = in.read(res_buf + file_len, 4096); + rlen = in.read(res_buf + file_len, bsize); } file_len += rlen; + */ + + res_offsets.push_back(file_len + 1); res_buf[file_len] = '\0'; From 2d2277245535d1dda55c64ad4535d1ffacb5e707 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Sat, 16 Jan 2021 11:27:31 +0800 Subject: [PATCH 09/94] Handle with context pass --- CMakeLists.txt | 4 +--- src/Functions/FunctionFile.cpp | 27 ++++++++++++++++++++++----- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 853b2df7aca..3a37ba4c28e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -375,9 +375,7 @@ else () option(WERROR "Enable -Werror compiler option" ON) endif () -if (WERROR) - add_warning(error) -endif () +option(WERROR "Enable -Werror compiler option" OFF) # Make this extra-checks for correct library dependencies. if (OS_LINUX AND NOT SANITIZE) diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index 1450b748955..0d8f315cdea 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include namespace DB @@ -15,15 +17,19 @@ namespace DB extern const int NOT_IMPLEMENTED; } + void checkCreationIsAllowed(const Context & context_global, const std::string & db_dir_path, const std::string & table_path); -/** A function to read file as a string. + + /** A function to read file as a string. */ class FunctionFile : public IFunction { public: static constexpr auto name = "file"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - static FunctionPtr create() { return std::make_shared(); } + static FunctionPtr create(const Context &context) { return std::make_shared(context); } + //static FunctionPtr create() { return std::make_shared(); } + explicit FunctionFile(const Context &context_) : context(context_) {}; + //FunctionFile() {}; String getName() const override { return name; } @@ -52,13 +58,21 @@ namespace DB auto & res_chars = res->getChars(); auto & res_offsets = res->getOffsets(); - //TBD: Here, need to restrict the access permission for only user_path... + //File_path access permission check. + const String user_files_path = context.getUserFilesPath(); + String user_files_absolute_path = Poco::Path(user_files_path).makeAbsolute().makeDirectory().toString(); + Poco::Path poco_filepath = Poco::Path(filename); + if (poco_filepath.isRelative()) + poco_filepath = Poco::Path(user_files_absolute_path, poco_filepath); + const String file_absolute_path = poco_filepath.absolute().toString(); + checkCreationIsAllowed(context, user_files_absolute_path, file_absolute_path); + //Start read from file. ReadBufferFromFile in(filename); // Method-1: Read the whole file at once size_t file_len = Poco::File(filename).getSize(); - res_chars.resize(file_len + 1); + res_chars.resize_exact(file_len + 1); char *res_buf = reinterpret_cast(&res_chars[0]); in.readStrict(res_buf, file_len); @@ -88,6 +102,9 @@ namespace DB throw Exception("Bad Function arguments for file() " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN); } } + + private: + const Context & context; }; void registerFunctionFromFile(FunctionFactory & factory) From 29aa0da28c7099771121924e23743910e1e666b9 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Sat, 16 Jan 2021 14:55:59 +0800 Subject: [PATCH 10/94] Make filepath check done but with infile func, need to modify the ld path --- src/Functions/FunctionFile.cpp | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index 0d8f315cdea..7e362ca539b 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -6,7 +6,8 @@ #include #include #include - +#include +#include namespace DB { @@ -20,6 +21,25 @@ namespace DB void checkCreationIsAllowed(const Context & context_global, const std::string & db_dir_path, const std::string & table_path); + inline bool startsWith2(const std::string & s, const std::string & prefix) + { + return s.size() >= prefix.size() && 0 == memcmp(s.data(), prefix.data(), prefix.size()); + } + + void checkCreationIsAllowed(const Context & context_global, const std::string & db_dir_path, const std::string & table_path) + { + if (context_global.getApplicationType() != Context::ApplicationType::SERVER) + return; + + /// "/dev/null" is allowed for perf testing + if (!startsWith2(table_path, db_dir_path) && table_path != "/dev/null") + throw Exception("File is not inside " + db_dir_path, 9); + + Poco::File table_path_poco_file = Poco::File(table_path); + if (table_path_poco_file.exists() && table_path_poco_file.isDirectory()) + throw Exception("File must not be a directory", 9); + } + /** A function to read file as a string. */ class FunctionFile : public IFunction From 77e74b397c30efbdfaf4a139facdcdbcc4919cd4 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Sat, 16 Jan 2021 18:43:56 +0800 Subject: [PATCH 11/94] Add file access check, also give another read method in comments for reference --- src/Functions/FunctionFile.cpp | 84 +++++++++++++++------------------- 1 file changed, 38 insertions(+), 46 deletions(-) diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index 7e362ca539b..1de98cc3f38 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -6,8 +6,8 @@ #include #include #include -#include -#include +#include +#include namespace DB { @@ -15,29 +15,14 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_COLUMN; + extern const int TOO_LARGE_STRING_SIZE; extern const int NOT_IMPLEMENTED; - } - - void checkCreationIsAllowed(const Context & context_global, const std::string & db_dir_path, const std::string & table_path); - - - inline bool startsWith2(const std::string & s, const std::string & prefix) - { - return s.size() >= prefix.size() && 0 == memcmp(s.data(), prefix.data(), prefix.size()); - } - - void checkCreationIsAllowed(const Context & context_global, const std::string & db_dir_path, const std::string & table_path) - { - if (context_global.getApplicationType() != Context::ApplicationType::SERVER) - return; - - /// "/dev/null" is allowed for perf testing - if (!startsWith2(table_path, db_dir_path) && table_path != "/dev/null") - throw Exception("File is not inside " + db_dir_path, 9); - - Poco::File table_path_poco_file = Poco::File(table_path); - if (table_path_poco_file.exists() && table_path_poco_file.isDirectory()) - throw Exception("File must not be a directory", 9); + extern const int FILE_DOESNT_EXIST; + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_CLOSE_FILE; + extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; + extern const int INCORRECT_FILE_NAME; + extern const int DATABASE_ACCESS_DENIED; } /** A function to read file as a string. @@ -47,9 +32,7 @@ namespace DB public: static constexpr auto name = "file"; static FunctionPtr create(const Context &context) { return std::make_shared(context); } - //static FunctionPtr create() { return std::make_shared(); } explicit FunctionFile(const Context &context_) : context(context_) {}; - //FunctionFile() {}; String getName() const override { return name; } @@ -78,40 +61,36 @@ namespace DB auto & res_chars = res->getChars(); auto & res_offsets = res->getOffsets(); - //File_path access permission check. + //File access permission check const String user_files_path = context.getUserFilesPath(); String user_files_absolute_path = Poco::Path(user_files_path).makeAbsolute().makeDirectory().toString(); Poco::Path poco_filepath = Poco::Path(filename); if (poco_filepath.isRelative()) poco_filepath = Poco::Path(user_files_absolute_path, poco_filepath); const String file_absolute_path = poco_filepath.absolute().toString(); - checkCreationIsAllowed(context, user_files_absolute_path, file_absolute_path); + checkReadIsAllowed(user_files_absolute_path, file_absolute_path); - //Start read from file. - ReadBufferFromFile in(filename); - - // Method-1: Read the whole file at once - size_t file_len = Poco::File(filename).getSize(); + //Method-1: Read file with ReadBuffer + ReadBufferFromFile in(file_absolute_path); + ssize_t file_len = Poco::File(file_absolute_path).getSize(); res_chars.resize_exact(file_len + 1); char *res_buf = reinterpret_cast(&res_chars[0]); in.readStrict(res_buf, file_len); /* - //Method-2: Read with loop - - char *res_buf; - size_t file_len = 0, rlen = 0, bsize = 4096; - while (0 == file_len || rlen == bsize) - { - file_len += rlen; - res_chars.resize(1 + bsize + file_len); - res_buf = reinterpret_cast(&res_chars[0]); - rlen = in.read(res_buf + file_len, bsize); - } - file_len += rlen; + //Method-2: Read directly into the String buf, which avoiding one copy from PageCache to ReadBuffer + int fd; + if (-1 == (fd = open(file_absolute_path.c_str(), O_RDONLY))) + throwFromErrnoWithPath("Cannot open file " + std::string(file_absolute_path), std::string(file_absolute_path), + errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); + if (file_len != pread(fd, res_buf, file_len, 0)) + throwFromErrnoWithPath("Read failed with " + std::string(file_absolute_path), std::string(file_absolute_path), + ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + if (0 != close(fd)) + throw Exception("Cannot close file " + std::string(file_absolute_path), ErrorCodes::CANNOT_CLOSE_FILE); + fd = -1; */ - res_offsets.push_back(file_len + 1); res_buf[file_len] = '\0'; @@ -124,9 +103,22 @@ namespace DB } private: + void checkReadIsAllowed(const std::string & user_files_path, const std::string & file_path) const + { + // If run in Local mode, no need for path checking. + if (context.getApplicationType() != Context::ApplicationType::LOCAL) + if (file_path.find(user_files_path) != 0) + throw Exception("File is not inside " + user_files_path, ErrorCodes::DATABASE_ACCESS_DENIED); + + Poco::File path_poco_file = Poco::File(file_path); + if (path_poco_file.exists() && path_poco_file.isDirectory()) + throw Exception("File can't be a directory", ErrorCodes::INCORRECT_FILE_NAME); + } + const Context & context; }; + void registerFunctionFromFile(FunctionFactory & factory) { factory.registerFunction(); From 85e4bfa566f35d6a4ab87639610f59c628599c38 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Sat, 16 Jan 2021 19:31:15 +0800 Subject: [PATCH 12/94] Remove CMakefile from vcs --- CMakeLists.txt | 565 ------------------------------------------------- 1 file changed, 565 deletions(-) delete mode 100644 CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt deleted file mode 100644 index 3a37ba4c28e..00000000000 --- a/CMakeLists.txt +++ /dev/null @@ -1,565 +0,0 @@ -cmake_minimum_required(VERSION 3.3) - -foreach(policy - CMP0023 - CMP0048 # CMake 3.0 - CMP0074 # CMake 3.12 - CMP0077 - CMP0079 - ) - if(POLICY ${policy}) - cmake_policy(SET ${policy} NEW) - endif() -endforeach() - -# set default policy -foreach(default_policy_var_name - # make option() honor normal variables for BUILD_SHARED_LIBS: - # - re2 - # - snappy - CMAKE_POLICY_DEFAULT_CMP0077 - # Google Test from sources uses too old cmake, 2.6.x, and CMP0022 should - # set, to avoid using deprecated LINK_INTERFACE_LIBRARIES(_)? over - # INTERFACE_LINK_LIBRARIES. - CMAKE_POLICY_DEFAULT_CMP0022 - ) - set(${default_policy_var_name} NEW) -endforeach() - -project(ClickHouse) - -# If turned off: e.g. when ENABLE_FOO is ON, but FOO tool was not found, the CMake will continue. -option(FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION - "Stop/Fail CMake configuration if some ENABLE_XXX option is defined (either ON or OFF) - but is not possible to satisfy" ON) - -if(FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION) - set(RECONFIGURE_MESSAGE_LEVEL FATAL_ERROR) -else() - set(RECONFIGURE_MESSAGE_LEVEL STATUS) -endif() - -include (cmake/arch.cmake) -include (cmake/target.cmake) -include (cmake/tools.cmake) -include (cmake/analysis.cmake) - -# Ignore export() since we don't use it, -# but it gets broken with a global targets via link_libraries() -macro (export) -endmacro () - -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/") -set(CMAKE_EXPORT_COMPILE_COMMANDS 1) # Write compile_commands.json -set(CMAKE_LINK_DEPENDS_NO_SHARED 1) # Do not relink all depended targets on .so -set(CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Debug;Release;MinSizeRel" CACHE STRING "" FORCE) -set(CMAKE_DEBUG_POSTFIX "d" CACHE STRING "Generate debug library name with a postfix.") # To be consistent with CMakeLists from contrib libs. - -# Enable the ability to organize targets into hierarchies of "folders" for capable GUI-based IDEs. -# For more info see https://cmake.org/cmake/help/latest/prop_gbl/USE_FOLDERS.html -set_property(GLOBAL PROPERTY USE_FOLDERS ON) - -# Check that submodules are present only if source was downloaded with git -if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boost/boost") - message (FATAL_ERROR "Submodules are not initialized. Run\n\tgit submodule update --init --recursive") -endif () - -include (cmake/find/ccache.cmake) - -option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile too long or to take too much memory while compiling" OFF) -if (ENABLE_CHECK_HEAVY_BUILDS) - # set DATA (since RSS does not work since 2.6.x+) to 2G - set (RLIMIT_DATA 5000000000) - # set VIRT (RLIMIT_AS) to 10G (DATA*10) - set (RLIMIT_AS 10000000000) - # gcc10/gcc10/clang -fsanitize=memory is too heavy - if (SANITIZE STREQUAL "memory" OR COMPILER_GCC) - set (RLIMIT_DATA 10000000000) - endif() - set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=600) -endif () - -if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "None") - set (CMAKE_BUILD_TYPE "RelWithDebInfo") - message (STATUS "CMAKE_BUILD_TYPE is not set, set to default = ${CMAKE_BUILD_TYPE}") -endif () -message (STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") - -string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC) - -option(USE_STATIC_LIBRARIES "Disable to use shared libraries" ON) -option(MAKE_STATIC_LIBRARIES "Disable to make shared libraries" ${USE_STATIC_LIBRARIES}) - -if (NOT MAKE_STATIC_LIBRARIES) - # DEVELOPER ONLY. - # Faster linking if turned on. - option(SPLIT_SHARED_LIBRARIES "Keep all internal libraries as separate .so files") - - option(CLICKHOUSE_SPLIT_BINARY - "Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled") -endif () - -if (MAKE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES) - message(FATAL_ERROR "Defining SPLIT_SHARED_LIBRARIES=1 without MAKE_STATIC_LIBRARIES=0 has no effect.") -endif() - -if (NOT MAKE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES) - set(BUILD_SHARED_LIBS 1 CACHE INTERNAL "") -endif () - -if (USE_STATIC_LIBRARIES) - list(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES) -endif () - -# Implies ${WITH_COVERAGE} -option (ENABLE_FUZZING "Fuzzy testing using libfuzzer" OFF) - -if (ENABLE_FUZZING) - message (STATUS "Fuzzing instrumentation enabled") - set (WITH_COVERAGE ON) - set (FUZZER "libfuzzer") -endif() - -# Global libraries -# See: -# - default_libs.cmake -# - sanitize.cmake -add_library(global-libs INTERFACE) - -include (cmake/fuzzer.cmake) -include (cmake/sanitize.cmake) - -if (CMAKE_GENERATOR STREQUAL "Ninja" AND NOT DISABLE_COLORED_BUILD) - # Turn on colored output. https://github.com/ninja-build/ninja/wiki/FAQ - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always") - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-color=always") -endif () - -include (cmake/add_warning.cmake) - -if (NOT MSVC) - set (COMMON_WARNING_FLAGS "${COMMON_WARNING_FLAGS} -Wall") # -Werror and many more is also added inside cmake/warnings.cmake -endif () - -if (COMPILER_CLANG) - # clang: warning: argument unused during compilation: '-specs=/usr/share/dpkg/no-pie-compile.specs' [-Wunused-command-line-argument] - set (COMMON_WARNING_FLAGS "${COMMON_WARNING_FLAGS} -Wno-unused-command-line-argument") - # generate ranges for fast "addr2line" search - if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE") - set(COMPILER_FLAGS "${COMPILER_FLAGS} -gdwarf-aranges") - endif () -endif () - -# If turned `ON`, assumes the user has either the system GTest library or the bundled one. -option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON) - -if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") - # Only for Linux, x86_64. - # Implies ${ENABLE_FASTMEMCPY} - option(GLIBC_COMPATIBILITY "Enable compatibility with older glibc libraries." ON) -elseif(GLIBC_COMPATIBILITY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration") -endif () - -if (NOT CMAKE_VERSION VERSION_GREATER "3.9.0") - message (WARNING "CMake version must be greater than 3.9.0 for production builds.") -endif () - -# Make sure the final executable has symbols exported -set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic") - -if (OS_LINUX) - find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy") - if (OBJCOPY_PATH) - message(STATUS "Using objcopy: ${OBJCOPY_PATH}.") - - if (ARCH_AMD64) - set(OBJCOPY_ARCH_OPTIONS -O elf64-x86-64 -B i386) - elseif (ARCH_AARCH64) - set(OBJCOPY_ARCH_OPTIONS -O elf64-aarch64 -B aarch64) - endif () - else () - message(FATAL_ERROR "Cannot find objcopy.") - endif () -endif () - -if (OS_DARWIN) - set(WHOLE_ARCHIVE -all_load) - set(NO_WHOLE_ARCHIVE -noall_load) -else () - set(WHOLE_ARCHIVE --whole-archive) - set(NO_WHOLE_ARCHIVE --no-whole-archive) -endif () - -# Ignored if `lld` is used -option(ADD_GDB_INDEX_FOR_GOLD "Add .gdb-index to resulting binaries for gold linker.") - -if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE") - # Can be lld or ld-lld. - if (LINKER_NAME MATCHES "lld$") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index") - set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gdb-index") - message (STATUS "Adding .gdb-index via --gdb-index linker option.") - # we use another tool for gdb-index, because gold linker removes section .debug_aranges, which used inside clickhouse stacktraces - # http://sourceware-org.1504.n7.nabble.com/gold-No-debug-aranges-section-when-linking-with-gdb-index-td540965.html#a556932 - elseif (LINKER_NAME MATCHES "gold$" AND ADD_GDB_INDEX_FOR_GOLD) - find_program (GDB_ADD_INDEX_EXE NAMES "gdb-add-index" DOC "Path to gdb-add-index executable") - if (NOT GDB_ADD_INDEX_EXE) - set (USE_GDB_ADD_INDEX 0) - message (WARNING "Cannot add gdb index to binaries, because gold linker is used, but gdb-add-index executable not found.") - else() - set (USE_GDB_ADD_INDEX 1) - message (STATUS "gdb-add-index found: ${GDB_ADD_INDEX_EXE}") - endif() - endif () -endif() - -# Create BuildID when using lld. For other linkers it is created by default. -if (LINKER_NAME MATCHES "lld$") - # SHA1 is not cryptographically secure but it is the best what lld is offering. - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id=sha1") -endif () - -# Add a section with the hash of the compiled machine code for integrity checks. -# Only for official builds, because adding a section can be time consuming (rewrite of several GB). -# And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary) -if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE)) - set (USE_BINARY_HASH 1) -endif () - -cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd - - -if(NOT AVAILABLE_PHYSICAL_MEMORY OR AVAILABLE_PHYSICAL_MEMORY GREATER 8000) - # Less `/tmp` usage, more RAM usage. - option(COMPILER_PIPE "-pipe compiler option" ON) -endif() - -if(COMPILER_PIPE) - set(COMPILER_FLAGS "${COMPILER_FLAGS} -pipe") -else() - message(STATUS "Disabling compiler -pipe option (have only ${AVAILABLE_PHYSICAL_MEMORY} mb of memory)") -endif() - -if(NOT DISABLE_CPU_OPTIMIZE) - include(cmake/cpu_features.cmake) -endif() - -option(ARCH_NATIVE "Add -march=native compiler flag") - -if (ARCH_NATIVE) - set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native") -endif () - -if (COMPILER_GCC OR COMPILER_CLANG) - # to make numeric_limits<__int128> works with GCC - set (_CXX_STANDARD "gnu++2a") -else() - set (_CXX_STANDARD "c++2a") -endif() - -# cmake < 3.12 doesn't support 20. We'll set CMAKE_CXX_FLAGS for now -# set (CMAKE_CXX_STANDARD 20) -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=${_CXX_STANDARD}") - -set (CMAKE_CXX_EXTENSIONS 0) # https://cmake.org/cmake/help/latest/prop_tgt/CXX_EXTENSIONS.html#prop_tgt:CXX_EXTENSIONS -set (CMAKE_CXX_STANDARD_REQUIRED ON) - -if (COMPILER_GCC OR COMPILER_CLANG) - # Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure. - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation") -endif () - -# Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc -option(WITH_COVERAGE "Profile the resulting binary/binaries" OFF) - -if (WITH_COVERAGE AND COMPILER_CLANG) - set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-instr-generate -fcoverage-mapping") - # If we want to disable coverage for specific translation units - set(WITHOUT_COVERAGE "-fno-profile-instr-generate -fno-coverage-mapping") -endif() - -if (WITH_COVERAGE AND COMPILER_GCC) - set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-arcs -ftest-coverage") - set(COVERAGE_OPTION "-lgcov") - set(WITHOUT_COVERAGE "-fno-profile-arcs -fno-test-coverage") -endif() - -set(COMPILER_FLAGS "${COMPILER_FLAGS}") - -set (CMAKE_BUILD_COLOR_MAKEFILE ON) -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${PLATFORM_EXTRA_CXX_FLAG} ${COMMON_WARNING_FLAGS} ${CXX_WARNING_FLAGS}") -set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_CXX_FLAGS_ADD}") -set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_CXX_FLAGS_ADD}") - -set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${COMMON_WARNING_FLAGS} ${CMAKE_C_FLAGS_ADD}") -set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_C_FLAGS_ADD}") -set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_C_FLAGS_ADD}") - -if (COMPILER_CLANG) - if (OS_DARWIN) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-U,_inside_main") - endif() - - # Display absolute paths in error messages. Otherwise KDevelop fails to navigate to correct file and opens a new file instead. - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-absolute-paths") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-absolute-paths") - - if (NOT ENABLE_TESTS AND NOT SANITIZE) - # https://clang.llvm.org/docs/ThinLTO.html - # Applies to clang only. - # Disabled when building with tests or sanitizers. - option(ENABLE_THINLTO "Clang-specific link time optimization" ON) - endif() - - # Set new experimental pass manager, it's a performance, build time and binary size win. - # Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang. - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager") - - # We cannot afford to use LTO when compiling unit tests, and it's not enough - # to only supply -fno-lto at the final linking stage. So we disable it - # completely. - if (ENABLE_THINLTO AND NOT ENABLE_TESTS AND NOT SANITIZE) - # Link time optimization - set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -flto=thin") - set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -flto=thin") - set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -flto=thin") - elseif (ENABLE_THINLTO) - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot enable ThinLTO") - endif () - - # Always prefer llvm tools when using clang. For instance, we cannot use GNU ar when llvm LTO is enabled - find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-11" "llvm-ar-10" "llvm-ar-9" "llvm-ar-8") - - if (LLVM_AR_PATH) - message(STATUS "Using llvm-ar: ${LLVM_AR_PATH}.") - set (CMAKE_AR ${LLVM_AR_PATH}) - else () - message(WARNING "Cannot find llvm-ar. System ar will be used instead. It does not work with ThinLTO.") - endif () - - find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-11" "llvm-ranlib-10" "llvm-ranlib-9" "llvm-ranlib-8") - - if (LLVM_RANLIB_PATH) - message(STATUS "Using llvm-ranlib: ${LLVM_RANLIB_PATH}.") - set (CMAKE_RANLIB ${LLVM_RANLIB_PATH}) - else () - message(WARNING "Cannot find llvm-ranlib. System ranlib will be used instead. It does not work with ThinLTO.") - endif () - -elseif (ENABLE_THINLTO) - message (${RECONFIGURE_MESSAGE_LEVEL} "ThinLTO is only available with CLang") -endif () - -# Turns on all external libs like s3, kafka, ODBC, ... -option(ENABLE_LIBRARIES "Enable all external libraries by default" ON) - -# We recommend avoiding this mode for production builds because we can't guarantee all needed libraries exist in your -# system. -# This mode exists for enthusiastic developers who are searching for trouble. -# Useful for maintainers of OS packages. -option (UNBUNDLED "Use system libraries instead of ones in contrib/" OFF) - -if (UNBUNDLED) - set(NOT_UNBUNDLED OFF) -else () - set(NOT_UNBUNDLED ON) -endif () - -if (UNBUNDLED OR NOT (OS_LINUX OR OS_DARWIN)) - # Using system libs can cause a lot of warnings in includes (on macro expansion). - option(WERROR "Enable -Werror compiler option" OFF) -else () - option(WERROR "Enable -Werror compiler option" ON) -endif () - -option(WERROR "Enable -Werror compiler option" OFF) - -# Make this extra-checks for correct library dependencies. -if (OS_LINUX AND NOT SANITIZE) - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-undefined") - set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined") -endif () - -include(cmake/dbms_glob_sources.cmake) - -if (OS_LINUX OR OS_ANDROID) - include(cmake/linux/default_libs.cmake) -elseif (OS_DARWIN) - include(cmake/darwin/default_libs.cmake) -elseif (OS_FREEBSD) - include(cmake/freebsd/default_libs.cmake) -endif () - -###################################### -### Add targets below this comment ### -###################################### - -set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX") - -if (MAKE_STATIC_LIBRARIES) - set (CMAKE_POSITION_INDEPENDENT_CODE OFF) - if (OS_LINUX AND NOT ARCH_ARM) - # Slightly more efficient code can be generated - # It's disabled for ARM because otherwise ClickHouse cannot run on Android. - set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie") - set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fno-pie") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-no-pie") - endif () -else () - set (CMAKE_POSITION_INDEPENDENT_CODE ON) -endif () - -# https://github.com/include-what-you-use/include-what-you-use -option (USE_INCLUDE_WHAT_YOU_USE "Automatically reduce unneeded includes in source code (external tool)" OFF) - -if (USE_INCLUDE_WHAT_YOU_USE) - find_program(IWYU_PATH NAMES include-what-you-use iwyu) - if (NOT IWYU_PATH) - message(FATAL_ERROR "Could not find the program include-what-you-use") - endif() - if (${CMAKE_VERSION} VERSION_LESS "3.3.0") - message(FATAL_ERROR "include-what-you-use requires CMake version at least 3.3.") - endif() -endif () - -if (ENABLE_TESTS) - message (STATUS "Unit tests are enabled") -else() - message(STATUS "Unit tests are disabled") -endif () - -enable_testing() # Enable for tests without binary - -# when installing to /usr - place configs to /etc but for /usr/local place to /usr/local/etc -if (CMAKE_INSTALL_PREFIX STREQUAL "/usr") - set (CLICKHOUSE_ETC_DIR "/etc") -else () - set (CLICKHOUSE_ETC_DIR "${CMAKE_INSTALL_PREFIX}/etc") -endif () - -message (STATUS - "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ; - USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES} - MAKE_STATIC_LIBRARIES=${MAKE_STATIC_LIBRARIES} - SPLIT_SHARED=${SPLIT_SHARED_LIBRARIES} - UNBUNDLED=${UNBUNDLED} - CCACHE=${CCACHE_FOUND} ${CCACHE_VERSION}") - -include (GNUInstallDirs) -include (cmake/contrib_finder.cmake) - -find_contrib_lib(double-conversion) # Must be before parquet -include (cmake/find/ssl.cmake) -include (cmake/find/ldap.cmake) # after ssl -include (cmake/find/icu.cmake) -include (cmake/find/zlib.cmake) -include (cmake/find/zstd.cmake) -include (cmake/find/ltdl.cmake) # for odbc -# openssl, zlib before poco -include (cmake/find/sparsehash.cmake) -include (cmake/find/re2.cmake) -include (cmake/find/krb5.cmake) -include (cmake/find/libgsasl.cmake) -include (cmake/find/cyrus-sasl.cmake) -include (cmake/find/rdkafka.cmake) -include (cmake/find/amqpcpp.cmake) -include (cmake/find/capnp.cmake) -include (cmake/find/llvm.cmake) -include (cmake/find/termcap.cmake) # for external static llvm -include (cmake/find/h3.cmake) -include (cmake/find/libxml2.cmake) -include (cmake/find/brotli.cmake) -include (cmake/find/protobuf.cmake) -include (cmake/find/grpc.cmake) -include (cmake/find/pdqsort.cmake) -include (cmake/find/miniselect.cmake) -include (cmake/find/hdfs3.cmake) # uses protobuf -include (cmake/find/poco.cmake) -include (cmake/find/curl.cmake) -include (cmake/find/s3.cmake) -include (cmake/find/base64.cmake) -include (cmake/find/parquet.cmake) -include (cmake/find/simdjson.cmake) -include (cmake/find/fast_float.cmake) -include (cmake/find/rapidjson.cmake) -include (cmake/find/fastops.cmake) -include (cmake/find/odbc.cmake) -include (cmake/find/rocksdb.cmake) -include (cmake/find/nuraft.cmake) - - -if(NOT USE_INTERNAL_PARQUET_LIBRARY) - set (ENABLE_ORC OFF CACHE INTERNAL "") -endif() -include (cmake/find/orc.cmake) - -include (cmake/find/avro.cmake) -include (cmake/find/msgpack.cmake) -include (cmake/find/cassandra.cmake) -include (cmake/find/sentry.cmake) -include (cmake/find/stats.cmake) - -set (USE_INTERNAL_CITYHASH_LIBRARY ON CACHE INTERNAL "") -find_contrib_lib(cityhash) - -find_contrib_lib(farmhash) - -if (ENABLE_TESTS) - include (cmake/find/gtest.cmake) -endif () - -# Need to process before "contrib" dir: -include (cmake/find/mysqlclient.cmake) - -# When testing for memory leaks with Valgrind, don't link tcmalloc or jemalloc. - -include (cmake/print_flags.cmake) - -if (TARGET global-group) - install (EXPORT global DESTINATION cmake) -endif () - -add_subdirectory (contrib EXCLUDE_FROM_ALL) - -if (NOT ENABLE_JEMALLOC) - message (WARNING "Non default allocator is disabled. This is not recommended for production builds.") -endif () - -macro (add_executable target) - # invoke built-in add_executable - # explicitly acquire and interpose malloc symbols by clickhouse_malloc - # if GLIBC_COMPATIBILITY is ON and ENABLE_THINLTO is on than provide memcpy symbol explicitly to neutrialize thinlto's libcall generation. - if (GLIBC_COMPATIBILITY AND ENABLE_THINLTO) - _add_executable (${ARGV} $ $) - else () - _add_executable (${ARGV} $) - endif () - - get_target_property (type ${target} TYPE) - if (${type} STREQUAL EXECUTABLE) - # disabled for TSAN and gcc since libtsan.a provides overrides too - if (TARGET clickhouse_new_delete) - # operator::new/delete for executables (MemoryTracker stuff) - target_link_libraries (${target} PRIVATE clickhouse_new_delete ${MALLOC_LIBRARIES}) - endif() - endif() -endmacro() - -set(ConfigIncludePath ${CMAKE_CURRENT_BINARY_DIR}/includes/configs CACHE INTERNAL "Path to generated configuration files.") -include_directories(${ConfigIncludePath}) - -# Add as many warnings as possible for our own code. -include (cmake/warnings.cmake) - -add_subdirectory (base) -add_subdirectory (src) -add_subdirectory (programs) -add_subdirectory (tests) -add_subdirectory (utils) - -include (cmake/print_include_directories.cmake) - -include (cmake/sanitize_target_link_libraries.cmake) From fe78b31ed4d85e17b38aa16d1f4ea31502f0dc5b Mon Sep 17 00:00:00 2001 From: keenwolf Date: Sat, 16 Jan 2021 20:35:41 +0800 Subject: [PATCH 13/94] Move register to the Misc group --- src/Functions/FunctionFile.cpp | 2 +- src/Functions/FunctionsConversion.cpp | 2 -- src/Functions/registerFunctionsMiscellaneous.cpp | 2 ++ 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index 1de98cc3f38..d1e35c1d31e 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -119,7 +119,7 @@ namespace DB }; - void registerFunctionFromFile(FunctionFactory & factory) + void registerFunctionFile(FunctionFactory & factory) { factory.registerFunction(); } diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index a6866ce0939..c59452ebab0 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -6,7 +6,6 @@ namespace DB { void registerFunctionFixedString(FunctionFactory & factory); -void registerFunctionFromFile(FunctionFactory & factory); void registerFunctionsConversion(FunctionFactory & factory) { @@ -37,7 +36,6 @@ void registerFunctionsConversion(FunctionFactory & factory) factory.registerFunction(); registerFunctionFixedString(factory); - registerFunctionFromFile(factory); factory.registerFunction(); diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index 653922bbced..de6d093e2b0 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -67,6 +67,7 @@ void registerFunctionInitializeAggregation(FunctionFactory &); void registerFunctionErrorCodeToName(FunctionFactory &); void registerFunctionTcpPort(FunctionFactory &); void registerFunctionByteSize(FunctionFactory &); +void registerFunctionFile(FunctionFactory & factory); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -134,6 +135,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionErrorCodeToName(factory); registerFunctionTcpPort(factory); registerFunctionByteSize(factory); + registerFunctionFile(factory); #if USE_ICU registerFunctionConvertCharset(factory); From 5ba67b11132457b932b8f608522d8677a9ab4228 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Sun, 17 Jan 2021 02:55:07 +0800 Subject: [PATCH 14/94] Add test case. --- .../01658_read_file_to_stringcolumn.reference | 20 +++++ .../01658_read_file_to_stringcolumn.sh | 76 +++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference create mode 100755 tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference new file mode 100644 index 00000000000..82bc7c9ca90 --- /dev/null +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference @@ -0,0 +1,20 @@ +aaaaaaaaa bbbbbbbbb +:0 +:0 +:0 +ccccccccc aaaaaaaaa bbbbbbbbb +ccccccccc aaaaaaaaa bbbbbbbbb +:0 +:107 +:79 +:35 +699415 +aaaaaaaaa bbbbbbbbb +ccccccccc aaaaaaaaa bbbbbbbbb +ccccccccc aaaaaaaaa bbbbbbbbb +ccccccccc aaaaaaaaa bbbbbbbbb +ccccccccc aaaaaaaaa bbbbbbbbb +699415 0 +:0 +:107 +:79 diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh new file mode 100755 index 00000000000..1ee68b3ff11 --- /dev/null +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +set -eu + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Data preparation +# When run with client mode on different machine to the server, the data-file creation maybe implemented in SQL. Now we just make it simple +echo -n aaaaaaaaa > /var/lib/clickhouse/user_files/a.txt +echo -n bbbbbbbbb > /var/lib/clickhouse/user_files/b.txt +echo -n ccccccccc > /var/lib/clickhouse/user_files/c.txt +echo -n ccccccccc > /tmp/c.txt +mkdir /var/lib/clickhouse/user_files/dir + +### 1st TEST in CLIENT mode. +${CLICKHOUSE_CLIENT} --query "drop table if exists data;" +${CLICKHOUSE_CLIENT} --query "create table data (A String, B String) engine=MergeTree() order by A;" + + +# Valid cases: +${CLICKHOUSE_CLIENT} --query "select file('a.txt'), file('b.txt');";echo ":"$? +${CLICKHOUSE_CLIENT} --query "insert into data select file('a.txt'), file('b.txt');";echo ":"$? +${CLICKHOUSE_CLIENT} --query "insert into data select file('a.txt'), file('b.txt');";echo ":"$? +${CLICKHOUSE_CLIENT} --query "select file('c.txt'), * from data";echo ":"$? + + +# Invalid cases: (Here using sub-shell to catch exception avoiding the test quit) +# Test non-exists file +echo "clickhouse-client --query "'"select file('"'nonexist.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null +# Test isDir +echo "clickhouse-client --query "'"select file('"'dir'), file('b.txt')"'";echo :$?' | bash 2>/dev/null +# Test path out of the user_files directory. It's not allowed in client mode +echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null + + + +### 2nd TEST in LOCAL mode. + +echo -n aaaaaaaaa > a.txt +echo -n bbbbbbbbb > b.txt +echo -n ccccccccc > c.txt +mkdir dir +#Test for large files, with length : 699415 +c_count=$(wc -c ${CURDIR}/01518_nullable_aggregate_states2.reference | awk '{print $1}') +echo $c_count + +# Valid cases: +# The default dir is the CWD path in LOCAL mode +${CLICKHOUSE_LOCAL} --query " + drop table if exists data; + create table data (A String, B String) engine=MergeTree() order by A; + select file('a.txt'), file('b.txt'); + insert into data select file('a.txt'), file('b.txt'); + insert into data select file('a.txt'), file('b.txt'); + select file('c.txt'), * from data; + select file('/tmp/c.txt'), * from data; + select $c_count, $c_count -length(file('${CURDIR}/01518_nullable_aggregate_states2.reference')) +" +echo ":"$? + + +# Invalid cases: (Here using sub-shell to catch exception avoiding the test quit) +# Test non-exists file +echo "clickhouse-local --query "'"select file('"'nonexist.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null + +# Test isDir +echo "clickhouse-local --query "'"select file('"'dir'), file('b.txt')"'";echo :$?' | bash 2>/dev/null + +# Restore +rm -rf a.txt b.txt c.txt dir +rm -rf /var/lib/clickhouse/user_files/a.txt +rm -rf /var/lib/clickhouse/user_files/b.txt +rm -rf /var/lib/clickhouse/user_files/c.txt +rm -rf /tmp/c.txt +rm -rf /var/lib/clickhouse/user_files/dir From 8f3cdb69e6ee9f72e8fecfd3dca4cc527903faef Mon Sep 17 00:00:00 2001 From: keenwolf Date: Sun, 17 Jan 2021 03:07:42 +0800 Subject: [PATCH 15/94] Delete several spaces just formatting --- src/Functions/FunctionsConversion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 src/Functions/FunctionsConversion.cpp diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp old mode 100644 new mode 100755 index c59452ebab0..257b852ecd8 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -36,7 +36,7 @@ void registerFunctionsConversion(FunctionFactory & factory) factory.registerFunction(); registerFunctionFixedString(factory); - + factory.registerFunction(); factory.registerFunction>(FunctionFactory::CaseInsensitive); From 2379902e2adf789433989abdbf241f19e052597e Mon Sep 17 00:00:00 2001 From: keenwolf Date: Sun, 17 Jan 2021 14:27:18 +0800 Subject: [PATCH 16/94] Return data type revise --- src/Functions/FunctionFile.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index d1e35c1d31e..e84fd15fbbd 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -41,8 +41,8 @@ namespace DB DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (!isStringOrFixedString(arguments[0].type)) - throw Exception(getName() + " is only implemented for types String and FixedString", ErrorCodes::NOT_IMPLEMENTED); + if (!isString(arguments[0].type)) + throw Exception(getName() + " is only implemented for types String", ErrorCodes::NOT_IMPLEMENTED); return std::make_shared(); } @@ -78,7 +78,7 @@ namespace DB in.readStrict(res_buf, file_len); /* - //Method-2: Read directly into the String buf, which avoiding one copy from PageCache to ReadBuffer + //Method-2(Just for reference): Read directly into the String buf, which avoiding one copy from PageCache to ReadBuffer int fd; if (-1 == (fd = open(file_absolute_path.c_str(), O_RDONLY))) throwFromErrnoWithPath("Cannot open file " + std::string(file_absolute_path), std::string(file_absolute_path), From b3e44f202bad10356d5640585abb1f3054c8c26d Mon Sep 17 00:00:00 2001 From: keenwolf Date: Mon, 18 Jan 2021 11:10:52 +0800 Subject: [PATCH 17/94] add back CmakeLists.txt --- CMakeLists.txt | 568 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 568 insertions(+) create mode 100644 CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000000..9002f1df140 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,568 @@ +cmake_minimum_required(VERSION 3.3) + +foreach(policy + CMP0023 + CMP0048 # CMake 3.0 + CMP0074 # CMake 3.12 + CMP0077 + CMP0079 + ) + if(POLICY ${policy}) + cmake_policy(SET ${policy} NEW) + endif() +endforeach() + +# set default policy +foreach(default_policy_var_name + # make option() honor normal variables for BUILD_SHARED_LIBS: + # - re2 + # - snappy + CMAKE_POLICY_DEFAULT_CMP0077 + # Google Test from sources uses too old cmake, 2.6.x, and CMP0022 should + # set, to avoid using deprecated LINK_INTERFACE_LIBRARIES(_)? over + # INTERFACE_LINK_LIBRARIES. + CMAKE_POLICY_DEFAULT_CMP0022 + ) + set(${default_policy_var_name} NEW) +endforeach() + +project(ClickHouse) + +# If turned off: e.g. when ENABLE_FOO is ON, but FOO tool was not found, the CMake will continue. +option(FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION + "Stop/Fail CMake configuration if some ENABLE_XXX option is defined (either ON or OFF) + but is not possible to satisfy" ON) + +if(FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION) + set(RECONFIGURE_MESSAGE_LEVEL FATAL_ERROR) +else() + set(RECONFIGURE_MESSAGE_LEVEL STATUS) +endif() + +include (cmake/arch.cmake) +include (cmake/target.cmake) +include (cmake/tools.cmake) +include (cmake/analysis.cmake) + +# Ignore export() since we don't use it, +# but it gets broken with a global targets via link_libraries() +macro (export) +endmacro () + +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/") +set(CMAKE_EXPORT_COMPILE_COMMANDS 1) # Write compile_commands.json +set(CMAKE_LINK_DEPENDS_NO_SHARED 1) # Do not relink all depended targets on .so +set(CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Debug;Release;MinSizeRel" CACHE STRING "" FORCE) +set(CMAKE_DEBUG_POSTFIX "d" CACHE STRING "Generate debug library name with a postfix.") # To be consistent with CMakeLists from contrib libs. + +# Enable the ability to organize targets into hierarchies of "folders" for capable GUI-based IDEs. +# For more info see https://cmake.org/cmake/help/latest/prop_gbl/USE_FOLDERS.html +set_property(GLOBAL PROPERTY USE_FOLDERS ON) + +# Check that submodules are present only if source was downloaded with git +if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boost/boost") + message (FATAL_ERROR "Submodules are not initialized. Run\n\tgit submodule update --init --recursive") +endif () + +include (cmake/find/ccache.cmake) + +option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile too long or to take too much memory while compiling" OFF) +if (ENABLE_CHECK_HEAVY_BUILDS) + # set DATA (since RSS does not work since 2.6.x+) to 2G + set (RLIMIT_DATA 5000000000) + # set VIRT (RLIMIT_AS) to 10G (DATA*10) + set (RLIMIT_AS 10000000000) + # gcc10/gcc10/clang -fsanitize=memory is too heavy + if (SANITIZE STREQUAL "memory" OR COMPILER_GCC) + set (RLIMIT_DATA 10000000000) + endif() + set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=600) +endif () + +if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "None") + set (CMAKE_BUILD_TYPE "RelWithDebInfo") + message (STATUS "CMAKE_BUILD_TYPE is not set, set to default = ${CMAKE_BUILD_TYPE}") +endif () +message (STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") + +string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC) + +option(USE_STATIC_LIBRARIES "Disable to use shared libraries" ON) +option(MAKE_STATIC_LIBRARIES "Disable to make shared libraries" ${USE_STATIC_LIBRARIES}) + +if (NOT MAKE_STATIC_LIBRARIES) + # DEVELOPER ONLY. + # Faster linking if turned on. + option(SPLIT_SHARED_LIBRARIES "Keep all internal libraries as separate .so files") + + option(CLICKHOUSE_SPLIT_BINARY + "Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled") +endif () + +if (MAKE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES) + message(FATAL_ERROR "Defining SPLIT_SHARED_LIBRARIES=1 without MAKE_STATIC_LIBRARIES=0 has no effect.") +endif() + +if (NOT MAKE_STATIC_LIBRARIES AND SPLIT_SHARED_LIBRARIES) + set(BUILD_SHARED_LIBS 1 CACHE INTERNAL "") +endif () + +if (USE_STATIC_LIBRARIES) + list(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES) +endif () + +# Implies ${WITH_COVERAGE} +option (ENABLE_FUZZING "Fuzzy testing using libfuzzer" OFF) + +if (ENABLE_FUZZING) + message (STATUS "Fuzzing instrumentation enabled") + set (WITH_COVERAGE ON) + set (FUZZER "libfuzzer") +endif() + +# Global libraries +# See: +# - default_libs.cmake +# - sanitize.cmake +add_library(global-libs INTERFACE) + +include (cmake/fuzzer.cmake) +include (cmake/sanitize.cmake) + +if (CMAKE_GENERATOR STREQUAL "Ninja" AND NOT DISABLE_COLORED_BUILD) + # Turn on colored output. https://github.com/ninja-build/ninja/wiki/FAQ + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-color=always") +endif () + +include (cmake/add_warning.cmake) + +if (NOT MSVC) + set (COMMON_WARNING_FLAGS "${COMMON_WARNING_FLAGS} -Wall") # -Werror and many more is also added inside cmake/warnings.cmake +endif () + +if (COMPILER_CLANG) + # clang: warning: argument unused during compilation: '-specs=/usr/share/dpkg/no-pie-compile.specs' [-Wunused-command-line-argument] + set (COMMON_WARNING_FLAGS "${COMMON_WARNING_FLAGS} -Wno-unused-command-line-argument") + # generate ranges for fast "addr2line" search + if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE") + set(COMPILER_FLAGS "${COMPILER_FLAGS} -gdwarf-aranges") + endif () +endif () + +# If turned `ON`, assumes the user has either the system GTest library or the bundled one. +option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON) + +if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") + # Only for Linux, x86_64. + # Implies ${ENABLE_FASTMEMCPY} + option(GLIBC_COMPATIBILITY "Enable compatibility with older glibc libraries." ON) +elseif(GLIBC_COMPATIBILITY) + message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration") +endif () + +if (NOT CMAKE_VERSION VERSION_GREATER "3.9.0") + message (WARNING "CMake version must be greater than 3.9.0 for production builds.") +endif () + +# Make sure the final executable has symbols exported +set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic") + +if (OS_LINUX) + find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy") + if (OBJCOPY_PATH) + message(STATUS "Using objcopy: ${OBJCOPY_PATH}.") + + if (ARCH_AMD64) + set(OBJCOPY_ARCH_OPTIONS -O elf64-x86-64 -B i386) + elseif (ARCH_AARCH64) + set(OBJCOPY_ARCH_OPTIONS -O elf64-aarch64 -B aarch64) + endif () + else () + message(FATAL_ERROR "Cannot find objcopy.") + endif () +endif () + +if (OS_DARWIN) + set(WHOLE_ARCHIVE -all_load) + set(NO_WHOLE_ARCHIVE -noall_load) +else () + set(WHOLE_ARCHIVE --whole-archive) + set(NO_WHOLE_ARCHIVE --no-whole-archive) +endif () + +# Ignored if `lld` is used +option(ADD_GDB_INDEX_FOR_GOLD "Add .gdb-index to resulting binaries for gold linker.") + +if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE") + # Can be lld or ld-lld. + if (LINKER_NAME MATCHES "lld$") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index") + set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gdb-index") + message (STATUS "Adding .gdb-index via --gdb-index linker option.") + # we use another tool for gdb-index, because gold linker removes section .debug_aranges, which used inside clickhouse stacktraces + # http://sourceware-org.1504.n7.nabble.com/gold-No-debug-aranges-section-when-linking-with-gdb-index-td540965.html#a556932 + elseif (LINKER_NAME MATCHES "gold$" AND ADD_GDB_INDEX_FOR_GOLD) + find_program (GDB_ADD_INDEX_EXE NAMES "gdb-add-index" DOC "Path to gdb-add-index executable") + if (NOT GDB_ADD_INDEX_EXE) + set (USE_GDB_ADD_INDEX 0) + message (WARNING "Cannot add gdb index to binaries, because gold linker is used, but gdb-add-index executable not found.") + else() + set (USE_GDB_ADD_INDEX 1) + message (STATUS "gdb-add-index found: ${GDB_ADD_INDEX_EXE}") + endif() + endif () +endif() + +# Create BuildID when using lld. For other linkers it is created by default. +if (LINKER_NAME MATCHES "lld$") + # SHA1 is not cryptographically secure but it is the best what lld is offering. + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--build-id=sha1") +endif () + +# Add a section with the hash of the compiled machine code for integrity checks. +# Only for official builds, because adding a section can be time consuming (rewrite of several GB). +# And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary) +if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE)) + set (USE_BINARY_HASH 1) +endif () + +cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd + + +if(NOT AVAILABLE_PHYSICAL_MEMORY OR AVAILABLE_PHYSICAL_MEMORY GREATER 8000) + # Less `/tmp` usage, more RAM usage. + option(COMPILER_PIPE "-pipe compiler option" ON) +endif() + +if(COMPILER_PIPE) + set(COMPILER_FLAGS "${COMPILER_FLAGS} -pipe") +else() + message(STATUS "Disabling compiler -pipe option (have only ${AVAILABLE_PHYSICAL_MEMORY} mb of memory)") +endif() + +if(NOT DISABLE_CPU_OPTIMIZE) + include(cmake/cpu_features.cmake) +endif() + +option(ARCH_NATIVE "Add -march=native compiler flag") + +if (ARCH_NATIVE) + set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native") +endif () + +if (COMPILER_GCC OR COMPILER_CLANG) + # to make numeric_limits<__int128> works with GCC + set (_CXX_STANDARD "gnu++2a") +else() + set (_CXX_STANDARD "c++2a") +endif() + +# cmake < 3.12 doesn't support 20. We'll set CMAKE_CXX_FLAGS for now +# set (CMAKE_CXX_STANDARD 20) +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=${_CXX_STANDARD}") + +set (CMAKE_CXX_EXTENSIONS 0) # https://cmake.org/cmake/help/latest/prop_tgt/CXX_EXTENSIONS.html#prop_tgt:CXX_EXTENSIONS +set (CMAKE_CXX_STANDARD_REQUIRED ON) + +if (COMPILER_GCC OR COMPILER_CLANG) + # Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure. + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation") +endif () + +# Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc +option(WITH_COVERAGE "Profile the resulting binary/binaries" OFF) + +if (WITH_COVERAGE AND COMPILER_CLANG) + set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-instr-generate -fcoverage-mapping") + # If we want to disable coverage for specific translation units + set(WITHOUT_COVERAGE "-fno-profile-instr-generate -fno-coverage-mapping") +endif() + +if (WITH_COVERAGE AND COMPILER_GCC) + set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-arcs -ftest-coverage") + set(COVERAGE_OPTION "-lgcov") + set(WITHOUT_COVERAGE "-fno-profile-arcs -fno-test-coverage") +endif() + +set(COMPILER_FLAGS "${COMPILER_FLAGS}") + +set (CMAKE_BUILD_COLOR_MAKEFILE ON) +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${PLATFORM_EXTRA_CXX_FLAG} ${COMMON_WARNING_FLAGS} ${CXX_WARNING_FLAGS}") +set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_CXX_FLAGS_ADD}") +set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_CXX_FLAGS_ADD}") + +set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${COMMON_WARNING_FLAGS} ${CMAKE_C_FLAGS_ADD}") +set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_C_FLAGS_ADD}") +set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_C_FLAGS_ADD}") + +if (COMPILER_CLANG) + if (OS_DARWIN) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-U,_inside_main") + endif() + + # Display absolute paths in error messages. Otherwise KDevelop fails to navigate to correct file and opens a new file instead. + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-absolute-paths") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-absolute-paths") + + if (NOT ENABLE_TESTS AND NOT SANITIZE) + # https://clang.llvm.org/docs/ThinLTO.html + # Applies to clang only. + # Disabled when building with tests or sanitizers. + option(ENABLE_THINLTO "Clang-specific link time optimization" ON) + endif() + + # Set new experimental pass manager, it's a performance, build time and binary size win. + # Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang. + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager") + + # We cannot afford to use LTO when compiling unit tests, and it's not enough + # to only supply -fno-lto at the final linking stage. So we disable it + # completely. + if (ENABLE_THINLTO AND NOT ENABLE_TESTS AND NOT SANITIZE) + # Link time optimization + set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -flto=thin") + set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -flto=thin") + set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -flto=thin") + elseif (ENABLE_THINLTO) + message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot enable ThinLTO") + endif () + + # Always prefer llvm tools when using clang. For instance, we cannot use GNU ar when llvm LTO is enabled + find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-11" "llvm-ar-10" "llvm-ar-9" "llvm-ar-8") + + if (LLVM_AR_PATH) + message(STATUS "Using llvm-ar: ${LLVM_AR_PATH}.") + set (CMAKE_AR ${LLVM_AR_PATH}) + else () + message(WARNING "Cannot find llvm-ar. System ar will be used instead. It does not work with ThinLTO.") + endif () + + find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-11" "llvm-ranlib-10" "llvm-ranlib-9" "llvm-ranlib-8") + + if (LLVM_RANLIB_PATH) + message(STATUS "Using llvm-ranlib: ${LLVM_RANLIB_PATH}.") + set (CMAKE_RANLIB ${LLVM_RANLIB_PATH}) + else () + message(WARNING "Cannot find llvm-ranlib. System ranlib will be used instead. It does not work with ThinLTO.") + endif () + +elseif (ENABLE_THINLTO) + message (${RECONFIGURE_MESSAGE_LEVEL} "ThinLTO is only available with CLang") +endif () + +# Turns on all external libs like s3, kafka, ODBC, ... +option(ENABLE_LIBRARIES "Enable all external libraries by default" ON) + +# We recommend avoiding this mode for production builds because we can't guarantee all needed libraries exist in your +# system. +# This mode exists for enthusiastic developers who are searching for trouble. +# Useful for maintainers of OS packages. +option (UNBUNDLED "Use system libraries instead of ones in contrib/" OFF) + +if (UNBUNDLED) + set(NOT_UNBUNDLED OFF) +else () + set(NOT_UNBUNDLED ON) +endif () + +if (UNBUNDLED OR NOT (OS_LINUX OR OS_DARWIN)) + # Using system libs can cause a lot of warnings in includes (on macro expansion). + option(WERROR "Enable -Werror compiler option" OFF) +else () + option(WERROR "Enable -Werror compiler option" ON) +endif () + +if (WERROR) + add_warning(error) +endif () + +# Make this extra-checks for correct library dependencies. +if (OS_LINUX AND NOT SANITIZE) + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-undefined") + set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined") +endif () + +include(cmake/dbms_glob_sources.cmake) + +if (OS_LINUX OR OS_ANDROID) + include(cmake/linux/default_libs.cmake) +elseif (OS_DARWIN) + include(cmake/darwin/default_libs.cmake) +elseif (OS_FREEBSD) + include(cmake/freebsd/default_libs.cmake) +endif () + +###################################### +### Add targets below this comment ### +###################################### + +set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX") + +if (MAKE_STATIC_LIBRARIES) + set (CMAKE_POSITION_INDEPENDENT_CODE OFF) + if (OS_LINUX AND NOT ARCH_ARM) + # Slightly more efficient code can be generated + # It's disabled for ARM because otherwise ClickHouse cannot run on Android. + set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie") + set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fno-pie") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-no-pie") + endif () +else () + set (CMAKE_POSITION_INDEPENDENT_CODE ON) +endif () + +# https://github.com/include-what-you-use/include-what-you-use +option (USE_INCLUDE_WHAT_YOU_USE "Automatically reduce unneeded includes in source code (external tool)" OFF) + +if (USE_INCLUDE_WHAT_YOU_USE) + find_program(IWYU_PATH NAMES include-what-you-use iwyu) + if (NOT IWYU_PATH) + message(FATAL_ERROR "Could not find the program include-what-you-use") + endif() + if (${CMAKE_VERSION} VERSION_LESS "3.3.0") + message(FATAL_ERROR "include-what-you-use requires CMake version at least 3.3.") + endif() +endif () + +if (ENABLE_TESTS) + message (STATUS "Unit tests are enabled") +else() + message(STATUS "Unit tests are disabled") +endif () + +enable_testing() # Enable for tests without binary + +# when installing to /usr - place configs to /etc but for /usr/local place to /usr/local/etc +if (CMAKE_INSTALL_PREFIX STREQUAL "/usr") + set (CLICKHOUSE_ETC_DIR "/etc") +else () + set (CLICKHOUSE_ETC_DIR "${CMAKE_INSTALL_PREFIX}/etc") +endif () + +message (STATUS + "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ; + USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES} + MAKE_STATIC_LIBRARIES=${MAKE_STATIC_LIBRARIES} + SPLIT_SHARED=${SPLIT_SHARED_LIBRARIES} + UNBUNDLED=${UNBUNDLED} + CCACHE=${CCACHE_FOUND} ${CCACHE_VERSION}") + +include (GNUInstallDirs) +include (cmake/contrib_finder.cmake) + +find_contrib_lib(double-conversion) # Must be before parquet +include (cmake/find/ssl.cmake) +include (cmake/find/ldap.cmake) # after ssl +include (cmake/find/icu.cmake) +include (cmake/find/zlib.cmake) +include (cmake/find/zstd.cmake) +include (cmake/find/ltdl.cmake) # for odbc +# openssl, zlib before poco +include (cmake/find/sparsehash.cmake) +include (cmake/find/re2.cmake) +include (cmake/find/krb5.cmake) +include (cmake/find/libgsasl.cmake) +include (cmake/find/cyrus-sasl.cmake) +include (cmake/find/rdkafka.cmake) +include (cmake/find/amqpcpp.cmake) +include (cmake/find/capnp.cmake) +include (cmake/find/llvm.cmake) +include (cmake/find/termcap.cmake) # for external static llvm +include (cmake/find/h3.cmake) +include (cmake/find/libxml2.cmake) +include (cmake/find/brotli.cmake) +include (cmake/find/protobuf.cmake) +include (cmake/find/grpc.cmake) +include (cmake/find/pdqsort.cmake) +include (cmake/find/miniselect.cmake) +include (cmake/find/hdfs3.cmake) # uses protobuf +include (cmake/find/poco.cmake) +include (cmake/find/curl.cmake) +include (cmake/find/s3.cmake) +include (cmake/find/base64.cmake) +include (cmake/find/parquet.cmake) +include (cmake/find/simdjson.cmake) +include (cmake/find/fast_float.cmake) +include (cmake/find/rapidjson.cmake) +include (cmake/find/fastops.cmake) +include (cmake/find/odbc.cmake) +include (cmake/find/rocksdb.cmake) +include (cmake/find/libpqxx.cmake) +include (cmake/find/nuraft.cmake) + + +if(NOT USE_INTERNAL_PARQUET_LIBRARY) + set (ENABLE_ORC OFF CACHE INTERNAL "") +endif() +include (cmake/find/orc.cmake) + +include (cmake/find/avro.cmake) +include (cmake/find/msgpack.cmake) +include (cmake/find/cassandra.cmake) +include (cmake/find/sentry.cmake) +include (cmake/find/stats.cmake) + +set (USE_INTERNAL_CITYHASH_LIBRARY ON CACHE INTERNAL "") +find_contrib_lib(cityhash) + +find_contrib_lib(farmhash) + +if (ENABLE_TESTS) + include (cmake/find/gtest.cmake) +endif () + +# Need to process before "contrib" dir: +include (cmake/find/mysqlclient.cmake) + +# When testing for memory leaks with Valgrind, don't link tcmalloc or jemalloc. + +include (cmake/print_flags.cmake) + +if (TARGET global-group) + install (EXPORT global DESTINATION cmake) +endif () + +add_subdirectory (contrib EXCLUDE_FROM_ALL) + +if (NOT ENABLE_JEMALLOC) + message (WARNING "Non default allocator is disabled. This is not recommended for production builds.") +endif () + +macro (add_executable target) + # invoke built-in add_executable + # explicitly acquire and interpose malloc symbols by clickhouse_malloc + # if GLIBC_COMPATIBILITY is ON and ENABLE_THINLTO is on than provide memcpy symbol explicitly to neutrialize thinlto's libcall generation. + if (GLIBC_COMPATIBILITY AND ENABLE_THINLTO) + _add_executable (${ARGV} $ $) + else () + _add_executable (${ARGV} $) + endif () + + get_target_property (type ${target} TYPE) + if (${type} STREQUAL EXECUTABLE) + # disabled for TSAN and gcc since libtsan.a provides overrides too + if (TARGET clickhouse_new_delete) + # operator::new/delete for executables (MemoryTracker stuff) + target_link_libraries (${target} PRIVATE clickhouse_new_delete ${MALLOC_LIBRARIES}) + endif() + endif() +endmacro() + +set(ConfigIncludePath ${CMAKE_CURRENT_BINARY_DIR}/includes/configs CACHE INTERNAL "Path to generated configuration files.") +include_directories(${ConfigIncludePath}) + +# Add as many warnings as possible for our own code. +include (cmake/warnings.cmake) + +add_subdirectory (base) +add_subdirectory (src) +add_subdirectory (programs) +add_subdirectory (tests) +add_subdirectory (utils) + +include (cmake/print_include_directories.cmake) + +include (cmake/sanitize_target_link_libraries.cmake) From 8463835c41a4d13d156dede6362069c051ad0e5f Mon Sep 17 00:00:00 2001 From: keenwolf Date: Tue, 19 Jan 2021 11:47:40 +0800 Subject: [PATCH 18/94] Remove extra semicolon --- src/Functions/FunctionFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index e84fd15fbbd..c24d6aef890 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -32,7 +32,7 @@ namespace DB public: static constexpr auto name = "file"; static FunctionPtr create(const Context &context) { return std::make_shared(context); } - explicit FunctionFile(const Context &context_) : context(context_) {}; + explicit FunctionFile(const Context &context_) : context(context_) {} String getName() const override { return name; } From 47fb320651dd0db9fcc27e36f5e03661c1c0a53a Mon Sep 17 00:00:00 2001 From: keenwolf Date: Tue, 19 Jan 2021 14:04:25 +0800 Subject: [PATCH 19/94] Do little fix for Style check --- src/Functions/FunctionFile.cpp | 2 -- src/Functions/FunctionsConversion.cpp | 0 2 files changed, 2 deletions(-) mode change 100755 => 100644 src/Functions/FunctionsConversion.cpp diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index c24d6aef890..c493b2a2b88 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include namespace DB @@ -15,7 +14,6 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_COLUMN; - extern const int TOO_LARGE_STRING_SIZE; extern const int NOT_IMPLEMENTED; extern const int FILE_DOESNT_EXIST; extern const int CANNOT_OPEN_FILE; diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp old mode 100755 new mode 100644 From 6eefa7a0a04e698dcb4f6676947c033f4df949c9 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Tue, 19 Jan 2021 15:14:15 +0800 Subject: [PATCH 20/94] Add mkdir --- tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index 1ee68b3ff11..863f39e7bdf 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -7,6 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Data preparation # When run with client mode on different machine to the server, the data-file creation maybe implemented in SQL. Now we just make it simple +mkidr -p /var/lib/clickhouse/user_files/ echo -n aaaaaaaaa > /var/lib/clickhouse/user_files/a.txt echo -n bbbbbbbbb > /var/lib/clickhouse/user_files/b.txt echo -n ccccccccc > /var/lib/clickhouse/user_files/c.txt From 7c7dd69a88b79c2d07f1a564f34c30a99d57afa1 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Tue, 19 Jan 2021 17:18:21 +0800 Subject: [PATCH 21/94] Fix mkdir --- tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index 863f39e7bdf..1696fc710ad 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Data preparation # When run with client mode on different machine to the server, the data-file creation maybe implemented in SQL. Now we just make it simple -mkidr -p /var/lib/clickhouse/user_files/ +mkdir -p /var/lib/clickhouse/user_files/ echo -n aaaaaaaaa > /var/lib/clickhouse/user_files/a.txt echo -n bbbbbbbbb > /var/lib/clickhouse/user_files/b.txt echo -n ccccccccc > /var/lib/clickhouse/user_files/c.txt From 8461e896451bb85772a7220ebfb15d3cd2ce2755 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Fri, 22 Jan 2021 11:43:31 +0800 Subject: [PATCH 22/94] Remove getArgumentsThatAreAlwaysConstant, also add 2 testcases --- src/Functions/FunctionFile.cpp | 9 ++++----- .../01658_read_file_to_stringcolumn.reference | 2 ++ .../0_stateless/01658_read_file_to_stringcolumn.sh | 4 ++++ 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index c493b2a2b88..afd24f4d575 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -45,7 +45,6 @@ namespace DB } bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { @@ -101,14 +100,14 @@ namespace DB } private: - void checkReadIsAllowed(const std::string & user_files_path, const std::string & file_path) const + void checkReadIsAllowed(const std::string & user_files_absolute_path, const std::string & file_absolute_path) const { // If run in Local mode, no need for path checking. if (context.getApplicationType() != Context::ApplicationType::LOCAL) - if (file_path.find(user_files_path) != 0) - throw Exception("File is not inside " + user_files_path, ErrorCodes::DATABASE_ACCESS_DENIED); + if (file_absolute_path.find(user_files_absolute_path) != 0) + throw Exception("File is not inside " + user_files_absolute_path, ErrorCodes::DATABASE_ACCESS_DENIED); - Poco::File path_poco_file = Poco::File(file_path); + Poco::File path_poco_file = Poco::File(file_absolute_path); if (path_poco_file.exists() && path_poco_file.isDirectory()) throw Exception("File can't be a directory", ErrorCodes::INCORRECT_FILE_NAME); } diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference index 82bc7c9ca90..a22076de920 100644 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference @@ -8,6 +8,8 @@ ccccccccc aaaaaaaaa bbbbbbbbb :107 :79 :35 +:35 +:35 699415 aaaaaaaaa bbbbbbbbb ccccccccc aaaaaaaaa bbbbbbbbb diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index 1696fc710ad..44810636a7c 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -34,6 +34,10 @@ echo "clickhouse-client --query "'"select file('"'dir'), file('b.txt')"'";echo : # Test path out of the user_files directory. It's not allowed in client mode echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null +# Test relative path consists of ".." whose absolute path is out of the user_files directory. +echo "clickhouse-client --query "'"select file('"'/var/lib/clickhouse/user_files/../../../../tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null +echo "clickhouse-client --query "'"select file('"'../a.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null + ### 2nd TEST in LOCAL mode. From b3c0baa96775422256fdecd91d6a04b2677dcbe1 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Fri, 22 Jan 2021 15:29:39 +0800 Subject: [PATCH 23/94] fix mkdir with -p --- tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index 44810636a7c..56049b299fb 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -12,7 +12,7 @@ echo -n aaaaaaaaa > /var/lib/clickhouse/user_files/a.txt echo -n bbbbbbbbb > /var/lib/clickhouse/user_files/b.txt echo -n ccccccccc > /var/lib/clickhouse/user_files/c.txt echo -n ccccccccc > /tmp/c.txt -mkdir /var/lib/clickhouse/user_files/dir +mkdir -p /var/lib/clickhouse/user_files/dir ### 1st TEST in CLIENT mode. ${CLICKHOUSE_CLIENT} --query "drop table if exists data;" @@ -45,7 +45,7 @@ echo "clickhouse-client --query "'"select file('"'../a.txt'), file('b.txt')"'";e echo -n aaaaaaaaa > a.txt echo -n bbbbbbbbb > b.txt echo -n ccccccccc > c.txt -mkdir dir +mkdir -p dir #Test for large files, with length : 699415 c_count=$(wc -c ${CURDIR}/01518_nullable_aggregate_states2.reference | awk '{print $1}') echo $c_count From 67f1dcd9d3fabad9b0698c08bf60597610dade8f Mon Sep 17 00:00:00 2001 From: keenwolf Date: Fri, 22 Jan 2021 20:37:34 +0800 Subject: [PATCH 24/94] adjust the testcases due to the CI test environment change --- .../01658_read_file_to_stringcolumn.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index 56049b299fb..d66b245dc74 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -20,23 +20,23 @@ ${CLICKHOUSE_CLIENT} --query "create table data (A String, B String) engine=Merg # Valid cases: -${CLICKHOUSE_CLIENT} --query "select file('a.txt'), file('b.txt');";echo ":"$? -${CLICKHOUSE_CLIENT} --query "insert into data select file('a.txt'), file('b.txt');";echo ":"$? -${CLICKHOUSE_CLIENT} --query "insert into data select file('a.txt'), file('b.txt');";echo ":"$? -${CLICKHOUSE_CLIENT} --query "select file('c.txt'), * from data";echo ":"$? +${CLICKHOUSE_CLIENT} --query "select file('/var/lib/clickhouse/user_files/a.txt'), file('/var/lib/clickhouse/user_files/b.txt');";echo ":"$? +${CLICKHOUSE_CLIENT} --query "insert into data select file('/var/lib/clickhouse/user_files/a.txt'), file('/var/lib/clickhouse/user_files/b.txt');";echo ":"$? +${CLICKHOUSE_CLIENT} --query "insert into data select file('/var/lib/clickhouse/user_files/a.txt'), file('/var/lib/clickhouse/user_files/b.txt');";echo ":"$? +${CLICKHOUSE_CLIENT} --query "select file('/var/lib/clickhouse/user_files/c.txt'), * from data";echo ":"$? # Invalid cases: (Here using sub-shell to catch exception avoiding the test quit) # Test non-exists file -echo "clickhouse-client --query "'"select file('"'nonexist.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null +echo "clickhouse-client --query "'"select file('"'nonexist.txt'), file('/var/lib/clickhouse/user_files/b.txt')"'";echo :$?' | bash 2>/dev/null # Test isDir -echo "clickhouse-client --query "'"select file('"'dir'), file('b.txt')"'";echo :$?' | bash 2>/dev/null +echo "clickhouse-client --query "'"select file('"'/var/lib/clickhouse/user_files/dir'), file('/var/lib/clickhouse/user_files/b.txt')"'";echo :$?' | bash 2>/dev/null # Test path out of the user_files directory. It's not allowed in client mode -echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null +echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('/var/lib/clickhouse/user_files/b.txt')"'";echo :$?' | bash 2>/dev/null # Test relative path consists of ".." whose absolute path is out of the user_files directory. echo "clickhouse-client --query "'"select file('"'/var/lib/clickhouse/user_files/../../../../tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null -echo "clickhouse-client --query "'"select file('"'../a.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null +echo "clickhouse-client --query "'"select file('"'../../../../a.txt'), file('/var/lib/clickhouse/user_files/b.txt')"'";echo :$?' | bash 2>/dev/null From 140bcc4dc3dcffd2f4b86d76ee5041e05fef83c3 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Sat, 23 Jan 2021 16:45:05 +0800 Subject: [PATCH 25/94] Just to restart the CI test being suspended unexpectedly --- tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index d66b245dc74..8d4f36a0503 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation +# Data preparation. # When run with client mode on different machine to the server, the data-file creation maybe implemented in SQL. Now we just make it simple mkdir -p /var/lib/clickhouse/user_files/ echo -n aaaaaaaaa > /var/lib/clickhouse/user_files/a.txt From 154382925902d4d1d764b508bcedbeb477c026c7 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Sat, 23 Jan 2021 16:53:43 +0800 Subject: [PATCH 26/94] Clean some comments --- src/Functions/FunctionFile.cpp | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index afd24f4d575..6b17454619a 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -58,7 +58,6 @@ namespace DB auto & res_chars = res->getChars(); auto & res_offsets = res->getOffsets(); - //File access permission check const String user_files_path = context.getUserFilesPath(); String user_files_absolute_path = Poco::Path(user_files_path).makeAbsolute().makeDirectory().toString(); Poco::Path poco_filepath = Poco::Path(filename); @@ -67,27 +66,11 @@ namespace DB const String file_absolute_path = poco_filepath.absolute().toString(); checkReadIsAllowed(user_files_absolute_path, file_absolute_path); - //Method-1: Read file with ReadBuffer ReadBufferFromFile in(file_absolute_path); ssize_t file_len = Poco::File(file_absolute_path).getSize(); res_chars.resize_exact(file_len + 1); char *res_buf = reinterpret_cast(&res_chars[0]); in.readStrict(res_buf, file_len); - - /* - //Method-2(Just for reference): Read directly into the String buf, which avoiding one copy from PageCache to ReadBuffer - int fd; - if (-1 == (fd = open(file_absolute_path.c_str(), O_RDONLY))) - throwFromErrnoWithPath("Cannot open file " + std::string(file_absolute_path), std::string(file_absolute_path), - errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); - if (file_len != pread(fd, res_buf, file_len, 0)) - throwFromErrnoWithPath("Read failed with " + std::string(file_absolute_path), std::string(file_absolute_path), - ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); - if (0 != close(fd)) - throw Exception("Cannot close file " + std::string(file_absolute_path), ErrorCodes::CANNOT_CLOSE_FILE); - fd = -1; - */ - res_offsets.push_back(file_len + 1); res_buf[file_len] = '\0'; From c56750c9ceb19abd14bc7961fc0bf4ec0bd4b992 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Sat, 23 Jan 2021 21:43:27 +0800 Subject: [PATCH 27/94] Remove ErrorCodes unused --- src/Functions/FunctionFile.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index 6b17454619a..e4327862982 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -15,10 +15,6 @@ namespace DB { extern const int ILLEGAL_COLUMN; extern const int NOT_IMPLEMENTED; - extern const int FILE_DOESNT_EXIST; - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_CLOSE_FILE; - extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; extern const int INCORRECT_FILE_NAME; extern const int DATABASE_ACCESS_DENIED; } From 6d23dd2590e21ac3b07688bc2185450279a15988 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Sat, 23 Jan 2021 23:57:08 +0800 Subject: [PATCH 28/94] fix test: to get user_files_path from config --- .../01658_read_file_to_stringcolumn.sh | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index 8d4f36a0503..aeaf08cb4d8 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -7,12 +7,14 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Data preparation. # When run with client mode on different machine to the server, the data-file creation maybe implemented in SQL. Now we just make it simple -mkdir -p /var/lib/clickhouse/user_files/ -echo -n aaaaaaaaa > /var/lib/clickhouse/user_files/a.txt -echo -n bbbbbbbbb > /var/lib/clickhouse/user_files/b.txt -echo -n ccccccccc > /var/lib/clickhouse/user_files/c.txt +#user_files_path=$(clickhouse-client --query "select data_path from system.databases where name='default'" | sed -En 's/data\/default/user_files/p') +user_files_path=$(grep user_files_path ${CLICKHOUSE_CONFIG} | awk '{match($0,"(.*)",path); print path[1]}') +mkdir -p ${user_files_path}/ +echo -n aaaaaaaaa > ${user_files_path}/a.txt +echo -n bbbbbbbbb > ${user_files_path}/b.txt +echo -n ccccccccc > ${user_files_path}/c.txt echo -n ccccccccc > /tmp/c.txt -mkdir -p /var/lib/clickhouse/user_files/dir +mkdir -p ${user_files_path}/dir ### 1st TEST in CLIENT mode. ${CLICKHOUSE_CLIENT} --query "drop table if exists data;" @@ -20,23 +22,23 @@ ${CLICKHOUSE_CLIENT} --query "create table data (A String, B String) engine=Merg # Valid cases: -${CLICKHOUSE_CLIENT} --query "select file('/var/lib/clickhouse/user_files/a.txt'), file('/var/lib/clickhouse/user_files/b.txt');";echo ":"$? -${CLICKHOUSE_CLIENT} --query "insert into data select file('/var/lib/clickhouse/user_files/a.txt'), file('/var/lib/clickhouse/user_files/b.txt');";echo ":"$? -${CLICKHOUSE_CLIENT} --query "insert into data select file('/var/lib/clickhouse/user_files/a.txt'), file('/var/lib/clickhouse/user_files/b.txt');";echo ":"$? -${CLICKHOUSE_CLIENT} --query "select file('/var/lib/clickhouse/user_files/c.txt'), * from data";echo ":"$? +${CLICKHOUSE_CLIENT} --query "select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$? +${CLICKHOUSE_CLIENT} --query "insert into data select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$? +${CLICKHOUSE_CLIENT} --query "insert into data select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$? +${CLICKHOUSE_CLIENT} --query "select file('${user_files_path}/c.txt'), * from data";echo ":"$? # Invalid cases: (Here using sub-shell to catch exception avoiding the test quit) # Test non-exists file -echo "clickhouse-client --query "'"select file('"'nonexist.txt'), file('/var/lib/clickhouse/user_files/b.txt')"'";echo :$?' | bash 2>/dev/null +echo "clickhouse-client --query "'"select file('"'nonexist.txt'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null # Test isDir -echo "clickhouse-client --query "'"select file('"'/var/lib/clickhouse/user_files/dir'), file('/var/lib/clickhouse/user_files/b.txt')"'";echo :$?' | bash 2>/dev/null +echo "clickhouse-client --query "'"select file('"'${user_files_path}/dir'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null # Test path out of the user_files directory. It's not allowed in client mode -echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('/var/lib/clickhouse/user_files/b.txt')"'";echo :$?' | bash 2>/dev/null +echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null # Test relative path consists of ".." whose absolute path is out of the user_files directory. -echo "clickhouse-client --query "'"select file('"'/var/lib/clickhouse/user_files/../../../../tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null -echo "clickhouse-client --query "'"select file('"'../../../../a.txt'), file('/var/lib/clickhouse/user_files/b.txt')"'";echo :$?' | bash 2>/dev/null +echo "clickhouse-client --query "'"select file('"'${user_files_path}/../../../../tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null +echo "clickhouse-client --query "'"select file('"'../../../../a.txt'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null @@ -74,8 +76,8 @@ echo "clickhouse-local --query "'"select file('"'dir'), file('b.txt')"'";echo :$ # Restore rm -rf a.txt b.txt c.txt dir -rm -rf /var/lib/clickhouse/user_files/a.txt -rm -rf /var/lib/clickhouse/user_files/b.txt -rm -rf /var/lib/clickhouse/user_files/c.txt +rm -rf ${user_files_path}/a.txt +rm -rf ${user_files_path}/b.txt +rm -rf ${user_files_path}/c.txt rm -rf /tmp/c.txt -rm -rf /var/lib/clickhouse/user_files/dir +rm -rf ${user_files_path}/dir From a671ebf3e9e1f58616e9cdba49dda949ac9fe7d6 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Mon, 25 Jan 2021 11:21:09 +0800 Subject: [PATCH 29/94] skip the client test for being unable to get the correct user_files_path --- .../01658_read_file_to_stringcolumn.reference | 12 ------------ .../0_stateless/01658_read_file_to_stringcolumn.sh | 9 ++++++--- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference index a22076de920..eb5f1795f18 100644 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference @@ -1,15 +1,3 @@ -aaaaaaaaa bbbbbbbbb -:0 -:0 -:0 -ccccccccc aaaaaaaaa bbbbbbbbb -ccccccccc aaaaaaaaa bbbbbbbbb -:0 -:107 -:79 -:35 -:35 -:35 699415 aaaaaaaaa bbbbbbbbb ccccccccc aaaaaaaaa bbbbbbbbb diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index aeaf08cb4d8..cc8ed3f7294 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -7,8 +7,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Data preparation. # When run with client mode on different machine to the server, the data-file creation maybe implemented in SQL. Now we just make it simple -#user_files_path=$(clickhouse-client --query "select data_path from system.databases where name='default'" | sed -En 's/data\/default/user_files/p') -user_files_path=$(grep user_files_path ${CLICKHOUSE_CONFIG} | awk '{match($0,"(.*)",path); print path[1]}') +user_files_path=$(clickhouse-client --query "select data_path from system.databases where name='default'" | sed -En 's/data\/default/user_files/p') +#user_files_path=$(grep user_files_path ${CLICKHOUSE_CONFIG} | awk '{match($0,"(.*)",path); print path[1]}') mkdir -p ${user_files_path}/ echo -n aaaaaaaaa > ${user_files_path}/a.txt echo -n bbbbbbbbb > ${user_files_path}/b.txt @@ -16,6 +16,9 @@ echo -n ccccccccc > ${user_files_path}/c.txt echo -n ccccccccc > /tmp/c.txt mkdir -p ${user_files_path}/dir +# Skip the client test part, for being unable to get the correct user_files_path +if false; then + ### 1st TEST in CLIENT mode. ${CLICKHOUSE_CLIENT} --query "drop table if exists data;" ${CLICKHOUSE_CLIENT} --query "create table data (A String, B String) engine=MergeTree() order by A;" @@ -40,7 +43,7 @@ echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('${user_fil echo "clickhouse-client --query "'"select file('"'${user_files_path}/../../../../tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null echo "clickhouse-client --query "'"select file('"'../../../../a.txt'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null - +fi ### 2nd TEST in LOCAL mode. From 4a17f5c73ac23a1c3fbe2353d7dcf6a8f94723ee Mon Sep 17 00:00:00 2001 From: hexiaoting Date: Wed, 27 Jan 2021 11:24:17 +0800 Subject: [PATCH 30/94] Move condistions from JOIN ON to WHERE --- src/Interpreters/CollectJoinOnKeysVisitor.cpp | 54 ++++++++++++++----- src/Interpreters/CollectJoinOnKeysVisitor.h | 5 +- src/Interpreters/TreeRewriter.cpp | 25 +++++++-- .../00878_join_unexpected_results.reference | 2 + .../00878_join_unexpected_results.sql | 8 +-- ...conditions_from_join_on_to_where.reference | 47 ++++++++++++++++ ..._move_conditions_from_join_on_to_where.sql | 27 ++++++++++ 7 files changed, 148 insertions(+), 20 deletions(-) create mode 100644 tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference create mode 100644 tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp index 3b3fdaa65cb..a17f68fbf75 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -78,14 +78,48 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as { ASTPtr left = func.arguments->children.at(0); ASTPtr right = func.arguments->children.at(1); - auto table_numbers = getTableNumbers(ast, left, right, data); - data.addJoinKeys(left, right, table_numbers); + auto table_numbers = getTableNumbers(left, right, data); + if (table_numbers.first != 0) + { + data.addJoinKeys(left, right, table_numbers); + if (!data.new_on_expression) + data.new_on_expression = ast->clone(); + else + data.new_on_expression = makeASTFunction("and", data.new_on_expression, ast->clone()); + } + else + { + if (!data.new_where_conditions) + data.new_where_conditions = ast->clone(); + else + data.new_where_conditions = makeASTFunction("and", data.new_where_conditions, ast->clone()); + + data.move_to_where = true; + } + } else if (inequality != ASOF::Inequality::None) { if (!data.is_asof) - throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'", - ErrorCodes::NOT_IMPLEMENTED); + { + ASTPtr left = func.arguments->children.at(0); + ASTPtr right = func.arguments->children.at(1); + auto table_numbers = getTableNumbers(left, right, data); + if (table_numbers.first != 0) + { + throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'", + ErrorCodes::NOT_IMPLEMENTED); + } + else + { + if (!data.new_where_conditions) + data.new_where_conditions = ast->clone(); + else + data.new_where_conditions = makeASTFunction("and", data.new_where_conditions, ast->clone()); + + data.move_to_where = true; + } + } if (data.asof_left_key || data.asof_right_key) throw Exception("ASOF JOIN expects exactly one inequality in ON section. Unexpected '" + queryToString(ast) + "'", @@ -93,7 +127,7 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as ASTPtr left = func.arguments->children.at(0); ASTPtr right = func.arguments->children.at(1); - auto table_numbers = getTableNumbers(ast, left, right, data); + auto table_numbers = getTableNumbers(left, right, data); data.addAsofJoinKeys(left, right, table_numbers, inequality); } @@ -118,7 +152,7 @@ void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vector CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr & expr, const ASTPtr & left_ast, const ASTPtr & right_ast, +std::pair CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data) { std::vector left_identifiers; @@ -128,10 +162,7 @@ std::pair CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr getIdentifiers(right_ast, right_identifiers); if (left_identifiers.empty() || right_identifiers.empty()) - { - throw Exception("Not equi-join ON expression: " + queryToString(expr) + ". No columns in one of equality side.", - ErrorCodes::INVALID_JOIN_ON_EXPRESSION); - } + return std::make_pair(0, 0); size_t left_idents_table = getTableForIdentifiers(left_identifiers, data); size_t right_idents_table = getTableForIdentifiers(right_identifiers, data); @@ -141,8 +172,7 @@ std::pair CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr auto left_name = queryToString(*left_identifiers[0]); auto right_name = queryToString(*right_identifiers[0]); - throw Exception("In expression " + queryToString(expr) + " columns " + left_name + " and " + right_name - + " are from the same table but from different arguments of equal function", ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + return std::make_pair(0, 0); } return std::make_pair(left_idents_table, right_idents_table); diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.h b/src/Interpreters/CollectJoinOnKeysVisitor.h index 54e008a114e..2c2d731a4d7 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.h +++ b/src/Interpreters/CollectJoinOnKeysVisitor.h @@ -32,6 +32,9 @@ public: const bool is_asof{false}; ASTPtr asof_left_key{}; ASTPtr asof_right_key{}; + ASTPtr new_on_expression{}; + ASTPtr new_where_conditions{}; + bool move_to_where{false}; bool has_some{false}; void addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, const std::pair & table_no); @@ -57,7 +60,7 @@ private: static void visit(const ASTFunction & func, const ASTPtr & ast, Data & data); static void getIdentifiers(const ASTPtr & ast, std::vector & out); - static std::pair getTableNumbers(const ASTPtr & expr, const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data); + static std::pair getTableNumbers(const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data); static const ASTIdentifier * unrollAliases(const ASTIdentifier * identifier, const Aliases & aliases); static size_t getTableForIdentifiers(std::vector & identifiers, const Data & data); }; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index eaf46b717fc..7a4eac6eae3 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -400,13 +400,13 @@ void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_defaul /// Find the columns that are obtained by JOIN. void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & select_query, - const TablesWithColumns & tables, const Aliases & aliases) + const TablesWithColumns & tables, const Aliases & aliases, ASTPtr & new_where_conditions) { const ASTTablesInSelectQueryElement * node = select_query.join(); if (!node) return; - const auto & table_join = node->table_join->as(); + auto & table_join = node->table_join->as(); if (table_join.using_expression_list) { @@ -425,9 +425,24 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele ErrorCodes::INVALID_JOIN_ON_EXPRESSION); if (is_asof) data.asofToJoinKeys(); + else if (data.move_to_where) + { + table_join.on_expression = (data.new_on_expression)->clone(); + new_where_conditions = data.new_where_conditions; + } } } +/// Move joined key related to only one table to WHERE clause +void moveJoinedKeyToWhere(ASTSelectQuery * select_query, ASTPtr & new_where_conditions) +{ + if (select_query->where()) + select_query->setExpression(ASTSelectQuery::Expression::WHERE, + makeASTFunction("and", new_where_conditions->clone(), select_query->where()->clone())); + else + select_query->setExpression(ASTSelectQuery::Expression::WHERE, new_where_conditions->clone()); +} + std::vector getAggregates(ASTPtr & query, const ASTSelectQuery & select_query) { @@ -807,7 +822,11 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( setJoinStrictness(*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, result.analyzed_join->table_join); - collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases); + + ASTPtr new_where_condition; + collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases, new_where_condition); + if (new_where_condition) + moveJoinedKeyToWhere(select_query, new_where_condition); /// rewrite filters for select query, must go after getArrayJoinedColumns if (settings.optimize_respect_aliases && result.metadata_snapshot) diff --git a/tests/queries/0_stateless/00878_join_unexpected_results.reference b/tests/queries/0_stateless/00878_join_unexpected_results.reference index a389cb47a96..aaf586c2767 100644 --- a/tests/queries/0_stateless/00878_join_unexpected_results.reference +++ b/tests/queries/0_stateless/00878_join_unexpected_results.reference @@ -23,6 +23,7 @@ join_use_nulls = 1 - \N \N - +2 2 \N \N - 1 1 1 1 2 2 \N \N @@ -49,6 +50,7 @@ join_use_nulls = 0 - - - +2 2 0 0 - 1 1 1 1 2 2 0 0 diff --git a/tests/queries/0_stateless/00878_join_unexpected_results.sql b/tests/queries/0_stateless/00878_join_unexpected_results.sql index 0aef5208b26..6f6cd6e6479 100644 --- a/tests/queries/0_stateless/00878_join_unexpected_results.sql +++ b/tests/queries/0_stateless/00878_join_unexpected_results.sql @@ -30,11 +30,11 @@ select * from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null; select '-'; select s.* from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null; select '-'; -select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; -- {serverError 403 } +select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; select '-'; select t.*, s.* from t left join s on (s.a=t.a) order by t.a; select '-'; -select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; -- {serverError 403 } +select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; select 'join_use_nulls = 0'; set join_use_nulls = 0; @@ -58,11 +58,11 @@ select '-'; select '-'; -- select s.* from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null; -- TODO select '-'; -select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; -- {serverError 403 } +select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; select '-'; select t.*, s.* from t left join s on (s.a=t.a) order by t.a; select '-'; -select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; -- {serverError 403 } +select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; drop table t; drop table s; diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference new file mode 100644 index 00000000000..cf5d26b657a --- /dev/null +++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference @@ -0,0 +1,47 @@ +---------Q1---------- +2 2 2 20 +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL INNER JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON a = table2.a +WHERE table2.b = toUInt32(20) +---------Q2---------- +2 2 2 20 +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL INNER JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON a = table2.a +WHERE (table2.a < table2.b) AND (table2.b = toUInt32(20)) +---------Q3---------- +---------Q4---------- +6 40 +SELECT + a, + table2.b +FROM table1 +ALL INNER JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON a = toUInt32(10 - table2.a) +WHERE (b = 6) AND (table2.b > 20) diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql new file mode 100644 index 00000000000..7ba2a3b5c25 --- /dev/null +++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql @@ -0,0 +1,27 @@ +DROP TABLE IF EXISTS table1; +DROP TABLE IF EXISTS table2; + +CREATE TABLE table1 (a UInt32, b UInt32) ENGINE = Memory; +CREATE TABLE table2 (a UInt32, b UInt32) ENGINE = Memory; + +INSERT INTO table1 SELECT number, number FROM numbers(10); +INSERT INTO table2 SELECT number * 2, number * 20 FROM numbers(6); + +SELECT '---------Q1----------'; +SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b = toUInt32(20)); +EXPLAIN SYNTAX SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b = toUInt32(20)); + +SELECT '---------Q2----------'; +SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.a < table2.b) AND (table2.b = toUInt32(20)); +EXPLAIN SYNTAX SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.a < table2.b) AND (table2.b = toUInt32(20)); + +SELECT '---------Q3----------'; +SELECT * FROM table1 JOIN table2 ON (table1.a = toUInt32(table2.a + 5)) AND (table2.a < table1.b) AND (table2.b > toUInt32(20)); -- { serverError 48 } + +SELECT '---------Q4----------'; +SELECT table1.a, table2.b FROM table1 INNER JOIN table2 ON (table1.a = toUInt32(10 - table2.a)) AND (table1.b = 6) AND (table2.b > 20); +EXPLAIN SYNTAX SELECT table1.a, table2.b FROM table1 INNER JOIN table2 ON (table1.a = toUInt32(10 - table2.a)) AND (table1.b = 6) AND (table2.b > 20); + + +DROP TABLE table1; +DROP TABLE table2; From 9fa3e09bb142cfaf76a352deae12341bab1223bb Mon Sep 17 00:00:00 2001 From: hexiaoting Date: Wed, 27 Jan 2021 11:36:15 +0800 Subject: [PATCH 31/94] Add more test cases --- ...ove_conditions_from_join_on_to_where.reference | 15 +++++++++++++++ ...1653_move_conditions_from_join_on_to_where.sql | 6 ++++++ 2 files changed, 21 insertions(+) diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference index cf5d26b657a..a58aa254891 100644 --- a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference +++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference @@ -45,3 +45,18 @@ ALL INNER JOIN FROM table2 ) AS table2 ON a = toUInt32(10 - table2.a) WHERE (b = 6) AND (table2.b > 20) +---------Q5---------- +SELECT + a, + table2.b +FROM table1 +ALL INNER JOIN +( + SELECT + a, + b + FROM table2 + WHERE 0 +) AS table2 ON a = table2.a +WHERE 0 +---------Q6---------- diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql index 7ba2a3b5c25..5b861ecfe82 100644 --- a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql +++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql @@ -22,6 +22,12 @@ SELECT '---------Q4----------'; SELECT table1.a, table2.b FROM table1 INNER JOIN table2 ON (table1.a = toUInt32(10 - table2.a)) AND (table1.b = 6) AND (table2.b > 20); EXPLAIN SYNTAX SELECT table1.a, table2.b FROM table1 INNER JOIN table2 ON (table1.a = toUInt32(10 - table2.a)) AND (table1.b = 6) AND (table2.b > 20); +SELECT '---------Q5----------'; +SELECT table1.a, table2.b FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table1.b = 6) AND (table2.b > 20) AND (10 < 6); +EXPLAIN SYNTAX SELECT table1.a, table2.b FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table1.b = 6) AND (table2.b > 20) AND (10 < 6); + +SELECT '---------Q6----------'; +SELECT table1.a, table2.b FROM table1 JOIN table2 ON (table1.b = 6) AND (table2.b > 20); -- { serverError 403 } DROP TABLE table1; DROP TABLE table2; From 5d774c0cd90c8f872406841fb6a152237bc4b2f2 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Thu, 28 Jan 2021 19:13:32 +0800 Subject: [PATCH 32/94] find method to get user_files_path --- .../01658_read_file_to_stringcolumn.reference | 12 ++++++++++++ .../0_stateless/01658_read_file_to_stringcolumn.sh | 9 +++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference index eb5f1795f18..a22076de920 100644 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference @@ -1,3 +1,15 @@ +aaaaaaaaa bbbbbbbbb +:0 +:0 +:0 +ccccccccc aaaaaaaaa bbbbbbbbb +ccccccccc aaaaaaaaa bbbbbbbbb +:0 +:107 +:79 +:35 +:35 +:35 699415 aaaaaaaaa bbbbbbbbb ccccccccc aaaaaaaaa bbbbbbbbb diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index cc8ed3f7294..6d0f6178cba 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -6,9 +6,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh # Data preparation. -# When run with client mode on different machine to the server, the data-file creation maybe implemented in SQL. Now we just make it simple -user_files_path=$(clickhouse-client --query "select data_path from system.databases where name='default'" | sed -En 's/data\/default/user_files/p') -#user_files_path=$(grep user_files_path ${CLICKHOUSE_CONFIG} | awk '{match($0,"(.*)",path); print path[1]}') +# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: +# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" +user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 |grep Exception | awk '{match($0,"File (.*)/nonexist.txt",path); print path[1]}') mkdir -p ${user_files_path}/ echo -n aaaaaaaaa > ${user_files_path}/a.txt echo -n bbbbbbbbb > ${user_files_path}/b.txt @@ -16,8 +16,6 @@ echo -n ccccccccc > ${user_files_path}/c.txt echo -n ccccccccc > /tmp/c.txt mkdir -p ${user_files_path}/dir -# Skip the client test part, for being unable to get the correct user_files_path -if false; then ### 1st TEST in CLIENT mode. ${CLICKHOUSE_CLIENT} --query "drop table if exists data;" @@ -43,7 +41,6 @@ echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('${user_fil echo "clickhouse-client --query "'"select file('"'${user_files_path}/../../../../tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null echo "clickhouse-client --query "'"select file('"'../../../../a.txt'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null -fi ### 2nd TEST in LOCAL mode. From d3763e735b5a0f31f707d3efee05041cac95632d Mon Sep 17 00:00:00 2001 From: keenwolf Date: Thu, 28 Jan 2021 21:18:31 +0800 Subject: [PATCH 33/94] replace mawk with gawk --- tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index 6d0f6178cba..6376040fcc5 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Data preparation. # Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: # "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 |grep Exception | awk '{match($0,"File (.*)/nonexist.txt",path); print path[1]}') +user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 |grep Exception | /usr/bin/gawk '{match($0,"File (.*)/nonexist.txt",path); print path[1]}') mkdir -p ${user_files_path}/ echo -n aaaaaaaaa > ${user_files_path}/a.txt echo -n bbbbbbbbb > ${user_files_path}/b.txt From c0ac1444cb8c9c4b22663b5aac8da2215bb396b5 Mon Sep 17 00:00:00 2001 From: keenwolf Date: Thu, 28 Jan 2021 23:33:17 +0800 Subject: [PATCH 34/94] adapting to mawk --- tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index 6376040fcc5..3aca8a9980a 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Data preparation. # Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: # "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 |grep Exception | /usr/bin/gawk '{match($0,"File (.*)/nonexist.txt",path); print path[1]}') +user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 |grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') mkdir -p ${user_files_path}/ echo -n aaaaaaaaa > ${user_files_path}/a.txt echo -n bbbbbbbbb > ${user_files_path}/b.txt From 643b1da999e060d4c226c2cce65fb21e9a408bac Mon Sep 17 00:00:00 2001 From: keenwolf Date: Fri, 29 Jan 2021 10:14:10 +0800 Subject: [PATCH 35/94] just restart the CI test --- tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index 3aca8a9980a..02b0beee550 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Data preparation. # Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" +# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 |grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') mkdir -p ${user_files_path}/ echo -n aaaaaaaaa > ${user_files_path}/a.txt From 45aee71fffea2268dcb611b8a6aadaf098c16425 Mon Sep 17 00:00:00 2001 From: hexiaoting Date: Wed, 3 Feb 2021 18:52:20 +0800 Subject: [PATCH 36/94] Modified some implementation --- src/Interpreters/CollectJoinOnKeysVisitor.cpp | 29 ++++++++++--------- src/Interpreters/CollectJoinOnKeysVisitor.h | 3 +- src/Interpreters/TreeRewriter.cpp | 6 ++-- ...conditions_from_join_on_to_where.reference | 16 ++++++++++ ..._move_conditions_from_join_on_to_where.sql | 9 ++++++ 5 files changed, 44 insertions(+), 19 deletions(-) diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp index a17f68fbf75..99b8e24ff59 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -78,9 +78,11 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as { ASTPtr left = func.arguments->children.at(0); ASTPtr right = func.arguments->children.at(1); - auto table_numbers = getTableNumbers(left, right, data); - if (table_numbers.first != 0) + bool need_optimize = false; + auto table_numbers = getTableNumbers(left, right, data, &need_optimize); + if (!need_optimize) { + // related to two different tables data.addJoinKeys(left, right, table_numbers); if (!data.new_on_expression) data.new_on_expression = ast->clone(); @@ -93,8 +95,6 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as data.new_where_conditions = ast->clone(); else data.new_where_conditions = makeASTFunction("and", data.new_where_conditions, ast->clone()); - - data.move_to_where = true; } } @@ -104,7 +104,8 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as { ASTPtr left = func.arguments->children.at(0); ASTPtr right = func.arguments->children.at(1); - auto table_numbers = getTableNumbers(left, right, data); + bool need_optimize_unused = false; + auto table_numbers = getTableNumbers(left, right, data, &need_optimize_unused); if (table_numbers.first != 0) { throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'", @@ -116,8 +117,6 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as data.new_where_conditions = ast->clone(); else data.new_where_conditions = makeASTFunction("and", data.new_where_conditions, ast->clone()); - - data.move_to_where = true; } } @@ -127,7 +126,8 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as ASTPtr left = func.arguments->children.at(0); ASTPtr right = func.arguments->children.at(1); - auto table_numbers = getTableNumbers(left, right, data); + bool need_optimize_unused; + auto table_numbers = getTableNumbers(left, right, data, &need_optimize_unused); data.addAsofJoinKeys(left, right, table_numbers, inequality); } @@ -153,7 +153,7 @@ void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vector CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr & left_ast, const ASTPtr & right_ast, - Data & data) + Data & data, bool *need_optimize) { std::vector left_identifiers; std::vector right_identifiers; @@ -162,17 +162,18 @@ std::pair CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr getIdentifiers(right_ast, right_identifiers); if (left_identifiers.empty() || right_identifiers.empty()) - return std::make_pair(0, 0); + { + *need_optimize = true; + return {0, 0}; + } size_t left_idents_table = getTableForIdentifiers(left_identifiers, data); size_t right_idents_table = getTableForIdentifiers(right_identifiers, data); if (left_idents_table && left_idents_table == right_idents_table) { - auto left_name = queryToString(*left_identifiers[0]); - auto right_name = queryToString(*right_identifiers[0]); - - return std::make_pair(0, 0); + *need_optimize = true; + return {0, 0}; } return std::make_pair(left_idents_table, right_idents_table); diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.h b/src/Interpreters/CollectJoinOnKeysVisitor.h index 2c2d731a4d7..050acb87ae2 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.h +++ b/src/Interpreters/CollectJoinOnKeysVisitor.h @@ -34,7 +34,6 @@ public: ASTPtr asof_right_key{}; ASTPtr new_on_expression{}; ASTPtr new_where_conditions{}; - bool move_to_where{false}; bool has_some{false}; void addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, const std::pair & table_no); @@ -60,7 +59,7 @@ private: static void visit(const ASTFunction & func, const ASTPtr & ast, Data & data); static void getIdentifiers(const ASTPtr & ast, std::vector & out); - static std::pair getTableNumbers(const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data); + static std::pair getTableNumbers(const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data, bool *need_optimize); static const ASTIdentifier * unrollAliases(const ASTIdentifier * identifier, const Aliases & aliases); static size_t getTableForIdentifiers(std::vector & identifiers, const Data & data); }; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index fdb78aad021..7a194df8f30 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -425,9 +425,9 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele ErrorCodes::INVALID_JOIN_ON_EXPRESSION); if (is_asof) data.asofToJoinKeys(); - else if (data.move_to_where) + else if (data.new_where_conditions != nullptr) { - table_join.on_expression = (data.new_on_expression)->clone(); + table_join.on_expression = data.new_on_expression; new_where_conditions = data.new_where_conditions; } } @@ -438,7 +438,7 @@ void moveJoinedKeyToWhere(ASTSelectQuery * select_query, ASTPtr & new_where_cond { if (select_query->where()) select_query->setExpression(ASTSelectQuery::Expression::WHERE, - makeASTFunction("and", new_where_conditions->clone(), select_query->where()->clone())); + makeASTFunction("and", new_where_conditions, select_query->where())); else select_query->setExpression(ASTSelectQuery::Expression::WHERE, new_where_conditions->clone()); } diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference index a58aa254891..4f4909a0cb5 100644 --- a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference +++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference @@ -60,3 +60,19 @@ ALL INNER JOIN ) AS table2 ON a = table2.a WHERE 0 ---------Q6---------- +---------Q7---------- +0 0 0 0 +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL INNER JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON a = table2.a +WHERE (table2.b < toUInt32(40)) AND (b < 1) diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql index 5b861ecfe82..9ec8f0fe156 100644 --- a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql +++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql @@ -1,3 +1,6 @@ +DROP DATABASE IF EXISTS test_01653; +CREATE DATABASE test_01653; +USE test_01653; DROP TABLE IF EXISTS table1; DROP TABLE IF EXISTS table2; @@ -29,5 +32,11 @@ EXPLAIN SYNTAX SELECT table1.a, table2.b FROM table1 JOIN table2 ON (table1.a = SELECT '---------Q6----------'; SELECT table1.a, table2.b FROM table1 JOIN table2 ON (table1.b = 6) AND (table2.b > 20); -- { serverError 403 } +SELECT '---------Q7----------'; +SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(40)) where table1.b < 1; +EXPLAIN SYNTAX SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(40)) where table1.b < 1; +SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(40)) where table1.b > 10; + DROP TABLE table1; DROP TABLE table2; +DROP DATABASE test_01653; From 1795735950f7a1d223fcb164089e04df2fc682a7 Mon Sep 17 00:00:00 2001 From: hexiaoting Date: Thu, 4 Feb 2021 10:23:03 +0800 Subject: [PATCH 37/94] Remove create-db sql in test case --- .../01653_move_conditions_from_join_on_to_where.sql | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql index 9ec8f0fe156..259ff822f3f 100644 --- a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql +++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql @@ -1,6 +1,3 @@ -DROP DATABASE IF EXISTS test_01653; -CREATE DATABASE test_01653; -USE test_01653; DROP TABLE IF EXISTS table1; DROP TABLE IF EXISTS table2; @@ -39,4 +36,3 @@ SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt DROP TABLE table1; DROP TABLE table2; -DROP DATABASE test_01653; From 409ff2f6b3f7b16cd9c15cca48b3332574bd8cd5 Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Thu, 4 Feb 2021 22:13:55 +0300 Subject: [PATCH 38/94] Document system.opentelemetry_span_log system table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Задокументировал системную таблицу system.opentelemetry_span_log. --- .../system-tables/opentelemetry_span_log.md | 49 +++++++++++++++++++ .../system-tables/opentelemetry_span_log.md | 45 +++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 docs/en/operations/system-tables/opentelemetry_span_log.md create mode 100644 docs/ru/operations/system-tables/opentelemetry_span_log.md diff --git a/docs/en/operations/system-tables/opentelemetry_span_log.md b/docs/en/operations/system-tables/opentelemetry_span_log.md new file mode 100644 index 00000000000..64fd549458a --- /dev/null +++ b/docs/en/operations/system-tables/opentelemetry_span_log.md @@ -0,0 +1,49 @@ +# system.opentelemetry_span_log {#system_tables-opentelemetry_span_log} + +Contains information about [trace spans](https://opentracing.io/docs/overview/spans/) for executed queries. + +Columns: + +- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md) — ID of the trace for executed query. + +- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the `trace span`. + +- `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the parent `trace span`. + +- `operation_name` ([String](../../sql-reference/data-types/string.md)) — The name of the operation. + +- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The start time of the `trace span` (in microseconds). + +- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The finish time of the `trace span` (in microseconds). + +- `finish_date` ([Date](../../sql-reference/data-types/date.md)) — The finish date of the `trace span`. + +- `attribute.names` ([Array(String)](../../sql-reference/data-types/array.md)) — [Attribute](https://opentelemetry.io/docs/go/instrumentation/#attributes) names depending on the `trace span`. They are filled in according to the recommendations in the [OpenTelemetry](https://opentelemetry.io/) standard. + +- `attribute.values` ([Array(String)](../../sql-reference/data-types/array.md)) — Attribute values depending on the `trace span`. They are filled in according to the recommendations in the `OpenTelemetry` standard. + +**Example** + +``` sql +SELECT * FROM system.opentelemetry_span_log LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +trace_id: cdab0847-0d62-61d5-4d38-dd65b19a1914 +span_id: 701487461015578150 +parent_span_id: 2991972114672045096 +operation_name: DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl() +start_time_us: 1612374594529090 +finish_time_us: 1612374594529108 +finish_date: 2021-02-03 +attribute.names: [] +attribute.values: [] +``` + +**See Also** + +- [OpenTelemetry](../../operations/opentelemetry.md) + +[Original article](https://clickhouse.tech/docs/en/operations/system_tables/opentelemetry_span_log) diff --git a/docs/ru/operations/system-tables/opentelemetry_span_log.md b/docs/ru/operations/system-tables/opentelemetry_span_log.md new file mode 100644 index 00000000000..5c577eb691d --- /dev/null +++ b/docs/ru/operations/system-tables/opentelemetry_span_log.md @@ -0,0 +1,45 @@ +# system.opentelemetry_span_log {#system_tables-opentelemetry_span_log} + +Содержит информацию о [trace spans](https://opentracing.io/docs/overview/spans/) для выполненных запросов. + +Столбцы: + +- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md) — идентификатор трассировки для выполненного запроса. + +- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор `trace span`. + +- `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор родительского `trace span`. + +- `operation_name` ([String](../../sql-reference/data-types/string.md)) — имя операции. + +- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — время начала `trace span` (в микросекундах). + +- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — время окончания `trace span` (в микросекундах). + +- `finish_date` ([Date](../../sql-reference/data-types/date.md)) — дата окончания `trace span`. + +- `attribute.names` ([Array(String)](../../sql-reference/data-types/array.md)) — имена [атрибутов](https://opentelemetry.io/docs/go/instrumentation/#attributes) в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте [OpenTelemetry](https://opentelemetry.io/). + +- `attribute.values` ([Array(String)](../../sql-reference/data-types/array.md)) — значения атрибутов в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте `OpenTelemetry`. + +**Пример** + +``` sql +SELECT * FROM system.opentelemetry_span_log LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +trace_id: cdab0847-0d62-61d5-4d38-dd65b19a1914 +span_id: 701487461015578150 +parent_span_id: 2991972114672045096 +operation_name: DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl() +start_time_us: 1612374594529090 +finish_time_us: 1612374594529108 +finish_date: 2021-02-03 +attribute.names: [] +attribute.values: [] +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/opentelemetry_span_log) From e1359b01a1cc34c7a6e5fead6568b6ecae5ba0a9 Mon Sep 17 00:00:00 2001 From: hexiaoting Date: Fri, 5 Feb 2021 11:11:27 +0800 Subject: [PATCH 39/94] Remove unnecessary codes --- src/Interpreters/CollectJoinOnKeysVisitor.cpp | 26 ++++++------------- src/Interpreters/CollectJoinOnKeysVisitor.h | 2 +- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp index 99b8e24ff59..29e3ebc52b0 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -78,9 +78,8 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as { ASTPtr left = func.arguments->children.at(0); ASTPtr right = func.arguments->children.at(1); - bool need_optimize = false; - auto table_numbers = getTableNumbers(left, right, data, &need_optimize); - if (!need_optimize) + auto table_numbers = getTableNumbers(left, right, data); + if (table_numbers.first != table_numbers.second) { // related to two different tables data.addJoinKeys(left, right, table_numbers); @@ -104,9 +103,8 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as { ASTPtr left = func.arguments->children.at(0); ASTPtr right = func.arguments->children.at(1); - bool need_optimize_unused = false; - auto table_numbers = getTableNumbers(left, right, data, &need_optimize_unused); - if (table_numbers.first != 0) + auto table_numbers = getTableNumbers(left, right, data); + if (table_numbers.first != table_numbers.second) { throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'", ErrorCodes::NOT_IMPLEMENTED); @@ -126,8 +124,7 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as ASTPtr left = func.arguments->children.at(0); ASTPtr right = func.arguments->children.at(1); - bool need_optimize_unused; - auto table_numbers = getTableNumbers(left, right, data, &need_optimize_unused); + auto table_numbers = getTableNumbers(left, right, data); data.addAsofJoinKeys(left, right, table_numbers, inequality); } @@ -152,8 +149,9 @@ void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vector CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr & left_ast, const ASTPtr & right_ast, - Data & data, bool *need_optimize) + Data & data) { std::vector left_identifiers; std::vector right_identifiers; @@ -162,20 +160,11 @@ std::pair CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr getIdentifiers(right_ast, right_identifiers); if (left_identifiers.empty() || right_identifiers.empty()) - { - *need_optimize = true; return {0, 0}; - } size_t left_idents_table = getTableForIdentifiers(left_identifiers, data); size_t right_idents_table = getTableForIdentifiers(right_identifiers, data); - if (left_idents_table && left_idents_table == right_idents_table) - { - *need_optimize = true; - return {0, 0}; - } - return std::make_pair(left_idents_table, right_idents_table); } @@ -260,6 +249,7 @@ size_t CollectJoinOnKeysMatcher::getTableForIdentifiers(std::vector & out); - static std::pair getTableNumbers(const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data, bool *need_optimize); + static std::pair getTableNumbers(const ASTPtr & left_ast, const ASTPtr & right_ast, Data & data); static const ASTIdentifier * unrollAliases(const ASTIdentifier * identifier, const Aliases & aliases); static size_t getTableForIdentifiers(std::vector & identifiers, const Data & data); }; From c6c1541c9f8154aafdc66f1a37592454d2b565f0 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 5 Feb 2021 10:53:26 +0300 Subject: [PATCH 40/94] Remove assert from CollectJoinOnKeysVisitor.cpp --- src/Interpreters/CollectJoinOnKeysVisitor.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp index 29e3ebc52b0..ba151b7f903 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -249,7 +249,6 @@ size_t CollectJoinOnKeysMatcher::getTableForIdentifiers(std::vector Date: Fri, 5 Feb 2021 17:39:05 +0100 Subject: [PATCH 41/94] Add 'access_management' configuration to initial setup --- docker/server/README.md | 8 ++++---- docker/server/entrypoint.sh | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/docker/server/README.md b/docker/server/README.md index d8e9204dffa..6f799d68185 100644 --- a/docker/server/README.md +++ b/docker/server/README.md @@ -56,7 +56,7 @@ $ echo 'SELECT version()' | curl 'http://localhost:8123/' --data-binary @- 20.12.3.3 ``` -### Volumes +### Volumes Typically you may want to mount the following folders inside your container to archieve persistency: @@ -76,7 +76,7 @@ You may also want to mount: * `/etc/clickhouse-server/usert.d/*.xml` - files with use settings adjustmenets * `/docker-entrypoint-initdb.d/` - folder with database initialization scripts (see below). -### Linux capabilities +### Linux capabilities ClickHouse has some advanced functionality which requite enabling several [linux capabilities](https://man7.org/linux/man-pages/man7/capabilities.7.html). @@ -113,10 +113,10 @@ $ docker run --rm -e CLICKHOUSE_UID=0 -e CLICKHOUSE_GID=0 --name clickhouse-serv ### How to create default database and user on starting -Sometimes you may want to create user (user named `default` is used by default) and database on image starting. You can do it using environment variables `CLICKHOUSE_DB`, `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD`: +Sometimes you may want to create user (user named `default` is used by default) and database on image starting. You can do it using environment variables `CLICKHOUSE_DB`, `CLICKHOUSE_USER`, `CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT` and `CLICKHOUSE_PASSWORD`: ``` -$ docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLICKHOUSE_PASSWORD=password -p 9000:9000/tcp yandex/clickhouse-server +$ docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 -e CLICKHOUSE_PASSWORD=password -p 9000:9000/tcp yandex/clickhouse-server ``` ## How to extend this image diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 549ff601c59..0138a165505 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -54,6 +54,7 @@ FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_ CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}" CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}" CLICKHOUSE_DB="${CLICKHOUSE_DB:-}" +CLICKHOUSE_ACCESS_MANAGEMENT="${CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT:-0}" for dir in "$DATA_DIR" \ "$ERROR_LOG_DIR" \ @@ -97,6 +98,7 @@ if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CL ${CLICKHOUSE_PASSWORD} default + ${CLICKHOUSE_ACCESS_MANAGEMENT} From f0370b241c341ce961bac516afbd909631ec6b3d Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Sat, 6 Feb 2021 20:17:25 +0300 Subject: [PATCH 42/94] Document the opentelemetry_start_trace_probability setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Задокументировал настройку. --- docs/en/operations/settings/settings.md | 11 +++++++++++ docs/ru/operations/settings/settings.md | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index edfd391c71e..869c76fb975 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2592,4 +2592,15 @@ Possible values: Default value: `16`. +## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability} + +Enables a trace for executed queries. + +Possible values: + +- 0 — The trace for a executed query is disabled. +- 1 — The trace for a executed query is enabled. + +Default value: `0`. + [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index bacc97dfd14..2aa81daa0b0 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2473,4 +2473,15 @@ SELECT SUM(-1), MAX(0) FROM system.one WHERE 0; Значение по умолчанию: `16`. +## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability} + +Включает трассировку для выполненных запросов. + +Возможные значения: + +- 0 — трассировка для выполненного запроса отключена. +- 1 — трассировка для выполненного запроса включена. + +Значение по умолчанию: `0`. + [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) From 2c278f1e0272ceec1372ae30800be27ce423d51a Mon Sep 17 00:00:00 2001 From: hexiaoting Date: Mon, 8 Feb 2021 13:44:50 +0800 Subject: [PATCH 43/94] Restrict move JOINON to WHERE optimizer only to inner join --- src/Interpreters/CollectJoinOnKeysVisitor.cpp | 51 ++++++++------- src/Interpreters/CollectJoinOnKeysVisitor.h | 2 + src/Interpreters/TreeRewriter.cpp | 2 +- ...conditions_from_join_on_to_where.reference | 62 +++++++++++++++++++ ..._move_conditions_from_join_on_to_where.sql | 10 +++ 5 files changed, 105 insertions(+), 22 deletions(-) diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp index ba151b7f903..8b5fbeef7eb 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -79,23 +79,26 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as ASTPtr left = func.arguments->children.at(0); ASTPtr right = func.arguments->children.at(1); auto table_numbers = getTableNumbers(left, right, data); - if (table_numbers.first != table_numbers.second) - { - // related to two different tables - data.addJoinKeys(left, right, table_numbers); - if (!data.new_on_expression) - data.new_on_expression = ast->clone(); - else - data.new_on_expression = makeASTFunction("and", data.new_on_expression, ast->clone()); - } - else + + /** + * if this is an inner join and the expression related to less than 2 tables, then move it to WHERE + */ + if (data.kind == ASTTableJoin::Kind::Inner + && (table_numbers.first == table_numbers.second || table_numbers.first == 0 || table_numbers.second == 0)) { if (!data.new_where_conditions) data.new_where_conditions = ast->clone(); else data.new_where_conditions = makeASTFunction("and", data.new_where_conditions, ast->clone()); } - + else + { + data.addJoinKeys(left, right, table_numbers); + if (!data.new_on_expression) + data.new_on_expression = ast->clone(); + else + data.new_on_expression = makeASTFunction("and", data.new_on_expression, ast->clone()); + } } else if (inequality != ASOF::Inequality::None) { @@ -104,17 +107,21 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as ASTPtr left = func.arguments->children.at(0); ASTPtr right = func.arguments->children.at(1); auto table_numbers = getTableNumbers(left, right, data); - if (table_numbers.first != table_numbers.second) - { - throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'", - ErrorCodes::NOT_IMPLEMENTED); - } - else + + if (data.kind == ASTTableJoin::Kind::Inner + && (table_numbers.first == table_numbers.second || table_numbers.first == 0 || table_numbers.second == 0)) { if (!data.new_where_conditions) data.new_where_conditions = ast->clone(); else data.new_where_conditions = makeASTFunction("and", data.new_where_conditions, ast->clone()); + + return; + } + else + { + throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'", + ErrorCodes::NOT_IMPLEMENTED); } } @@ -159,11 +166,13 @@ std::pair CollectJoinOnKeysMatcher::getTableNumbers(const ASTPtr getIdentifiers(left_ast, left_identifiers); getIdentifiers(right_ast, right_identifiers); - if (left_identifiers.empty() || right_identifiers.empty()) - return {0, 0}; + size_t left_idents_table = 0; + size_t right_idents_table = 0; - size_t left_idents_table = getTableForIdentifiers(left_identifiers, data); - size_t right_idents_table = getTableForIdentifiers(right_identifiers, data); + if (!left_identifiers.empty()) + left_idents_table = getTableForIdentifiers(left_identifiers, data); + if (!right_identifiers.empty()) + right_idents_table = getTableForIdentifiers(right_identifiers, data); return std::make_pair(left_idents_table, right_idents_table); } diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.h b/src/Interpreters/CollectJoinOnKeysVisitor.h index 42133cf0b6e..aa2fd80d07c 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.h +++ b/src/Interpreters/CollectJoinOnKeysVisitor.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -30,6 +31,7 @@ public: const TableWithColumnNamesAndTypes & right_table; const Aliases & aliases; const bool is_asof{false}; + ASTTableJoin::Kind kind; ASTPtr asof_left_key{}; ASTPtr asof_right_key{}; ASTPtr new_on_expression{}; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 7a194df8f30..332734e4ca6 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -418,7 +418,7 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele { bool is_asof = (table_join.strictness == ASTTableJoin::Strictness::Asof); - CollectJoinOnKeysVisitor::Data data{analyzed_join, tables[0], tables[1], aliases, is_asof}; + CollectJoinOnKeysVisitor::Data data{analyzed_join, tables[0], tables[1], aliases, is_asof, table_join.kind}; CollectJoinOnKeysVisitor(data).visit(table_join.on_expression); if (!data.has_some) throw Exception("Cannot get JOIN keys from JOIN ON section: " + queryToString(table_join.on_expression), diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference index 4f4909a0cb5..19487c9f942 100644 --- a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference +++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference @@ -76,3 +76,65 @@ ALL INNER JOIN FROM table2 ) AS table2 ON a = table2.a WHERE (table2.b < toUInt32(40)) AND (b < 1) +---------Q8---------- +---------Q9---will not be optimized---------- +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL LEFT JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON (a = table2.a) AND (b = toUInt32(10)) +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL RIGHT JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON (a = table2.a) AND (b = toUInt32(10)) +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL FULL OUTER JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON (a = table2.a) AND (b = toUInt32(10)) +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL FULL OUTER JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON (a = table2.a) AND (table2.b = toUInt32(10)) +WHERE a < toUInt32(20) +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +CROSS JOIN table2 diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql index 259ff822f3f..23871a9c47c 100644 --- a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql +++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql @@ -34,5 +34,15 @@ SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt EXPLAIN SYNTAX SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(40)) where table1.b < 1; SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(40)) where table1.b > 10; +SELECT '---------Q8----------'; +SELECT * FROM table1 INNER JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(table1, 10)); -- { serverError 47 } + +SELECT '---------Q9---will not be optimized----------'; +EXPLAIN SYNTAX SELECT * FROM table1 LEFT JOIN table2 ON (table1.a = table2.a) AND (table1.b = toUInt32(10)); +EXPLAIN SYNTAX SELECT * FROM table1 RIGHT JOIN table2 ON (table1.a = table2.a) AND (table1.b = toUInt32(10)); +EXPLAIN SYNTAX SELECT * FROM table1 FULL JOIN table2 ON (table1.a = table2.a) AND (table1.b = toUInt32(10)); +EXPLAIN SYNTAX SELECT * FROM table1 FULL JOIN table2 ON (table1.a = table2.a) AND (table2.b = toUInt32(10)) WHERE table1.a < toUInt32(20); +EXPLAIN SYNTAX SELECT * FROM table1 , table2; + DROP TABLE table1; DROP TABLE table2; From 28b981a76b5b1033993b9f3ec8badee4a5526203 Mon Sep 17 00:00:00 2001 From: hexiaoting Date: Tue, 9 Feb 2021 18:08:55 +0800 Subject: [PATCH 44/94] Fix style error and test cases error --- src/Interpreters/CollectJoinOnKeysVisitor.cpp | 10 ++++++++-- src/Interpreters/CollectJoinOnKeysVisitor.h | 1 + src/Interpreters/TreeRewriter.cpp | 3 +++ .../00878_join_unexpected_results.reference | 2 ++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp index 8b5fbeef7eb..ec413fe08fc 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -80,6 +80,9 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as ASTPtr right = func.arguments->children.at(1); auto table_numbers = getTableNumbers(left, right, data); + if (table_numbers.first != table_numbers.second && table_numbers.first > 0 && table_numbers.second > 0) + data.new_on_expression_valid = true; + /** * if this is an inner join and the expression related to less than 2 tables, then move it to WHERE */ @@ -108,6 +111,9 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as ASTPtr right = func.arguments->children.at(1); auto table_numbers = getTableNumbers(left, right, data); + if (table_numbers.first != table_numbers.second && table_numbers.first > 0 && table_numbers.second > 0) + data.new_on_expression_valid = true; + if (data.kind == ASTTableJoin::Kind::Inner && (table_numbers.first == table_numbers.second || table_numbers.first == 0 || table_numbers.second == 0)) { @@ -116,7 +122,7 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as else data.new_where_conditions = makeASTFunction("and", data.new_where_conditions, ast->clone()); - return; + return; } else { @@ -127,7 +133,7 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as if (data.asof_left_key || data.asof_right_key) throw Exception("ASOF JOIN expects exactly one inequality in ON section. Unexpected '" + queryToString(ast) + "'", - ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + ErrorCodes::INVALID_JOIN_ON_EXPRESSION); ASTPtr left = func.arguments->children.at(0); ASTPtr right = func.arguments->children.at(1); diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.h b/src/Interpreters/CollectJoinOnKeysVisitor.h index aa2fd80d07c..64547baf7d7 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.h +++ b/src/Interpreters/CollectJoinOnKeysVisitor.h @@ -37,6 +37,7 @@ public: ASTPtr new_on_expression{}; ASTPtr new_where_conditions{}; bool has_some{false}; + bool new_on_expression_valid{false}; void addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, const std::pair & table_no); void addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, const std::pair & table_no, diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 332734e4ca6..9f788703704 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -425,6 +425,9 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele ErrorCodes::INVALID_JOIN_ON_EXPRESSION); if (is_asof) data.asofToJoinKeys(); + else if (!data.new_on_expression_valid) + throw Exception("JOIN expects left and right joined keys from two joined table in ON section. Unexpected '" + queryToString(data.new_on_expression) + "'", + ErrorCodes::INVALID_JOIN_ON_EXPRESSION); else if (data.new_where_conditions != nullptr) { table_join.on_expression = data.new_on_expression; diff --git a/tests/queries/0_stateless/00878_join_unexpected_results.reference b/tests/queries/0_stateless/00878_join_unexpected_results.reference index aaf586c2767..65fcbc257ca 100644 --- a/tests/queries/0_stateless/00878_join_unexpected_results.reference +++ b/tests/queries/0_stateless/00878_join_unexpected_results.reference @@ -23,6 +23,7 @@ join_use_nulls = 1 - \N \N - +1 1 \N \N 2 2 \N \N - 1 1 1 1 @@ -50,6 +51,7 @@ join_use_nulls = 0 - - - +1 1 0 0 2 2 0 0 - 1 1 1 1 From ed59b355c0dba42da612546a584b0645ef463019 Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Tue, 9 Feb 2021 20:34:16 +0300 Subject: [PATCH 45/94] Update the description of the opentelemetry_start_trace_probability setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Обновил документацию настройки. --- docs/en/operations/settings/settings.md | 7 ++++--- docs/ru/operations/settings/settings.md | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 869c76fb975..0554ea79ecd 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2594,12 +2594,13 @@ Default value: `16`. ## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability} -Enables a trace for executed queries. +Sets the probability that the ClickHouse can start a trace for executed queries (if no parent [trace context](https://www.w3.org/TR/trace-context/) is supplied). Possible values: -- 0 — The trace for a executed query is disabled. -- 1 — The trace for a executed query is enabled. +- 0 — The trace for a executed queries is disabled (if no parent trace context is supplied). +- (0, 1) — The probability with which the ClickHouse can start a trace for executed queries (if no parent trace context is supplied). For example, if the setting value is `0,5`, ClickHouse can start a trace on average for half of the queries. +- 1 — The trace for all executed queries is enabled. Default value: `0`. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 2aa81daa0b0..47e2666e652 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2475,12 +2475,13 @@ SELECT SUM(-1), MAX(0) FROM system.one WHERE 0; ## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability} -Включает трассировку для выполненных запросов. +Задает вероятность того, что ClickHouse начнет трассировку для выполненных запросов (если не указан [родительский контекст](https://www.w3.org/TR/trace-context/) трассировки). Возможные значения: -- 0 — трассировка для выполненного запроса отключена. -- 1 — трассировка для выполненного запроса включена. +- 0 — трассировка для выполненных запросов отключена (если не указан родительский контекст трассировки). +- (0, 1) — вероятность, с которой ClickHouse начнет трассировку для выполненных запросов (если не указан родительский контекст трассировки). Например, при значении настройки, равной `0,5`, ClickHouse начнет трассировку в среднем для половины запросов. +- 1 — трассировка для всех выполненных запросов включена. Значение по умолчанию: `0`. From d3549aca95c1bcdc2b65617afd35f71ee51be4a9 Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Thu, 11 Feb 2021 21:42:15 +0300 Subject: [PATCH 46/94] Fix the description of the table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Поправил описание таблицы. --- .../en/operations/system-tables/opentelemetry_span_log.md | 8 ++++++-- .../ru/operations/system-tables/opentelemetry_span_log.md | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/system-tables/opentelemetry_span_log.md b/docs/en/operations/system-tables/opentelemetry_span_log.md index 64fd549458a..e45a989742c 100644 --- a/docs/en/operations/system-tables/opentelemetry_span_log.md +++ b/docs/en/operations/system-tables/opentelemetry_span_log.md @@ -18,16 +18,20 @@ Columns: - `finish_date` ([Date](../../sql-reference/data-types/date.md)) — The finish date of the `trace span`. -- `attribute.names` ([Array(String)](../../sql-reference/data-types/array.md)) — [Attribute](https://opentelemetry.io/docs/go/instrumentation/#attributes) names depending on the `trace span`. They are filled in according to the recommendations in the [OpenTelemetry](https://opentelemetry.io/) standard. +- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — [Attribute](https://opentelemetry.io/docs/go/instrumentation/#attributes) names depending on the `trace span`. They are filled in according to the recommendations in the [OpenTelemetry](https://opentelemetry.io/) standard. -- `attribute.values` ([Array(String)](../../sql-reference/data-types/array.md)) — Attribute values depending on the `trace span`. They are filled in according to the recommendations in the `OpenTelemetry` standard. +- `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Attribute values depending on the `trace span`. They are filled in according to the recommendations in the `OpenTelemetry` standard. **Example** +Query: + ``` sql SELECT * FROM system.opentelemetry_span_log LIMIT 1 FORMAT Vertical; ``` +Result: + ``` text Row 1: ────── diff --git a/docs/ru/operations/system-tables/opentelemetry_span_log.md b/docs/ru/operations/system-tables/opentelemetry_span_log.md index 5c577eb691d..96555064b0e 100644 --- a/docs/ru/operations/system-tables/opentelemetry_span_log.md +++ b/docs/ru/operations/system-tables/opentelemetry_span_log.md @@ -18,16 +18,20 @@ - `finish_date` ([Date](../../sql-reference/data-types/date.md)) — дата окончания `trace span`. -- `attribute.names` ([Array(String)](../../sql-reference/data-types/array.md)) — имена [атрибутов](https://opentelemetry.io/docs/go/instrumentation/#attributes) в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте [OpenTelemetry](https://opentelemetry.io/). +- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — имена [атрибутов](https://opentelemetry.io/docs/go/instrumentation/#attributes) в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте [OpenTelemetry](https://opentelemetry.io/). -- `attribute.values` ([Array(String)](../../sql-reference/data-types/array.md)) — значения атрибутов в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте `OpenTelemetry`. +- `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — значения атрибутов в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте `OpenTelemetry`. **Пример** +Запрос: + ``` sql SELECT * FROM system.opentelemetry_span_log LIMIT 1 FORMAT Vertical; ``` +Результат: + ``` text Row 1: ────── From ce1524c4ebaca545feeaa1493d5ae8e66af8dab9 Mon Sep 17 00:00:00 2001 From: sevirov <72220289+sevirov@users.noreply.github.com> Date: Thu, 11 Feb 2021 22:06:30 +0300 Subject: [PATCH 47/94] Update docs/en/operations/settings/settings.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 0554ea79ecd..8f1cb186449 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2599,7 +2599,7 @@ Sets the probability that the ClickHouse can start a trace for executed queries Possible values: - 0 — The trace for a executed queries is disabled (if no parent trace context is supplied). -- (0, 1) — The probability with which the ClickHouse can start a trace for executed queries (if no parent trace context is supplied). For example, if the setting value is `0,5`, ClickHouse can start a trace on average for half of the queries. +- Positive floating-point number in the range [0..1]. For example, if the setting value is `0,5`, ClickHouse can start a trace on average for half of the queries. - 1 — The trace for all executed queries is enabled. Default value: `0`. From 6271709efacad598431127808dae44cd1ac6e0bb Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Thu, 11 Feb 2021 22:23:19 +0300 Subject: [PATCH 48/94] Fix the description of the setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Поправил описание настройки. --- docs/en/operations/settings/settings.md | 2 +- docs/ru/operations/settings/settings.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 8f1cb186449..6f028b00a5b 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2599,7 +2599,7 @@ Sets the probability that the ClickHouse can start a trace for executed queries Possible values: - 0 — The trace for a executed queries is disabled (if no parent trace context is supplied). -- Positive floating-point number in the range [0..1]. For example, if the setting value is `0,5`, ClickHouse can start a trace on average for half of the queries. +- Positive floating-point number in the range [0..1]. For example, if the setting value is `0,5`, ClickHouse can start a trace on average for half of the queries. - 1 — The trace for all executed queries is enabled. Default value: `0`. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 47e2666e652..434157401fa 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2475,12 +2475,12 @@ SELECT SUM(-1), MAX(0) FROM system.one WHERE 0; ## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability} -Задает вероятность того, что ClickHouse начнет трассировку для выполненных запросов (если не указан [родительский контекст](https://www.w3.org/TR/trace-context/) трассировки). +Задает вероятность того, что ClickHouse начнет трассировку для выполненных запросов (если не указан [входящий контекст](https://www.w3.org/TR/trace-context/) трассировки). Возможные значения: -- 0 — трассировка для выполненных запросов отключена (если не указан родительский контекст трассировки). -- (0, 1) — вероятность, с которой ClickHouse начнет трассировку для выполненных запросов (если не указан родительский контекст трассировки). Например, при значении настройки, равной `0,5`, ClickHouse начнет трассировку в среднем для половины запросов. +- 0 — трассировка для выполненных запросов отключена (если не указан входящий контекст трассировки). +- Положительное число с плавающей точкой в диапазоне [0..1]. Например, при значении настройки, равной `0,5`, ClickHouse начнет трассировку в среднем для половины запросов. - 1 — трассировка для всех выполненных запросов включена. Значение по умолчанию: `0`. From 2a52aa8ca30146c8eede353d5a4886781d82d53d Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Fri, 12 Feb 2021 20:25:40 +0300 Subject: [PATCH 49/94] fix test --- CMakeLists.txt | 1 - src/Functions/ya.make | 1 + tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh | 1 + tests/queries/skip_list.json | 1 + 4 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9002f1df140..853b2df7aca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -490,7 +490,6 @@ include (cmake/find/rapidjson.cmake) include (cmake/find/fastops.cmake) include (cmake/find/odbc.cmake) include (cmake/find/rocksdb.cmake) -include (cmake/find/libpqxx.cmake) include (cmake/find/nuraft.cmake) diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 7f9c7add0b8..173c71ee557 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -39,6 +39,7 @@ SRCS( CRC.cpp FunctionFQDN.cpp FunctionFactory.cpp + FunctionFile.cpp FunctionHelpers.cpp FunctionJoinGet.cpp FunctionsAES.cpp diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index 02b0beee550..43e1e11a193 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -9,6 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: # "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 |grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + mkdir -p ${user_files_path}/ echo -n aaaaaaaaa > ${user_files_path}/a.txt echo -n bbbbbbbbb > ${user_files_path}/b.txt diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 53fcfe8b13f..7a0bd3375f3 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -581,5 +581,6 @@ "memory_leak", "memory_limit", "polygon_dicts" // they use an explicitly specified database + "01658_read_file_to_stringcolumn" ] } From 609ced42ef5948f7e8ad9af7e275f3cc88ab5320 Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Fri, 12 Feb 2021 20:27:55 +0300 Subject: [PATCH 50/94] better --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 853b2df7aca..9002f1df140 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -490,6 +490,7 @@ include (cmake/find/rapidjson.cmake) include (cmake/find/fastops.cmake) include (cmake/find/odbc.cmake) include (cmake/find/rocksdb.cmake) +include (cmake/find/libpqxx.cmake) include (cmake/find/nuraft.cmake) From 801d109234f68baceb7894f0008790248192d723 Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Fri, 12 Feb 2021 22:05:31 +0300 Subject: [PATCH 51/94] fix --- tests/queries/skip_list.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 7a0bd3375f3..f3a21092aa0 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -580,7 +580,7 @@ "live_view", "memory_leak", "memory_limit", - "polygon_dicts" // they use an explicitly specified database + "polygon_dicts", // they use an explicitly specified database "01658_read_file_to_stringcolumn" ] } From 184ec67dac727f89702ce12db5d7b51a8dfc2f25 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 12 Feb 2021 22:23:50 +0300 Subject: [PATCH 52/94] better ddl queue cleanup --- src/Common/ZooKeeper/ZooKeeper.cpp | 21 +-- src/Common/ZooKeeper/ZooKeeper.h | 11 +- src/Interpreters/DDLWorker.cpp | 149 +++++++++++------- .../test_distributed_ddl/cluster.py | 8 +- .../integration/test_distributed_ddl/test.py | 2 +- .../test_replicated_alter.py | 2 +- 6 files changed, 114 insertions(+), 79 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 4537d5ad8cd..a1c6eb9b481 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -602,7 +602,7 @@ void ZooKeeper::removeChildren(const std::string & path) } -void ZooKeeper::removeChildrenRecursive(const std::string & path) +void ZooKeeper::removeChildrenRecursive(const std::string & path, const String & keep_child_node) { Strings children = getChildren(path); while (!children.empty()) @@ -611,14 +611,15 @@ void ZooKeeper::removeChildrenRecursive(const std::string & path) for (size_t i = 0; i < MULTI_BATCH_SIZE && !children.empty(); ++i) { removeChildrenRecursive(path + "/" + children.back()); - ops.emplace_back(makeRemoveRequest(path + "/" + children.back(), -1)); + if (likely(keep_child_node.empty() || keep_child_node != children.back())) + ops.emplace_back(makeRemoveRequest(path + "/" + children.back(), -1)); children.pop_back(); } multi(ops); } } -void ZooKeeper::tryRemoveChildrenRecursive(const std::string & path) +void ZooKeeper::tryRemoveChildrenRecursive(const std::string & path, const String & keep_child_node) { Strings children; if (tryGetChildren(path, children) != Coordination::Error::ZOK) @@ -629,14 +630,14 @@ void ZooKeeper::tryRemoveChildrenRecursive(const std::string & path) Strings batch; for (size_t i = 0; i < MULTI_BATCH_SIZE && !children.empty(); ++i) { - batch.push_back(path + "/" + children.back()); + String child_path = path + "/" + children.back(); + tryRemoveChildrenRecursive(child_path); + if (likely(keep_child_node.empty() || keep_child_node != children.back())) + { + batch.push_back(child_path); + ops.emplace_back(zkutil::makeRemoveRequest(child_path, -1)); + } children.pop_back(); - tryRemoveChildrenRecursive(batch.back()); - - Coordination::RemoveRequest request; - request.path = batch.back(); - - ops.emplace_back(std::make_shared(std::move(request))); } /// Try to remove the children with a faster method - in bulk. If this fails, diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 0d9dc104c48..90d15e2ac4a 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -184,6 +184,12 @@ public: /// result would be the same as for the single call. void tryRemoveRecursive(const std::string & path); + /// Similar to removeRecursive(...) and tryRemoveRecursive(...), but does not remove path itself. + /// If keep_child_node is not empty, this method will not remove path/keep_child_node (but will remove its subtree). + /// It can be useful to keep some child node as a flag which indicates that path is currently removing. + void removeChildrenRecursive(const std::string & path, const String & keep_child_node = {}); + void tryRemoveChildrenRecursive(const std::string & path, const String & keep_child_node = {}); + /// Remove all children nodes (non recursive). void removeChildren(const std::string & path); @@ -246,9 +252,6 @@ private: void init(const std::string & implementation_, const std::string & hosts_, const std::string & identity_, int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_); - void removeChildrenRecursive(const std::string & path); - void tryRemoveChildrenRecursive(const std::string & path); - /// The following methods don't throw exceptions but return error codes. Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created); Coordination::Error removeImpl(const std::string & path, int32_t version); @@ -320,7 +323,7 @@ public: catch (...) { ProfileEvents::increment(ProfileEvents::CannotRemoveEphemeralNode); - DB::tryLogCurrentException(__PRETTY_FUNCTION__); + DB::tryLogCurrentException(__PRETTY_FUNCTION__, "Cannot remove " + path + ": "); } } diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 05370a6a3b7..fc460a5584c 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -652,15 +652,10 @@ void DDLWorker::enqueueTask(DDLTaskPtr task_ptr) { recoverZooKeeper(); } - else if (e.code == Coordination::Error::ZNONODE) - { - LOG_ERROR(log, "ZooKeeper error: {}", getCurrentExceptionMessage(true)); - // TODO: retry? - } else { LOG_ERROR(log, "Unexpected ZooKeeper error: {}.", getCurrentExceptionMessage(true)); - return; + throw; } } catch (...) @@ -695,25 +690,44 @@ void DDLWorker::processTask(DDLTask & task) LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.entry.query); - String dummy; String active_node_path = task.entry_path + "/active/" + task.host_id_str; String finished_node_path = task.entry_path + "/finished/" + task.host_id_str; - auto code = zookeeper->tryCreate(active_node_path, "", zkutil::CreateMode::Ephemeral, dummy); + /// It will tryRemove(...) on exception + auto active_node = zkutil::EphemeralNodeHolder::existing(active_node_path, *zookeeper); - if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS) + /// Try fast path + auto create_active_res = zookeeper->tryCreate(active_node_path, {}, zkutil::CreateMode::Ephemeral); + if (create_active_res != Coordination::Error::ZOK) { - // Ok + if (create_active_res != Coordination::Error::ZNONODE && create_active_res != Coordination::Error::ZNODEEXISTS) + { + assert(Coordination::isHardwareError(create_active_res)); + throw Coordination::Exception(create_active_res, active_node_path); + } + + /// Status dirs were not created in enqueueQuery(...) or someone is removing entry + if (create_active_res == Coordination::Error::ZNONODE) + createStatusDirs(task.entry_path, zookeeper); + + if (create_active_res == Coordination::Error::ZNODEEXISTS) + { + /// Connection has been lost and now we are retrying to write query status, + /// but our previous ephemeral node still exists. + assert(task.was_executed); + zkutil::EventPtr eph_node_disappeared = std::make_shared(); + String dummy; + if (zookeeper->tryGet(active_node_path, dummy, nullptr, eph_node_disappeared)) + { + constexpr int timeout_ms = 5000; + if (!eph_node_disappeared->tryWait(timeout_ms)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Ephemeral node {} still exists, " + "probably it's owned by someone else", active_node_path); + } + } + + zookeeper->create(active_node_path, {}, zkutil::CreateMode::Ephemeral); } - else if (code == Coordination::Error::ZNONODE) - { - /// There is no parent - createStatusDirs(task.entry_path, zookeeper); - if (Coordination::Error::ZOK != zookeeper->tryCreate(active_node_path, "", zkutil::CreateMode::Ephemeral, dummy)) - throw Coordination::Exception(code, active_node_path); - } - else - throw Coordination::Exception(code, active_node_path); if (!task.was_executed) { @@ -969,7 +983,6 @@ void DDLWorker::cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zo String node_name = *it; String node_path = fs::path(queue_dir) / node_name; - String lock_path = fs::path(node_path) / "lock"; Coordination::Stat stat; String dummy; @@ -991,19 +1004,14 @@ void DDLWorker::cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zo if (!node_lifetime_is_expired && !node_is_outside_max_window) continue; - /// Skip if there are active nodes (it is weak guard) - if (zookeeper->exists(fs::path(node_path) / "active", &stat) && stat.numChildren > 0) + /// At first we remove entry/active node to prevent staled hosts from executing entry concurrently + auto rm_active_res = zookeeper->tryRemove(fs::path(node_path) / "active"); + if (rm_active_res != Coordination::Error::ZOK && rm_active_res != Coordination::Error::ZNONODE) { - LOG_INFO(log, "Task {} should be deleted, but there are active workers. Skipping it.", node_name); - continue; - } - - /// Usage of the lock is not necessary now (tryRemoveRecursive correctly removes node in a presence of concurrent cleaners) - /// But the lock will be required to implement system.distributed_ddl_queue table - auto lock = createSimpleZooKeeperLock(zookeeper, node_path, "lock", host_fqdn_id); - if (!lock->tryLock()) - { - LOG_INFO(log, "Task {} should be deleted, but it is locked. Skipping it.", node_name); + if (rm_active_res == Coordination::Error::ZNOTEMPTY) + LOG_DEBUG(log, "Task {} should be deleted, but there are active workers. Skipping it.", node_name); + else + LOG_WARNING(log, "Unexpected status code {} on attempt to remove {}/active", rm_active_res, node_name); continue; } @@ -1012,21 +1020,33 @@ void DDLWorker::cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zo else if (node_is_outside_max_window) LOG_INFO(log, "Task {} is outdated, deleting it", node_name); - /// Deleting - { - Strings children = zookeeper->getChildren(node_path); - for (const String & child : children) - { - if (child != "lock") - zookeeper->tryRemoveRecursive(fs::path(node_path) / child); - } + /// We recursively delete all nodes except node_path/finished to prevent staled hosts from + /// creating node_path/active node (see createStatusDirs(...)) + zookeeper->tryRemoveChildrenRecursive(node_path, "finished"); - /// Remove the lock node and its parent atomically - Coordination::Requests ops; - ops.emplace_back(zkutil::makeRemoveRequest(lock_path, -1)); - ops.emplace_back(zkutil::makeRemoveRequest(node_path, -1)); - zookeeper->multi(ops); + /// And then we remove node_path and node_path/finished in a single transaction + Coordination::Requests ops; + Coordination::Responses res; + ops.emplace_back(zkutil::makeCheckRequest(node_path, -1)); /// See a comment below + ops.emplace_back(zkutil::makeRemoveRequest(fs::path(node_path) / "finished", -1)); + ops.emplace_back(zkutil::makeRemoveRequest(node_path, -1)); + auto rm_entry_res = zookeeper->tryMulti(ops, res); + if (rm_entry_res == Coordination::Error::ZNONODE) + { + /// Most likely both node_path/finished and node_path were removed concurrently. + bool entry_removed_concurrently = res[0]->error == Coordination::Error::ZNONODE; + if (entry_removed_concurrently) + continue; + + /// Possible rare case: initiator node has lost connection after enqueueing entry and failed to create status dirs. + /// No one has started to process the entry, so node_path/active and node_path/finished nodes were never created, node_path has no children. + /// Entry became outdated, but we cannot remove remove it in a transaction with node_path/finished. + assert(res[0]->error == Coordination::Error::ZOK && res[1]->error == Coordination::Error::ZNONODE); + rm_entry_res = zookeeper->tryRemove(node_path); + assert(rm_entry_res != Coordination::Error::ZNOTEMPTY); + continue; } + zkutil::KeeperMultiException::check(rm_entry_res, ops, res); } catch (...) { @@ -1040,21 +1060,32 @@ void DDLWorker::cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zo void DDLWorker::createStatusDirs(const std::string & node_path, const ZooKeeperPtr & zookeeper) { Coordination::Requests ops; - { - Coordination::CreateRequest request; - request.path = fs::path(node_path) / "active"; - ops.emplace_back(std::make_shared(std::move(request))); - } - { - Coordination::CreateRequest request; - request.path = fs::path(node_path) / "finished"; - ops.emplace_back(std::make_shared(std::move(request))); - } + ops.emplace_back(zkutil::makeCreateRequest(fs::path(node_path) / "active", {}, zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(fs::path(node_path) / "finished", {}, zkutil::CreateMode::Persistent)); + Coordination::Responses responses; Coordination::Error code = zookeeper->tryMulti(ops, responses); - if (code != Coordination::Error::ZOK - && code != Coordination::Error::ZNODEEXISTS) - throw Coordination::Exception(code); + + bool both_created = code == Coordination::Error::ZOK; + + /// Failed on attempt to create node_path/active because it exists, so node_path/finished must exist too + bool both_already_exists = responses.size() == 2 && responses[0]->error == Coordination::Error::ZNODEEXISTS + && responses[1]->error == Coordination::Error::ZRUNTIMEINCONSISTENCY; + assert(!both_already_exists || (zookeeper->exists(fs::path(node_path) / "active") && zookeeper->exists(fs::path(node_path) / "finished"))); + + /// Failed on attempt to create node_path/finished, but node_path/active does not exist + bool is_currently_deleting = responses.size() == 2 && responses[0]->error == Coordination::Error::ZOK + && responses[1]->error == Coordination::Error::ZNODEEXISTS; + if (both_created || both_already_exists) + return; + + if (is_currently_deleting) + throw Exception(ErrorCodes::UNFINISHED, "Cannot create status dirs for {}, " + "most likely because someone is deleting it concurrently", node_path); + + /// Connection lost or entry was removed + assert(Coordination::isHardwareError(code) || code == Coordination::Error::ZNONODE); + zkutil::KeeperMultiException::check(code, ops, responses); } @@ -1114,7 +1145,7 @@ void DDLWorker::runMainThread() if (!Coordination::isHardwareError(e.code)) { /// A logical error. - LOG_ERROR(log, "ZooKeeper error: {}. Failed to start DDLWorker.",getCurrentExceptionMessage(true)); + LOG_ERROR(log, "ZooKeeper error: {}. Failed to start DDLWorker.", getCurrentExceptionMessage(true)); reset_state(false); assert(false); /// Catch such failures in tests with debug build } diff --git a/tests/integration/test_distributed_ddl/cluster.py b/tests/integration/test_distributed_ddl/cluster.py index 811eb94bad4..24f11fec547 100644 --- a/tests/integration/test_distributed_ddl/cluster.py +++ b/tests/integration/test_distributed_ddl/cluster.py @@ -10,8 +10,8 @@ from helpers.test_tools import TSV class ClickHouseClusterWithDDLHelpers(ClickHouseCluster): - def __init__(self, base_path, config_dir): - ClickHouseCluster.__init__(self, base_path) + def __init__(self, base_path, config_dir, testcase_name): + ClickHouseCluster.__init__(self, base_path, name=testcase_name) self.test_config_dir = config_dir @@ -104,8 +104,8 @@ class ClickHouseClusterWithDDLHelpers(ClickHouseCluster): def ddl_check_there_are_no_dublicates(instance): query = "SELECT max(c), argMax(q, c) FROM (SELECT lower(query) AS q, count() AS c FROM system.query_log WHERE type=2 AND q LIKE '/* ddl_entry=query-%' GROUP BY query)" rows = instance.query(query) - assert len(rows) > 0 and rows[0][0] == "1", "dublicates on {} {}, query {}".format(instance.name, - instance.ip_address, query) + assert len(rows) > 0 and rows[0][0] == "1", "dublicates on {} {}: {}".format(instance.name, + instance.ip_address, rows) @staticmethod def insert_reliable(instance, query_insert): diff --git a/tests/integration/test_distributed_ddl/test.py b/tests/integration/test_distributed_ddl/test.py index f0e78dfec41..58e1d0d06f7 100755 --- a/tests/integration/test_distributed_ddl/test.py +++ b/tests/integration/test_distributed_ddl/test.py @@ -14,7 +14,7 @@ from .cluster import ClickHouseClusterWithDDLHelpers @pytest.fixture(scope="module", params=["configs", "configs_secure"]) def test_cluster(request): - cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param) + cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param, request.param) try: cluster.prepare() diff --git a/tests/integration/test_distributed_ddl/test_replicated_alter.py b/tests/integration/test_distributed_ddl/test_replicated_alter.py index bd95f5660b7..148ad5fca5e 100644 --- a/tests/integration/test_distributed_ddl/test_replicated_alter.py +++ b/tests/integration/test_distributed_ddl/test_replicated_alter.py @@ -12,7 +12,7 @@ from .cluster import ClickHouseClusterWithDDLHelpers @pytest.fixture(scope="module", params=["configs", "configs_secure"]) def test_cluster(request): - cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param) + cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param, "alters_" + request.param) try: # TODO: Fix ON CLUSTER alters when nodes have different configs. Need to canonicalize node identity. From 69d4120982fa2b7cae35da83532c8318f44bfc8f Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 12 Feb 2021 10:22:18 +0800 Subject: [PATCH 53/94] Disable table function view in expression --- src/Parsers/ASTFunction.cpp | 8 ++++++++ .../0_stateless/01715_table_function_view_fix.reference | 0 .../queries/0_stateless/01715_table_function_view_fix.sql | 1 + 3 files changed, 9 insertions(+) create mode 100644 tests/queries/0_stateless/01715_table_function_view_fix.reference create mode 100644 tests/queries/0_stateless/01715_table_function_view_fix.sql diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 806b8e6c5b9..29ac01eefc5 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -15,8 +15,16 @@ namespace DB { +namespace ErrorCodes +{ + extern const int UNEXPECTED_EXPRESSION; +} + void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const { + if (name == "view") + throw Exception("Table function view cannot be used as an expression", ErrorCodes::UNEXPECTED_EXPRESSION); + writeString(name, ostr); if (parameters) diff --git a/tests/queries/0_stateless/01715_table_function_view_fix.reference b/tests/queries/0_stateless/01715_table_function_view_fix.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01715_table_function_view_fix.sql b/tests/queries/0_stateless/01715_table_function_view_fix.sql new file mode 100644 index 00000000000..21da116f6ba --- /dev/null +++ b/tests/queries/0_stateless/01715_table_function_view_fix.sql @@ -0,0 +1 @@ +SELECT view(SELECT 1); -- { serverError 183 } From a551edd8d6e308569433a9158df1ee31a60844de Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sat, 13 Feb 2021 13:18:14 +0800 Subject: [PATCH 54/94] Do not parse view function in expression --- src/Parsers/ASTFunction.cpp | 8 -- src/Parsers/ExpressionElementParsers.cpp | 81 ++++++++++++------- src/Parsers/ExpressionElementParsers.h | 16 +++- src/Parsers/ExpressionListParsers.cpp | 17 +++- src/Parsers/ExpressionListParsers.h | 22 ++++- src/Parsers/ParserTablesInSelectQuery.cpp | 2 +- .../01715_table_function_view_fix.sql | 2 +- 7 files changed, 98 insertions(+), 50 deletions(-) diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 29ac01eefc5..806b8e6c5b9 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -15,16 +15,8 @@ namespace DB { -namespace ErrorCodes -{ - extern const int UNEXPECTED_EXPRESSION; -} - void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const { - if (name == "view") - throw Exception("Table function view cannot be used as an expression", ErrorCodes::UNEXPECTED_EXPRESSION); - writeString(name, ostr); if (parameters) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index e7cd85798b9..3d868812304 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -266,7 +266,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserIdentifier id_parser; ParserKeyword distinct("DISTINCT"); ParserKeyword all("ALL"); - ParserExpressionList contents(false); + ParserExpressionList contents(false, is_table_function); ParserSelectWithUnionQuery select; ParserKeyword over("OVER"); @@ -278,6 +278,12 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr expr_list_args; ASTPtr expr_list_params; + if (is_table_function) + { + if (ParserTableFunctionView().parse(pos, node, expected)) + return true; + } + if (!id_parser.parse(pos, identifier, expected)) return false; @@ -312,36 +318,6 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - if (!has_distinct && !has_all) - { - auto old_pos = pos; - auto maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket; - - if (select.parse(pos, query, expected)) - { - auto & select_ast = query->as(); - if (select_ast.list_of_selects->children.size() == 1 && maybe_an_subquery) - { - // It's an subquery. Bail out. - pos = old_pos; - } - else - { - if (pos->type != TokenType::ClosingRoundBracket) - return false; - ++pos; - auto function_node = std::make_shared(); - tryGetIdentifierNameInto(identifier, function_node->name); - auto expr_list_with_single_query = std::make_shared(); - expr_list_with_single_query->children.push_back(query); - function_node->arguments = expr_list_with_single_query; - function_node->children.push_back(function_node->arguments); - node = function_node; - return true; - } - } - } - const char * contents_begin = pos->begin; if (!contents.parse(pos, expr_list_args, expected)) return false; @@ -477,6 +453,49 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; } +bool ParserTableFunctionView::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserIdentifier id_parser; + ParserKeyword view("VIEW"); + ParserSelectWithUnionQuery select; + + ASTPtr identifier; + ASTPtr query; + + if (!view.ignore(pos, expected)) + return false; + + if (pos->type != TokenType::OpeningRoundBracket) + return false; + + ++pos; + + bool maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket; + + if (!select.parse(pos, query, expected)) + return false; + + auto & select_ast = query->as(); + if (select_ast.list_of_selects->children.size() == 1 && maybe_an_subquery) + { + // It's an subquery. Bail out. + return false; + } + + if (pos->type != TokenType::ClosingRoundBracket) + return false; + ++pos; + auto function_node = std::make_shared(); + tryGetIdentifierNameInto(identifier, function_node->name); + auto expr_list_with_single_query = std::make_shared(); + expr_list_with_single_query->children.push_back(query); + function_node->name = "view"; + function_node->arguments = expr_list_with_single_query; + function_node->children.push_back(function_node->arguments); + node = function_node; + return true; +} + bool ParserWindowReference::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTFunction * function = dynamic_cast(node.get()); diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index ba18fc2cddd..b6194f981fe 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -149,11 +149,25 @@ protected: class ParserFunction : public IParserBase { public: - ParserFunction(bool allow_function_parameters_ = true) : allow_function_parameters(allow_function_parameters_) {} + ParserFunction(bool allow_function_parameters_ = true, bool is_table_function_ = false) + : allow_function_parameters(allow_function_parameters_), is_table_function(is_table_function_) + { + } + protected: const char * getName() const override { return "function"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool allow_function_parameters; + bool is_table_function; +}; + +// A special function parser for view table function. +// It parses an SELECT query as its argument and doesn't support getColumnName(). +class ParserTableFunctionView : public IParserBase +{ +protected: + const char * getName() const override { return "function"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; // Window reference (the thing that goes after OVER) for window function. diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index afe85f069c7..e9ad65af471 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -468,6 +468,14 @@ bool ParserLambdaExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expe } +bool ParserTableFunctionExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (ParserTableFunctionView().parse(pos, node, expected)) + return true; + return elem_parser.parse(pos, node, expected); +} + + bool ParserPrefixUnaryOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { /// try to find any of the valid operators @@ -570,9 +578,10 @@ bool ParserTupleElementExpression::parseImpl(Pos & pos, ASTPtr & node, Expected } -ParserExpressionWithOptionalAlias::ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword) - : impl(std::make_unique(std::make_unique(), - allow_alias_without_as_keyword)) +ParserExpressionWithOptionalAlias::ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword, bool is_table_function) + : impl(std::make_unique( + is_table_function ? ParserPtr(std::make_unique()) : ParserPtr(std::make_unique()), + allow_alias_without_as_keyword)) { } @@ -580,7 +589,7 @@ ParserExpressionWithOptionalAlias::ParserExpressionWithOptionalAlias(bool allow_ bool ParserExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { return ParserList( - std::make_unique(allow_alias_without_as_keyword), + std::make_unique(allow_alias_without_as_keyword, is_table_function), std::make_unique(TokenType::Comma)) .parse(pos, node, expected); } diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 90b27950873..2371e006c09 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -436,13 +436,26 @@ protected: }; +// It's used to parse expressions in table function. +class ParserTableFunctionExpression : public IParserBase +{ +private: + ParserLambdaExpression elem_parser; + +protected: + const char * getName() const override { return "table function expression"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + using ParserExpression = ParserLambdaExpression; class ParserExpressionWithOptionalAlias : public IParserBase { public: - ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword); + explicit ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword, bool is_table_function = false); protected: ParserPtr impl; @@ -459,11 +472,12 @@ protected: class ParserExpressionList : public IParserBase { public: - ParserExpressionList(bool allow_alias_without_as_keyword_) - : allow_alias_without_as_keyword(allow_alias_without_as_keyword_) {} + explicit ParserExpressionList(bool allow_alias_without_as_keyword_, bool is_table_function_ = false) + : allow_alias_without_as_keyword(allow_alias_without_as_keyword_), is_table_function(is_table_function_) {} protected: bool allow_alias_without_as_keyword; + bool is_table_function; // This expression list is used by a table function const char * getName() const override { return "list of expressions"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; @@ -473,7 +487,7 @@ protected: class ParserNotEmptyExpressionList : public IParserBase { public: - ParserNotEmptyExpressionList(bool allow_alias_without_as_keyword) + explicit ParserNotEmptyExpressionList(bool allow_alias_without_as_keyword) : nested_parser(allow_alias_without_as_keyword) {} private: ParserExpressionList nested_parser; diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp index 1264acefe64..2e20279dbe1 100644 --- a/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/src/Parsers/ParserTablesInSelectQuery.cpp @@ -22,7 +22,7 @@ bool ParserTableExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expec auto res = std::make_shared(); if (!ParserWithOptionalAlias(std::make_unique(), true).parse(pos, res->subquery, expected) - && !ParserWithOptionalAlias(std::make_unique(), true).parse(pos, res->table_function, expected) + && !ParserWithOptionalAlias(std::make_unique(true, true), true).parse(pos, res->table_function, expected) && !ParserWithOptionalAlias(std::make_unique(false, true), true).parse(pos, res->database_and_table_name, expected)) return false; diff --git a/tests/queries/0_stateless/01715_table_function_view_fix.sql b/tests/queries/0_stateless/01715_table_function_view_fix.sql index 21da116f6ba..de5150b7b70 100644 --- a/tests/queries/0_stateless/01715_table_function_view_fix.sql +++ b/tests/queries/0_stateless/01715_table_function_view_fix.sql @@ -1 +1 @@ -SELECT view(SELECT 1); -- { serverError 183 } +SELECT view(SELECT 1); -- { clientError 62 } From b0f2a84306f34eb3d69fdbe40f841fc91bff8149 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sun, 14 Feb 2021 01:12:10 +0300 Subject: [PATCH 55/94] fix bad test --- tests/queries/0_stateless/01669_columns_declaration_serde.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01669_columns_declaration_serde.sql b/tests/queries/0_stateless/01669_columns_declaration_serde.sql index 8e3354d63cd..a6bf1184e9f 100644 --- a/tests/queries/0_stateless/01669_columns_declaration_serde.sql +++ b/tests/queries/0_stateless/01669_columns_declaration_serde.sql @@ -22,12 +22,12 @@ DROP TABLE IF EXISTS test_r1; DROP TABLE IF EXISTS test_r2; CREATE TABLE test_r1 (x UInt64, "\\" String DEFAULT '\r\n\t\\' || ' -') ENGINE = ReplicatedMergeTree('/clickhouse/test', 'r1') ORDER BY "\\"; +') ENGINE = ReplicatedMergeTree('/clickhouse/test_01669', 'r1') ORDER BY "\\"; INSERT INTO test_r1 ("\\") VALUES ('\\'); CREATE TABLE test_r2 (x UInt64, "\\" String DEFAULT '\r\n\t\\' || ' -') ENGINE = ReplicatedMergeTree('/clickhouse/test', 'r2') ORDER BY "\\"; +') ENGINE = ReplicatedMergeTree('/clickhouse/test_01669', 'r2') ORDER BY "\\"; SYSTEM SYNC REPLICA test_r2; From 48d38e497871556ce6bf3de2b18f8140a5474dbd Mon Sep 17 00:00:00 2001 From: damozhaeva <68770561+damozhaeva@users.noreply.github.com> Date: Sun, 14 Feb 2021 14:19:26 +0300 Subject: [PATCH 56/94] Update docs/ru/operations/settings/settings.md Co-authored-by: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> --- docs/ru/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 716345a9560..bd7fa97db5d 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1985,7 +1985,7 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1; ## output_format_pretty_grid_charset {#output-format-pretty-grid-charset} -ППозволяет изменить кодировку, которая используется для рисования таблицы при выводе результатов запросов. Доступны следующие кодировки: UTF-8, ASCII. +Позволяет изменить кодировку, которая используется для отрисовки таблицы при выводе результатов запросов. Доступны следующие кодировки: UTF-8, ASCII. **Пример** From 89f2cf52f3798b7280391d86a170da6651e2857a Mon Sep 17 00:00:00 2001 From: tavplubix Date: Sun, 14 Feb 2021 14:24:54 +0300 Subject: [PATCH 57/94] Update skip_list.json --- tests/queries/skip_list.json | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 07250cd9c90..0b4ac2b581b 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -574,6 +574,7 @@ "01676_dictget_in_default_expression", "01715_background_checker_blather_zookeeper", "01700_system_zookeeper_path_in", + "01669_columns_declaration_serde", "attach", "ddl_dictionaries", "dictionary", From 7f9436381f175eae6326bc7ddc9970f31849e499 Mon Sep 17 00:00:00 2001 From: Daria Mozhaeva Date: Sun, 14 Feb 2021 14:48:26 +0300 Subject: [PATCH 58/94] fixed conflict --- docs/en/operations/settings/settings.md | 79 +------------------------ 1 file changed, 1 insertion(+), 78 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index c91ed1f2400..50108531310 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -311,12 +311,8 @@ Enables or disables parsing enum values as enum ids for TSV input format. Possible values: -<<<<<<< HEAD -Enables or disables using default values if input data contain `NULL`, but the data type of the corresponding column in not `Nullable(T)` (for text input formats). -======= - 0 — Enum values are parsed as values. -- 1 — Enum values are parsed as enum IDs ->>>>>>> 6325b15a63335e2efd7de1ae92d2907493a07a9c +- 1 — Enum values are parsed as enum IDs. Default value: 0. @@ -1318,15 +1314,7 @@ See also: ## insert_quorum_timeout {#settings-insert_quorum_timeout} -<<<<<<< HEAD -<<<<<<< HEAD -Write to a quorum timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica. -======= Write to a quorum timeout in milliseconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica. ->>>>>>> 6325b15a63335e2efd7de1ae92d2907493a07a9c -======= -Write to a quorum timeout in milliseconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica. ->>>>>>> 547db452d63ba42b88e82cbe9a2aa1f5c683403f Default value: 600 000 milliseconds (ten minutes). @@ -1625,11 +1613,7 @@ Default value: 0. - Type: seconds - Default value: 60 seconds -<<<<<<< HEAD -Controls how fast errors in distributed tables are zeroed. If a replica is unavailable for some time, accumulates 5 errors, and distributed\_replica\_error\_half\_life is set to 1 second, then the replica is considered normal 3 seconds after the last error. -======= Controls how fast errors in distributed tables are zeroed. If a replica is unavailable for some time, accumulates 5 errors, and distributed_replica_error_half_life is set to 1 second, then the replica is considered normal 3 seconds after the last error. ->>>>>>> 6325b15a63335e2efd7de1ae92d2907493a07a9c See also: @@ -1875,8 +1859,6 @@ Default value: `0`. - [Distributed Table Engine](../../engines/table-engines/special/distributed.md#distributed) - [Managing Distributed Tables](../../sql-reference/statements/system.md#query-language-system-distributed) -<<<<<<< HEAD -======= ## insert_distributed_one_random_shard {#insert_distributed_one_random_shard} Enables or disables random shard insertion into a [Distributed](../../engines/table-engines/special/distributed.md#distributed) table when there is no distributed key. @@ -1907,7 +1889,6 @@ Default value: `1`. ## background_buffer_flush_schedule_pool_size {#background_buffer_flush_schedule_pool_size} ->>>>>>> 6325b15a63335e2efd7de1ae92d2907493a07a9c Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at the ClickHouse server start and can’t be changed in a user session. Possible values: @@ -2224,11 +2205,7 @@ Default value: `0`. ## lock_acquire_timeout {#lock_acquire_timeout} -<<<<<<< HEAD -Defines how many seconds a locking request waits before failing. -======= Defines how many seconds a locking request waits before failing. ->>>>>>> 6325b15a63335e2efd7de1ae92d2907493a07a9c Locking timeout is used to protect from deadlocks while executing read/write operations with tables. When the timeout expires and the locking request fails, the ClickHouse server throws an exception "Locking attempt timed out! Possible deadlock avoided. Client should retry." with error code `DEADLOCK_AVOIDED`. @@ -2615,58 +2592,4 @@ Possible values: Default value: `16`. -## optimize_on_insert {#optimize-on-insert} - -Enables or disables data transformation before the insertion, as if merge was done on this block (according to table engine). - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 1. - -**Example** - -The difference between enabled and disabled: - -Query: - -```sql -SET optimize_on_insert = 1; - -CREATE TABLE test1 (`FirstTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY FirstTable; - -INSERT INTO test1 SELECT number % 2 FROM numbers(5); - -SELECT * FROM test1; - -SET optimize_on_insert = 0; - -CREATE TABLE test2 (`SecondTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY SecondTable; - -INSERT INTO test2 SELECT number % 2 FROM numbers(5); - -SELECT * FROM test2; -``` - -Result: - -``` text -┌─FirstTable─┐ -│ 0 │ -│ 1 │ -└────────────┘ - -┌─SecondTable─┐ -│ 0 │ -│ 0 │ -│ 0 │ -│ 1 │ -│ 1 │ -└─────────────┘ -``` - -Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour. - [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) From 00bb72735eef1b11f406a8e139d4667d8c7e8b4d Mon Sep 17 00:00:00 2001 From: Daria Mozhaeva Date: Sun, 14 Feb 2021 15:55:40 +0300 Subject: [PATCH 59/94] add text --- docs/en/operations/settings/settings.md | 54 +++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 50108531310..40a68491682 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2592,4 +2592,58 @@ Possible values: Default value: `16`. +## optimize_on_insert {#optimize-on-insert} + +Enables or disables data transformation before the insertion, as if merge was done on this block (according to table engine). + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + +**Example** + +The difference between enabled and disabled: + +Query: + +```sql +SET optimize_on_insert = 1; + +CREATE TABLE test1 (`FirstTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY FirstTable; + +INSERT INTO test1 SELECT number % 2 FROM numbers(5); + +SELECT * FROM test1; + +SET optimize_on_insert = 0; + +CREATE TABLE test2 (`SecondTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY SecondTable; + +INSERT INTO test2 SELECT number % 2 FROM numbers(5); + +SELECT * FROM test2; +``` + +Result: + +``` text +┌─FirstTable─┐ +│ 0 │ +│ 1 │ +└────────────┘ + +┌─SecondTable─┐ +│ 0 │ +│ 0 │ +│ 0 │ +│ 1 │ +│ 1 │ +└─────────────┘ +``` + +Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour. + [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) From c1550814ca770a0ecb9aec0de8eeb77dee266ca4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Feb 2021 10:30:08 +0300 Subject: [PATCH 60/94] Disable snapshots for tests --- src/Coordination/NuKeeperStateMachine.cpp | 2 ++ tests/config/config.d/test_keeper_port.xml | 2 ++ tests/queries/skip_list.json | 14 ++------------ 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/Coordination/NuKeeperStateMachine.cpp b/src/Coordination/NuKeeperStateMachine.cpp index d282f57ce73..0061645c75c 100644 --- a/src/Coordination/NuKeeperStateMachine.cpp +++ b/src/Coordination/NuKeeperStateMachine.cpp @@ -166,6 +166,8 @@ void NuKeeperStateMachine::create_snapshot( } } + + LOG_DEBUG(log, "Created snapshot {}", s.get_last_log_idx()); nuraft::ptr except(nullptr); bool ret = true; when_done(ret, except); diff --git a/tests/config/config.d/test_keeper_port.xml b/tests/config/config.d/test_keeper_port.xml index 6ca00a972d4..97c6d7c2e33 100644 --- a/tests/config/config.d/test_keeper_port.xml +++ b/tests/config/config.d/test_keeper_port.xml @@ -6,6 +6,8 @@ 10000 30000 + 0 + 0 diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index ee25bee6a0a..e4e7504ba41 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -10,7 +10,6 @@ "00152_insert_different_granularity", "00151_replace_partition_with_different_granularity", "00157_cache_dictionary", - "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin) "01193_metadata_loading", "01473_event_time_microseconds", "01526_max_untracked_memory", /// requires TraceCollector, does not available under sanitizers @@ -26,7 +25,6 @@ "memory_profiler", "odbc_roundtrip", "01103_check_cpu_instructions_at_startup", - "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin) "01473_event_time_microseconds", "01526_max_untracked_memory", /// requires TraceCollector, does not available under sanitizers "01193_metadata_loading" @@ -37,7 +35,6 @@ "memory_profiler", "01103_check_cpu_instructions_at_startup", "00900_orc_load", - "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin) "01473_event_time_microseconds", "01526_max_untracked_memory", /// requires TraceCollector, does not available under sanitizers "01193_metadata_loading" @@ -49,7 +46,6 @@ "01103_check_cpu_instructions_at_startup", "01086_odbc_roundtrip", /// can't pass because odbc libraries are not instrumented "00877_memory_limit_for_new_delete", /// memory limits don't work correctly under msan because it replaces malloc/free - "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin) "01473_event_time_microseconds", "01526_max_untracked_memory", /// requires TraceCollector, does not available under sanitizers "01193_metadata_loading" @@ -61,7 +57,6 @@ "00980_alter_settings_race", "00834_kill_mutation_replicated_zookeeper", "00834_kill_mutation", - "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin) "01200_mutations_memory_consumption", "01103_check_cpu_instructions_at_startup", "01037_polygon_dicts_", @@ -87,7 +82,6 @@ "00505_secure", "00505_shard_secure", "odbc_roundtrip", - "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin) "01103_check_cpu_instructions_at_startup", "01114_mysql_database_engine_segfault", "00834_cancel_http_readonly_queries_on_client_close", @@ -101,19 +95,16 @@ "01455_time_zones" ], "release-build": [ - "00992_system_parts_race_condition_zookeeper" /// TODO remove me (alesapin) ], "database-ordinary": [ "00604_show_create_database", "00609_mv_index_in_in", "00510_materizlized_view_and_deduplication_zookeeper", - "00738_lock_for_inner_table", - "00992_system_parts_race_condition_zookeeper" /// TODO remove me (alesapin) + "00738_lock_for_inner_table" ], "polymorphic-parts": [ "01508_partition_pruning_long", /// bug, shoud be fixed - "01482_move_to_prewhere_and_cast", /// bug, shoud be fixed - "00992_system_parts_race_condition_zookeeper" /// TODO remove me (alesapin) + "01482_move_to_prewhere_and_cast" /// bug, shoud be fixed ], "antlr": [ "00186_very_long_arrays", @@ -153,7 +144,6 @@ "00982_array_enumerate_uniq_ranked", "00984_materialized_view_to_columns", "00988_constraints_replication_zookeeper", - "00992_system_parts_race_condition_zookeeper", /// TODO remove me (alesapin) "00995_order_by_with_fill", "01001_enums_in_in_section", "01011_group_uniq_array_memsan", From 02198d091ed5539e6683c607a6ee169edb09041c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 15 Feb 2021 10:45:19 +0300 Subject: [PATCH 61/94] Add proper checks while parsing directory names for async INSERT (fixes SIGSEGV) --- src/Storages/Distributed/DirectoryMonitor.cpp | 39 ++++++++++++++++--- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index bf15ca22ca9..6fe98c53b3e 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -48,6 +48,7 @@ namespace ErrorCodes extern const int TOO_LARGE_SIZE_COMPRESSED; extern const int ATTEMPT_TO_READ_AFTER_EOF; extern const int EMPTY_DATA_PASSED; + extern const int INCORRECT_FILE_NAME; } @@ -56,14 +57,26 @@ namespace constexpr const std::chrono::minutes decrease_error_count_period{5}; template - ConnectionPoolPtrs createPoolsForAddresses(const std::string & name, PoolFactory && factory) + ConnectionPoolPtrs createPoolsForAddresses(const std::string & name, PoolFactory && factory, Poco::Logger * log) { ConnectionPoolPtrs pools; for (auto it = boost::make_split_iterator(name, boost::first_finder(",")); it != decltype(it){}; ++it) { Cluster::Address address = Cluster::Address::fromFullString(boost::copy_range(*it)); - pools.emplace_back(factory(address)); + try + { + pools.emplace_back(factory(address)); + } + catch (const Exception & e) + { + if (e.code() == ErrorCodes::INCORRECT_FILE_NAME) + { + tryLogCurrentException(log); + continue; + } + throw; + } } return pools; @@ -351,16 +364,30 @@ void StorageDistributedDirectoryMonitor::run() ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::string & name, const StorageDistributed & storage) { - const auto pool_factory = [&storage] (const Cluster::Address & address) -> ConnectionPoolPtr + const auto pool_factory = [&storage, &name] (const Cluster::Address & address) -> ConnectionPoolPtr { const auto & cluster = storage.getCluster(); const auto & shards_info = cluster->getShardsInfo(); const auto & shards_addresses = cluster->getShardsAddresses(); - /// check new format shard{shard_index}_number{number_index} + /// check new format shard{shard_index}_number{replica_index} + /// (shard_index and replica_index starts from 1) if (address.shard_index != 0) { - return shards_info[address.shard_index - 1].per_replica_pools[address.replica_index - 1]; + if (!address.replica_index) + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, + "Wrong replica_index ({})", address.replica_index, name); + + if (address.shard_index > shards_info.size()) + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, + "No shard with shard_index={} ({})", address.shard_index, name); + + const auto & shard_info = shards_info[address.shard_index - 1]; + if (address.replica_index > shard_info.per_replica_pools.size()) + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, + "No shard with replica_index={} ({})", address.replica_index, name); + + return shard_info.per_replica_pools[address.replica_index - 1]; } /// existing connections pool have a higher priority @@ -398,7 +425,7 @@ ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::stri address.secure); }; - auto pools = createPoolsForAddresses(name, pool_factory); + auto pools = createPoolsForAddresses(name, pool_factory, storage.log); const auto settings = storage.global_context.getSettings(); return pools.size() == 1 ? pools.front() : std::make_shared(pools, From 9686649b0229cc4f492dbf646d6342d587f02657 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Feb 2021 12:42:50 +0300 Subject: [PATCH 62/94] Fix non-zero session reconnect in integration test --- tests/integration/test_testkeeper_back_to_back/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_testkeeper_back_to_back/test.py b/tests/integration/test_testkeeper_back_to_back/test.py index 0f2c1ed19a5..8ec54f1a883 100644 --- a/tests/integration/test_testkeeper_back_to_back/test.py +++ b/tests/integration/test_testkeeper_back_to_back/test.py @@ -29,8 +29,8 @@ def get_fake_zk(): def reset_last_zxid_listener(state): print("Fake zk callback called for state", state) global _fake_zk_instance - # reset last_zxid -- fake server doesn't support it - _fake_zk_instance.last_zxid = 0 + if state != KazooState.CONNECTED: + _fake_zk_instance._reset() _fake_zk_instance.add_listener(reset_last_zxid_listener) _fake_zk_instance.start() From ac476ad83e526d8afec591189f10c5933edf68e7 Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Mon, 15 Feb 2021 14:27:16 +0300 Subject: [PATCH 63/94] done --- .../1_stateful/00158_cache_dictionary_has.reference | 6 +++--- tests/queries/1_stateful/00158_cache_dictionary_has.sql | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/queries/1_stateful/00158_cache_dictionary_has.reference b/tests/queries/1_stateful/00158_cache_dictionary_has.reference index f8d5cd4f53d..ad4bce6bec5 100644 --- a/tests/queries/1_stateful/00158_cache_dictionary_has.reference +++ b/tests/queries/1_stateful/00158_cache_dictionary_has.reference @@ -1,6 +1,6 @@ +100 6410 -6410 -25323 +100 25323 -1774655 +100 1774655 diff --git a/tests/queries/1_stateful/00158_cache_dictionary_has.sql b/tests/queries/1_stateful/00158_cache_dictionary_has.sql index 063e7843fd4..8461728c58e 100644 --- a/tests/queries/1_stateful/00158_cache_dictionary_has.sql +++ b/tests/queries/1_stateful/00158_cache_dictionary_has.sql @@ -6,15 +6,15 @@ CREATE DICTIONARY db_dict.cache_hits PRIMARY KEY WatchID SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'hits' PASSWORD '' DB 'test')) LIFETIME(MIN 300 MAX 600) -LAYOUT(CACHE(SIZE_IN_CELLS 100000 QUERY_WAIT_TIMEOUT_MILLISECONDS 600000)); +LAYOUT(CACHE(SIZE_IN_CELLS 100 QUERY_WAIT_TIMEOUT_MILLISECONDS 600000)); -SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 1400 == 0); +SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 1400 == 0 LIMIT 100); SELECT count() from test.hits PREWHERE WatchID % 1400 == 0; -SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 350 == 0); +SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 350 == 0 LIMIT 100); SELECT count() from test.hits PREWHERE WatchID % 350 == 0; -SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 5 == 0); +SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 5 == 0 LIMIT 100); SELECT count() from test.hits PREWHERE WatchID % 5 == 0; DROP DICTIONARY IF EXISTS db_dict.cache_hits; From 3f86ce4c67371cb87263367e7eea0cc0dafaabb4 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Mon, 15 Feb 2021 15:04:30 +0300 Subject: [PATCH 64/94] Update StorageReplicatedMergeTree.cpp --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 097b7679899..518577c473c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -751,7 +751,7 @@ void StorageReplicatedMergeTree::drop() auto zookeeper = global_context.getZooKeeper(); /// If probably there is metadata in ZooKeeper, we don't allow to drop the table. - if (is_readonly || !zookeeper) + if (!zookeeper) throw Exception("Can't drop readonly replicated table (need to drop data in ZooKeeper as well)", ErrorCodes::TABLE_IS_READ_ONLY); shutdown(); From d615b8e516569ddf69ad92cd3b73f6591c0b7248 Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Mon, 15 Feb 2021 16:10:14 +0300 Subject: [PATCH 65/94] more checks (cherry picked from commit b45168ecaf37d0061edfd12c67a8c5300d45d2e3) --- src/Formats/JSONEachRowUtils.cpp | 11 ++++++++--- src/IO/BufferWithOwnMemory.h | 6 +++--- src/Processors/Formats/Impl/CSVRowInputFormat.cpp | 13 +++++++++---- .../Formats/Impl/RegexpRowInputFormat.cpp | 5 ++++- .../Formats/Impl/TabSeparatedRowInputFormat.cpp | 8 +++++--- 5 files changed, 29 insertions(+), 14 deletions(-) diff --git a/src/Formats/JSONEachRowUtils.cpp b/src/Formats/JSONEachRowUtils.cpp index 56bef9e09ea..28ba625d9fb 100644 --- a/src/Formats/JSONEachRowUtils.cpp +++ b/src/Formats/JSONEachRowUtils.cpp @@ -6,6 +6,7 @@ namespace DB namespace ErrorCodes { extern const int INCORRECT_DATA; + extern const int LOGICAL_ERROR; } std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) @@ -28,7 +29,9 @@ std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D if (quotes) { pos = find_first_symbols<'\\', '"'>(pos, in.buffer().end()); - if (pos == in.buffer().end()) + if (pos > in.buffer().end()) + throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + else if (pos == in.buffer().end()) continue; if (*pos == '\\') { @@ -45,9 +48,11 @@ std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D else { pos = find_first_symbols<'{', '}', '\\', '"'>(pos, in.buffer().end()); - if (pos == in.buffer().end()) + if (pos > in.buffer().end()) + throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + else if (pos == in.buffer().end()) continue; - if (*pos == '{') + else if (*pos == '{') { ++balance; ++pos; diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h index 782eea84ed7..f8cc8b7febb 100644 --- a/src/IO/BufferWithOwnMemory.h +++ b/src/IO/BufferWithOwnMemory.h @@ -35,10 +35,10 @@ struct Memory : boost::noncopyable, Allocator char * m_data = nullptr; size_t alignment = 0; - Memory() {} + Memory() = default; /// If alignment != 0, then allocate memory aligned to specified value. - Memory(size_t size_, size_t alignment_ = 0) : m_capacity(size_), m_size(m_capacity), alignment(alignment_) + explicit Memory(size_t size_, size_t alignment_ = 0) : m_capacity(size_), m_size(m_capacity), alignment(alignment_) { alloc(); } @@ -140,7 +140,7 @@ protected: Memory<> memory; public: /// If non-nullptr 'existing_memory' is passed, then buffer will not create its own memory and will use existing_memory without ownership. - BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0) + explicit BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0) : Base(nullptr, 0), memory(existing_memory ? 0 : size, alignment) { Base::set(existing_memory ? existing_memory : memory.data(), size); diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 8422f09e364..f7f08411dfa 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -15,6 +15,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int INCORRECT_DATA; + extern const int LOGICAL_ERROR; } @@ -436,9 +437,11 @@ static std::pair fileSegmentationEngineCSVImpl(ReadBuffer & in, DB if (quotes) { pos = find_first_symbols<'"'>(pos, in.buffer().end()); - if (pos == in.buffer().end()) + if (pos > in.buffer().end()) + throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + else if (pos == in.buffer().end()) continue; - if (*pos == '"') + else if (*pos == '"') { ++pos; if (loadAtPosition(in, memory, pos) && *pos == '"') @@ -450,9 +453,11 @@ static std::pair fileSegmentationEngineCSVImpl(ReadBuffer & in, DB else { pos = find_first_symbols<'"', '\r', '\n'>(pos, in.buffer().end()); - if (pos == in.buffer().end()) + if (pos > in.buffer().end()) + throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + else if (pos == in.buffer().end()) continue; - if (*pos == '"') + else if (*pos == '"') { quotes = true; ++pos; diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp index 6e14a1dc3c8..108f4d9d321 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp @@ -11,6 +11,7 @@ namespace ErrorCodes { extern const int INCORRECT_DATA; extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; } RegexpRowInputFormat::RegexpRowInputFormat( @@ -182,7 +183,9 @@ static std::pair fileSegmentationEngineRegexpImpl(ReadBuffer & in, while (loadAtPosition(in, memory, pos) && need_more_data) { pos = find_first_symbols<'\n', '\r'>(pos, in.buffer().end()); - if (pos == in.buffer().end()) + if (pos > in.buffer().end()) + throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + else if (pos == in.buffer().end()) continue; // Support DOS-style newline ("\r\n") diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 69a5e61caf2..96b01a5bd9b 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -15,6 +15,7 @@ namespace DB namespace ErrorCodes { extern const int INCORRECT_DATA; + extern const int LOGICAL_ERROR; } @@ -433,10 +434,11 @@ static std::pair fileSegmentationEngineTabSeparatedImpl(ReadBuffer { pos = find_first_symbols<'\\', '\r', '\n'>(pos, in.buffer().end()); - if (pos == in.buffer().end()) + if (pos > in.buffer().end()) + throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + else if (pos == in.buffer().end()) continue; - - if (*pos == '\\') + else if (*pos == '\\') { ++pos; if (loadAtPosition(in, memory, pos)) From 812641f5a70f0912d809961f10bc6a9d39d2cb1c Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Mon, 15 Feb 2021 16:38:31 +0300 Subject: [PATCH 66/94] add test to arcadia skip list --- tests/queries/0_stateless/arcadia_skip_list.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index 38d5d3871f5..b141443a979 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -189,6 +189,7 @@ 01650_fetch_patition_with_macro_in_zk_path 01651_bugs_from_15889 01655_agg_if_nullable +01658_read_file_to_stringcolumn 01182_materialized_view_different_structure 01660_sum_ubsan 01669_columns_declaration_serde From 8d11d09615bd89670594972ab36dfb6f29dafeea Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 15 Feb 2021 21:00:50 +0300 Subject: [PATCH 67/94] Add a test for malformed directores for Distributed async INSERT --- .../__init__.py | 0 .../configs/remote_servers.xml | 13 ++++++ .../test.py | 43 +++++++++++++++++++ 3 files changed, 56 insertions(+) create mode 100644 tests/integration/test_insert_distributed_async_extra_dirs/__init__.py create mode 100644 tests/integration/test_insert_distributed_async_extra_dirs/configs/remote_servers.xml create mode 100644 tests/integration/test_insert_distributed_async_extra_dirs/test.py diff --git a/tests/integration/test_insert_distributed_async_extra_dirs/__init__.py b/tests/integration/test_insert_distributed_async_extra_dirs/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_insert_distributed_async_extra_dirs/configs/remote_servers.xml b/tests/integration/test_insert_distributed_async_extra_dirs/configs/remote_servers.xml new file mode 100644 index 00000000000..1df72377ce6 --- /dev/null +++ b/tests/integration/test_insert_distributed_async_extra_dirs/configs/remote_servers.xml @@ -0,0 +1,13 @@ + + + + + + node + 9000 + + + + + + diff --git a/tests/integration/test_insert_distributed_async_extra_dirs/test.py b/tests/integration/test_insert_distributed_async_extra_dirs/test.py new file mode 100644 index 00000000000..8365fce298d --- /dev/null +++ b/tests/integration/test_insert_distributed_async_extra_dirs/test.py @@ -0,0 +1,43 @@ +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name +# pylint: disable=line-too-long + +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance('node', main_configs=['configs/remote_servers.xml'], stay_alive=True) + +@pytest.fixture(scope='module', autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + +def test_insert_distributed_async_send_success(): + node.query('CREATE TABLE data (key Int, value String) Engine=Null()') + node.query(""" + CREATE TABLE dist AS data + Engine=Distributed( + test_cluster, + currentDatabase(), + data, + key + ) + """) + + node.exec_in_container(['bash', '-c', 'mkdir /var/lib/clickhouse/data/default/dist/shard10000_replica10000']) + node.exec_in_container(['bash', '-c', 'touch /var/lib/clickhouse/data/default/dist/shard10000_replica10000/1.bin']) + + node.exec_in_container(['bash', '-c', 'mkdir /var/lib/clickhouse/data/default/dist/shard1_replica10000']) + node.exec_in_container(['bash', '-c', 'touch /var/lib/clickhouse/data/default/dist/shard1_replica10000/1.bin']) + + node.exec_in_container(['bash', '-c', 'mkdir /var/lib/clickhouse/data/default/dist/shard10000_replica1']) + node.exec_in_container(['bash', '-c', 'touch /var/lib/clickhouse/data/default/dist/shard10000_replica1/1.bin']) + + # will check that clickhouse-server is alive + node.restart_clickhouse() From e3003add577d26444a6056a55cea30ca8b3285a6 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 15 Feb 2021 01:12:02 +0300 Subject: [PATCH 68/94] HashTable fix bug during resize with nonstandard grower --- src/Common/HashTable/HashTable.h | 3 +- src/Common/tests/gtest_hash_table.cpp | 48 +++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h index bf159e27731..892bd0b2ba9 100644 --- a/src/Common/HashTable/HashTable.h +++ b/src/Common/HashTable/HashTable.h @@ -539,7 +539,8 @@ protected: * after transferring all the elements from the old halves you need to [ o x ] * process tail from the collision resolution chain immediately after it [ o x ] */ - for (; !buf[i].isZero(*this); ++i) + size_t new_size = grower.bufSize(); + for (; i < new_size && !buf[i].isZero(*this); ++i) { size_t updated_place_value = reinsert(buf[i], buf[i].getHash(*this)); diff --git a/src/Common/tests/gtest_hash_table.cpp b/src/Common/tests/gtest_hash_table.cpp index 41255dcbba1..1c673166ca9 100644 --- a/src/Common/tests/gtest_hash_table.cpp +++ b/src/Common/tests/gtest_hash_table.cpp @@ -317,3 +317,51 @@ TEST(HashTable, SerializationDeserialization) ASSERT_EQ(convertToSet(cont), convertToSet(deserialized)); } } + +template +struct IdentityHash +{ + size_t operator()(T x) const { return x; } +}; + +struct OneElementResizeGrower +{ + /// If collision resolution chains are contiguous, we can implement erase operation by moving the elements. + static constexpr auto performs_linear_probing_with_single_step = true; + + static constexpr size_t initial_count = 1; + + size_t bufSize() const { return buf_size; } + + size_t place(size_t x) const { return x % buf_size; } + + size_t next(size_t pos) const { return (pos + 1) % buf_size; } + + bool overflow(size_t elems) const { return elems >= buf_size; } + + void increaseSize() { ++buf_size; } + + void set(size_t) { } + + void setBufSize(size_t buf_size_) { buf_size = buf_size_; } + + size_t buf_size = initial_count; +}; + +TEST(HashTable, Resize) +{ + { + /// Test edge case if after resize all cells are resized in end of buf and will take half of + /// hash table place. + using HashSet = HashSet, OneElementResizeGrower>; + HashSet cont; + + cont.insert(3); + cont.insert(1); + + std::set expected = {1, 3}; + std::set actual = convertToSet(cont); + + ASSERT_EQ(actual, expected); + } +} From d08dcb1958a565ad62d2e688413c3942c20e91f6 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 15 Feb 2021 22:35:49 +0300 Subject: [PATCH 69/94] Update docs/en/operations/settings/settings.md --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index f64c623415b..963f9fa18bd 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2598,7 +2598,7 @@ Sets the probability that the ClickHouse can start a trace for executed queries Possible values: -- 0 — The trace for a executed queries is disabled (if no parent trace context is supplied). +- 0 — The trace for all executed queries is disabled (if no parent trace context is supplied). - Positive floating-point number in the range [0..1]. For example, if the setting value is `0,5`, ClickHouse can start a trace on average for half of the queries. - 1 — The trace for all executed queries is enabled. From 7f21a216941ae6557e8ac5f75d9093635ec71919 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 15 Feb 2021 22:40:55 +0300 Subject: [PATCH 70/94] Update index.md --- docs/en/sql-reference/window-functions/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 46f7ed3824e..07a7f2f6978 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -15,6 +15,7 @@ ClickHouse supports the standard grammar for defining windows and window functio | Feature | Support or workaround | | --------| ----------| | ad hoc window specification (`count(*) over (partition by id order by time desc)`) | yes | +| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | not supported, wrap in a subquery ([feature request](https://github.com/ClickHouse/ClickHouse/issues/19857)) | | `WINDOW` clause (`select ... from table window w as (partiton by id)`) | yes | | `ROWS` frame | yes | | `RANGE` frame | yes, it is the default | From 2de6d550cc04d62c8189ca225c4016efe8c1847a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 15 Feb 2021 22:42:10 +0300 Subject: [PATCH 71/94] Update index.md --- docs/en/sql-reference/window-functions/index.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 07a7f2f6978..0a19b4a8da4 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -14,15 +14,15 @@ ClickHouse supports the standard grammar for defining windows and window functio | Feature | Support or workaround | | --------| ----------| -| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | yes | +| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | supported | | expressions involving window functions, e.g. `(count(*) over ()) / 2)` | not supported, wrap in a subquery ([feature request](https://github.com/ClickHouse/ClickHouse/issues/19857)) | -| `WINDOW` clause (`select ... from table window w as (partiton by id)`) | yes | -| `ROWS` frame | yes | -| `RANGE` frame | yes, it is the default | -| `GROUPS` frame | no | +| `WINDOW` clause (`select ... from table window w as (partiton by id)`) | supported | +| `ROWS` frame | supported | +| `RANGE` frame | supported, the default | +| `GROUPS` frame | not supported | | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported | -| `rank()`, `dense_rank()`, `row_number()` | yes | -| `lag/lead(value, offset)` | no, replace with `any(value) over (.... rows between preceding and preceding)`, or `following` for `lead`| +| `rank()`, `dense_rank()`, `row_number()` | supported | +| `lag/lead(value, offset)` | not supported, replace with `any(value) over (.... rows between preceding and preceding)`, or `following` for `lead`| ## References From c9dd1aa58b831835a801bb886c77ccc712febcd9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 15 Feb 2021 22:56:26 +0300 Subject: [PATCH 72/94] Update index.md --- docs/en/sql-reference/window-functions/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 0a19b4a8da4..cbf03a44d46 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -19,6 +19,7 @@ ClickHouse supports the standard grammar for defining windows and window functio | `WINDOW` clause (`select ... from table window w as (partiton by id)`) | supported | | `ROWS` frame | supported | | `RANGE` frame | supported, the default | +| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | not supported, specify the number of seconds instead | | `GROUPS` frame | not supported | | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported | | `rank()`, `dense_rank()`, `row_number()` | supported | From 5273242f8608d09bb2280c04d7670b768c21235c Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 15 Feb 2021 23:26:29 +0300 Subject: [PATCH 73/94] Minor changes move ON to WHERE for INNER JOIN --- src/Interpreters/CollectJoinOnKeysVisitor.cpp | 81 +++++++++---------- src/Interpreters/CollectJoinOnKeysVisitor.h | 1 - src/Interpreters/TreeRewriter.cpp | 9 +-- 3 files changed, 44 insertions(+), 47 deletions(-) diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp index ec413fe08fc..9033dd0f0f8 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -16,6 +16,26 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +namespace +{ + +void addAndTerm(ASTPtr & ast, const ASTPtr & term) +{ + if (!ast) + ast = term; + else + ast = makeASTFunction("and", ast, term); +} + +/// If this is an inner join and the expression related to less than 2 tables, then move it to WHERE +bool canMoveToWhere(std::pair table_numbers, ASTTableJoin::Kind kind) +{ + return kind == ASTTableJoin::Kind::Inner && + (table_numbers.first == table_numbers.second || table_numbers.first == 0 || table_numbers.second == 0); +} + +} + void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, const std::pair & table_no) { @@ -80,57 +100,36 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as ASTPtr right = func.arguments->children.at(1); auto table_numbers = getTableNumbers(left, right, data); - if (table_numbers.first != table_numbers.second && table_numbers.first > 0 && table_numbers.second > 0) - data.new_on_expression_valid = true; - - /** - * if this is an inner join and the expression related to less than 2 tables, then move it to WHERE - */ - if (data.kind == ASTTableJoin::Kind::Inner - && (table_numbers.first == table_numbers.second || table_numbers.first == 0 || table_numbers.second == 0)) + if (canMoveToWhere(table_numbers, data.kind)) { - if (!data.new_where_conditions) - data.new_where_conditions = ast->clone(); - else - data.new_where_conditions = makeASTFunction("and", data.new_where_conditions, ast->clone()); + addAndTerm(data.new_where_conditions, ast); } else { + if (data.kind == ASTTableJoin::Kind::Inner) + { + addAndTerm(data.new_on_expression, ast); + } data.addJoinKeys(left, right, table_numbers); - if (!data.new_on_expression) - data.new_on_expression = ast->clone(); - else - data.new_on_expression = makeASTFunction("and", data.new_on_expression, ast->clone()); } } - else if (inequality != ASOF::Inequality::None) + else if (inequality != ASOF::Inequality::None && !data.is_asof) { - if (!data.is_asof) + ASTPtr left = func.arguments->children.at(0); + ASTPtr right = func.arguments->children.at(1); + auto table_numbers = getTableNumbers(left, right, data); + if (canMoveToWhere(table_numbers, data.kind)) { - ASTPtr left = func.arguments->children.at(0); - ASTPtr right = func.arguments->children.at(1); - auto table_numbers = getTableNumbers(left, right, data); - - if (table_numbers.first != table_numbers.second && table_numbers.first > 0 && table_numbers.second > 0) - data.new_on_expression_valid = true; - - if (data.kind == ASTTableJoin::Kind::Inner - && (table_numbers.first == table_numbers.second || table_numbers.first == 0 || table_numbers.second == 0)) - { - if (!data.new_where_conditions) - data.new_where_conditions = ast->clone(); - else - data.new_where_conditions = makeASTFunction("and", data.new_where_conditions, ast->clone()); - - return; - } - else - { - throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'", - ErrorCodes::NOT_IMPLEMENTED); - } + addAndTerm(data.new_where_conditions, ast); } - + else + { + throw Exception("JOIN ON inequalities are not supported. Unexpected '" + queryToString(ast) + "'", + ErrorCodes::NOT_IMPLEMENTED); + } + } + else if (inequality != ASOF::Inequality::None && data.is_asof) + { if (data.asof_left_key || data.asof_right_key) throw Exception("ASOF JOIN expects exactly one inequality in ON section. Unexpected '" + queryToString(ast) + "'", ErrorCodes::INVALID_JOIN_ON_EXPRESSION); diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.h b/src/Interpreters/CollectJoinOnKeysVisitor.h index 64547baf7d7..aa2fd80d07c 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.h +++ b/src/Interpreters/CollectJoinOnKeysVisitor.h @@ -37,7 +37,6 @@ public: ASTPtr new_on_expression{}; ASTPtr new_where_conditions{}; bool has_some{false}; - bool new_on_expression_valid{false}; void addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, const std::pair & table_no); void addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, const std::pair & table_no, diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 9f788703704..22356622f8d 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -424,11 +424,10 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele throw Exception("Cannot get JOIN keys from JOIN ON section: " + queryToString(table_join.on_expression), ErrorCodes::INVALID_JOIN_ON_EXPRESSION); if (is_asof) + { data.asofToJoinKeys(); - else if (!data.new_on_expression_valid) - throw Exception("JOIN expects left and right joined keys from two joined table in ON section. Unexpected '" + queryToString(data.new_on_expression) + "'", - ErrorCodes::INVALID_JOIN_ON_EXPRESSION); - else if (data.new_where_conditions != nullptr) + } + else if (data.new_where_conditions && data.new_on_expression) { table_join.on_expression = data.new_on_expression; new_where_conditions = data.new_where_conditions; @@ -823,7 +822,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( setJoinStrictness(*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, result.analyzed_join->table_join); - ASTPtr new_where_condition; + ASTPtr new_where_condition = nullptr; collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases, new_where_condition); if (new_where_condition) moveJoinedKeyToWhere(select_query, new_where_condition); From 5a5542dd5c6de677044e4da0b33a9a171aeb3bba Mon Sep 17 00:00:00 2001 From: Anna Date: Tue, 16 Feb 2021 00:03:02 +0300 Subject: [PATCH 74/94] Minor fixes --- docs/_description_templates/template-function.md | 4 +--- docs/_description_templates/template-system-table.md | 4 ++++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/_description_templates/template-function.md b/docs/_description_templates/template-function.md index b69d7ed5309..2ff0ee586e8 100644 --- a/docs/_description_templates/template-function.md +++ b/docs/_description_templates/template-function.md @@ -19,9 +19,7 @@ More text (Optional). **Returned value(s)** -- Returned values list. - -Type: [Type](relative/path/to/type/dscr.md#type). +- Returned values list. [Type name](relative/path/to/type/dscr.md#type). **Example** diff --git a/docs/_description_templates/template-system-table.md b/docs/_description_templates/template-system-table.md index 3fdf9788d79..f2decc4bb6d 100644 --- a/docs/_description_templates/template-system-table.md +++ b/docs/_description_templates/template-system-table.md @@ -8,10 +8,14 @@ Columns: **Example** +Query: + ``` sql SELECT * FROM system.table_name ``` +Result: + ``` text Some output. It shouldn't be too long. ``` From ce1f10904e820a538a4210e7a8aea92ea9021882 Mon Sep 17 00:00:00 2001 From: Anna Date: Tue, 16 Feb 2021 00:22:10 +0300 Subject: [PATCH 75/94] Global replacement `Parameters` to `Arguments` --- .../template-function.md | 10 +++- .../functions/array-functions.md | 44 ++++++++-------- .../sql-reference/functions/bit-functions.md | 8 +-- .../functions/bitmap-functions.md | 38 +++++++------- .../functions/conditional-functions.md | 4 +- .../functions/date-time-functions.md | 26 +++++----- .../functions/encoding-functions.md | 4 +- .../functions/encryption-functions.md | 8 +-- .../functions/ext-dict-functions.md | 10 ++-- .../functions/functions-for-nulls.md | 14 ++--- .../en/sql-reference/functions/geo/geohash.md | 2 +- docs/en/sql-reference/functions/geo/h3.md | 10 ++-- .../sql-reference/functions/hash-functions.md | 34 ++++++------ .../sql-reference/functions/introspection.md | 8 +-- .../functions/ip-address-functions.md | 4 +- .../sql-reference/functions/json-functions.md | 2 +- .../functions/machine-learning-functions.md | 2 +- .../sql-reference/functions/math-functions.md | 18 +++---- .../functions/other-functions.md | 52 +++++++++---------- .../functions/random-functions.md | 4 +- .../functions/rounding-functions.md | 4 +- .../functions/splitting-merging-functions.md | 6 +-- .../functions/string-functions.md | 22 ++++---- .../functions/string-search-functions.md | 24 ++++----- .../functions/tuple-functions.md | 2 +- .../functions/tuple-map-functions.md | 8 +-- .../functions/type-conversion-functions.md | 24 ++++----- .../sql-reference/functions/url-functions.md | 6 +-- .../functions/ym-dict-functions.md | 2 +- 29 files changed, 203 insertions(+), 197 deletions(-) diff --git a/docs/_description_templates/template-function.md b/docs/_description_templates/template-function.md index 2ff0ee586e8..a0074a76ef6 100644 --- a/docs/_description_templates/template-function.md +++ b/docs/_description_templates/template-function.md @@ -12,14 +12,20 @@ Alias: ``. (Optional) More text (Optional). -**Parameters** (Optional) +**Arguments** (Optional) - `x` — Description. [Type name](relative/path/to/type/dscr.md#type). - `y` — Description. [Type name](relative/path/to/type/dscr.md#type). +**Parameters** (Optional, only for parametric aggregate functions) + +- `z` — Description. [Type name](relative/path/to/type/dscr.md#type). + **Returned value(s)** -- Returned values list. [Type name](relative/path/to/type/dscr.md#type). +- Returned values list. + +Type: [Type name](relative/path/to/type/dscr.md#type). **Example** diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index d5b357795d7..c9c418d57a4 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -61,7 +61,7 @@ Combines arrays passed as arguments. arrayConcat(arrays) ``` -**Parameters** +**Arguments** - `arrays` – Arbitrary number of arguments of [Array](../../sql-reference/data-types/array.md) type. **Example** @@ -111,7 +111,7 @@ Checks whether one array is a subset of another. hasAll(set, subset) ``` -**Parameters** +**Arguments** - `set` – Array of any type with a set of elements. - `subset` – Array of any type with elements that should be tested to be a subset of `set`. @@ -149,7 +149,7 @@ Checks whether two arrays have intersection by some elements. hasAny(array1, array2) ``` -**Parameters** +**Arguments** - `array1` – Array of any type with a set of elements. - `array2` – Array of any type with a set of elements. @@ -191,7 +191,7 @@ For Example: - `hasSubstr([1,2,3,4], [2,3])` returns 1. However, `hasSubstr([1,2,3,4], [3,2])` will return `0`. - `hasSubstr([1,2,3,4], [1,2,3])` returns 1. However, `hasSubstr([1,2,3,4], [1,2,4])` will return `0`. -**Parameters** +**Arguments** - `array1` – Array of any type with a set of elements. - `array2` – Array of any type with a set of elements. @@ -369,7 +369,7 @@ Removes the last item from the array. arrayPopBack(array) ``` -**Parameters** +**Arguments** - `array` – Array. @@ -393,7 +393,7 @@ Removes the first item from the array. arrayPopFront(array) ``` -**Parameters** +**Arguments** - `array` – Array. @@ -417,7 +417,7 @@ Adds one item to the end of the array. arrayPushBack(array, single_value) ``` -**Parameters** +**Arguments** - `array` – Array. - `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. @@ -442,7 +442,7 @@ Adds one element to the beginning of the array. arrayPushFront(array, single_value) ``` -**Parameters** +**Arguments** - `array` – Array. - `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. @@ -467,7 +467,7 @@ Changes the length of the array. arrayResize(array, size[, extender]) ``` -**Parameters:** +**Arguments:** - `array` — Array. - `size` — Required length of the array. @@ -509,7 +509,7 @@ Returns a slice of the array. arraySlice(array, offset[, length]) ``` -**Parameters** +**Arguments** - `array` – Array of data. - `offset` – Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the array items begins with 1. @@ -751,7 +751,7 @@ Calculates the difference between adjacent array elements. Returns an array wher arrayDifference(array) ``` -**Parameters** +**Arguments** - `array` – [Array](https://clickhouse.tech/docs/en/data_types/array/). @@ -803,7 +803,7 @@ Takes an array, returns an array containing the distinct elements only. arrayDistinct(array) ``` -**Parameters** +**Arguments** - `array` – [Array](https://clickhouse.tech/docs/en/data_types/array/). @@ -871,7 +871,7 @@ Applies an aggregate function to array elements and returns its result. The name arrayReduce(agg_func, arr1, arr2, ..., arrN) ``` -**Parameters** +**Arguments** - `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). - `arr` — Any number of [array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. @@ -936,7 +936,7 @@ Applies an aggregate function to array elements in given ranges and returns an a arrayReduceInRanges(agg_func, ranges, arr1, arr2, ..., arrN) ``` -**Parameters** +**Arguments** - `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). - `ranges` — The ranges to aggretate which should be an [array](../../sql-reference/data-types/array.md) of [tuples](../../sql-reference/data-types/tuple.md) which containing the index and the length of each range. @@ -1007,7 +1007,7 @@ flatten(array_of_arrays) Alias: `flatten`. -**Parameters** +**Arguments** - `array_of_arrays` — [Array](../../sql-reference/data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. @@ -1033,7 +1033,7 @@ Removes consecutive duplicate elements from an array. The order of result values arrayCompact(arr) ``` -**Parameters** +**Arguments** `arr` — The [array](../../sql-reference/data-types/array.md) to inspect. @@ -1069,7 +1069,7 @@ Combines multiple arrays into a single array. The resulting array contains the c arrayZip(arr1, arr2, ..., arrN) ``` -**Parameters** +**Arguments** - `arrN` — [Array](../../sql-reference/data-types/array.md). @@ -1107,7 +1107,7 @@ Calculate AUC (Area Under the Curve, which is a concept in machine learning, see arrayAUC(arr_scores, arr_labels) ``` -**Parameters** +**Arguments** - `arr_scores` — scores prediction model gives. - `arr_labels` — labels of samples, usually 1 for positive sample and 0 for negtive sample. @@ -1302,7 +1302,7 @@ Note that the `arrayMin` is a [higher-order function](../../sql-reference/functi arrayMin([func,] arr) ``` -**Parameters** +**Arguments** - `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). - `arr` — Array. [Array](../../sql-reference/data-types/array.md). @@ -1357,7 +1357,7 @@ Note that the `arrayMax` is a [higher-order function](../../sql-reference/functi arrayMax([func,] arr) ``` -**Parameters** +**Arguments** - `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). - `arr` — Array. [Array](../../sql-reference/data-types/array.md). @@ -1412,7 +1412,7 @@ Note that the `arraySum` is a [higher-order function](../../sql-reference/functi arraySum([func,] arr) ``` -**Parameters** +**Arguments** - `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). - `arr` — Array. [Array](../../sql-reference/data-types/array.md). @@ -1467,7 +1467,7 @@ Note that the `arrayAvg` is a [higher-order function](../../sql-reference/functi arrayAvg([func,] arr) ``` -**Parameters** +**Arguments** - `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). - `arr` — Array. [Array](../../sql-reference/data-types/array.md). diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index 57c2ae42ada..a3d0c82d8ab 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -35,7 +35,7 @@ Takes any integer and converts it into [binary form](https://en.wikipedia.org/wi SELECT bitTest(number, index) ``` -**Parameters** +**Arguments** - `number` – integer number. - `index` – position of bit. @@ -100,7 +100,7 @@ The conjuction for bitwise operations: SELECT bitTestAll(number, index1, index2, index3, index4, ...) ``` -**Parameters** +**Arguments** - `number` – integer number. - `index1`, `index2`, `index3`, `index4` – positions of bit. For example, for set of positions (`index1`, `index2`, `index3`, `index4`) is true if and only if all of its positions are true (`index1` ⋀ `index2`, ⋀ `index3` ⋀ `index4`). @@ -165,7 +165,7 @@ The disjunction for bitwise operations: SELECT bitTestAny(number, index1, index2, index3, index4, ...) ``` -**Parameters** +**Arguments** - `number` – integer number. - `index1`, `index2`, `index3`, `index4` – positions of bit. @@ -220,7 +220,7 @@ Calculates the number of bits set to one in the binary representation of a numbe bitCount(x) ``` -**Parameters** +**Arguments** - `x` — [Integer](../../sql-reference/data-types/int-uint.md) or [floating-point](../../sql-reference/data-types/float.md) number. The function uses the value representation in memory. It allows supporting floating-point numbers. diff --git a/docs/en/sql-reference/functions/bitmap-functions.md b/docs/en/sql-reference/functions/bitmap-functions.md index a66098beffb..bfff70576f2 100644 --- a/docs/en/sql-reference/functions/bitmap-functions.md +++ b/docs/en/sql-reference/functions/bitmap-functions.md @@ -21,7 +21,7 @@ Build a bitmap from unsigned integer array. bitmapBuild(array) ``` -**Parameters** +**Arguments** - `array` – unsigned integer array. @@ -45,7 +45,7 @@ Convert bitmap to integer array. bitmapToArray(bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -69,7 +69,7 @@ Return subset in specified range (not include the range_end). bitmapSubsetInRange(bitmap, range_start, range_end) ``` -**Parameters** +**Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). - `range_start` – range start point. Type: [UInt32](../../sql-reference/data-types/int-uint.md). @@ -97,7 +97,7 @@ Creates a subset of bitmap with n elements taken between `range_start` and `card bitmapSubsetLimit(bitmap, range_start, cardinality_limit) ``` -**Parameters** +**Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). - `range_start` – The subset starting point. Type: [UInt32](../../sql-reference/data-types/int-uint.md). @@ -133,7 +133,7 @@ Checks whether the bitmap contains an element. bitmapContains(haystack, needle) ``` -**Parameters** +**Arguments** - `haystack` – [Bitmap object](#bitmap_functions-bitmapbuild), where the function searches. - `needle` – Value that the function searches. Type: [UInt32](../../sql-reference/data-types/int-uint.md). @@ -167,7 +167,7 @@ bitmapHasAny(bitmap1, bitmap2) If you are sure that `bitmap2` contains strictly one element, consider using the [bitmapContains](#bitmap_functions-bitmapcontains) function. It works more efficiently. -**Parameters** +**Arguments** - `bitmap*` – bitmap object. @@ -197,7 +197,7 @@ If the second argument is an empty bitmap then returns 1. bitmapHasAll(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -221,7 +221,7 @@ Retrun bitmap cardinality of type UInt64. bitmapCardinality(bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -243,7 +243,7 @@ Retrun the smallest value of type UInt64 in the set, UINT32_MAX if the set is em bitmapMin(bitmap) -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -263,7 +263,7 @@ Retrun the greatest value of type UInt64 in the set, 0 if the set is empty. bitmapMax(bitmap) -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -283,7 +283,7 @@ Transform an array of values in a bitmap to another array of values, the result bitmapTransform(bitmap, from_array, to_array) -**Parameters** +**Arguments** - `bitmap` – bitmap object. - `from_array` – UInt32 array. For idx in range \[0, from_array.size()), if bitmap contains from_array\[idx\], then replace it with to_array\[idx\]. Note that the result depends on array ordering if there are common elements between from_array and to_array. @@ -307,7 +307,7 @@ Two bitmap and calculation, the result is a new bitmap. bitmapAnd(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -331,7 +331,7 @@ Two bitmap or calculation, the result is a new bitmap. bitmapOr(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -355,7 +355,7 @@ Two bitmap xor calculation, the result is a new bitmap. bitmapXor(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -379,7 +379,7 @@ Two bitmap andnot calculation, the result is a new bitmap. bitmapAndnot(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -403,7 +403,7 @@ Two bitmap and calculation, return cardinality of type UInt64. bitmapAndCardinality(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -427,7 +427,7 @@ Two bitmap or calculation, return cardinality of type UInt64. bitmapOrCardinality(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -451,7 +451,7 @@ Two bitmap xor calculation, return cardinality of type UInt64. bitmapXorCardinality(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -475,7 +475,7 @@ Two bitmap andnot calculation, return cardinality of type UInt64. bitmapAndnotCardinality(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. diff --git a/docs/en/sql-reference/functions/conditional-functions.md b/docs/en/sql-reference/functions/conditional-functions.md index 446a4729ff2..2d57cbb3bd5 100644 --- a/docs/en/sql-reference/functions/conditional-functions.md +++ b/docs/en/sql-reference/functions/conditional-functions.md @@ -17,7 +17,7 @@ SELECT if(cond, then, else) If the condition `cond` evaluates to a non-zero value, returns the result of the expression `then`, and the result of the expression `else`, if present, is skipped. If the `cond` is zero or `NULL`, then the result of the `then` expression is skipped and the result of the `else` expression, if present, is returned. -**Parameters** +**Arguments** - `cond` – The condition for evaluation that can be zero or not. The type is UInt8, Nullable(UInt8) or NULL. - `then` - The expression to return if condition is met. @@ -117,7 +117,7 @@ Allows you to write the [CASE](../../sql-reference/operators/index.md#operator_c Syntax: `multiIf(cond_1, then_1, cond_2, then_2, ..., else)` -**Parameters:** +**Arguments:** - `cond_N` — The condition for the function to return `then_N`. - `then_N` — The result of the function when executed. diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 4a73bdb2546..f26e1bee6c9 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -186,7 +186,7 @@ Truncates sub-seconds. toStartOfSecond(value[, timezone]) ``` -**Parameters** +**Arguments** - `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). - `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). @@ -328,7 +328,7 @@ For mode values with a meaning of “contains January 1”, the week contains Ja toWeek(date, [, mode][, Timezone]) ``` -**Parameters** +**Arguments** - `date` – Date or DateTime. - `mode` – Optional parameter, Range of values is \[0,9\], default is 0. @@ -378,7 +378,7 @@ date_trunc(unit, value[, timezone]) Alias: `dateTrunc`. -**Parameters** +**Arguments** - `unit` — The type of interval to truncate the result. [String Literal](../syntax.md#syntax-string-literal). Possible values: @@ -447,7 +447,7 @@ date_add(unit, value, date) Aliases: `dateAdd`, `DATE_ADD`. -**Parameters** +**Arguments** - `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). @@ -484,7 +484,7 @@ date_diff('unit', startdate, enddate, [timezone]) Aliases: `dateDiff`, `DATE_DIFF`. -**Parameters** +**Arguments** - `unit` — The type of interval for result [String](../../sql-reference/data-types/string.md). @@ -530,7 +530,7 @@ date_sub(unit, value, date) Aliases: `dateSub`, `DATE_SUB`. -**Parameters** +**Arguments** - `unit` — The type of interval to subtract. [String](../../sql-reference/data-types/string.md). @@ -570,7 +570,7 @@ timestamp_add(date, INTERVAL value unit) Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. -**Parameters** +**Arguments** - `date` — Date or Date with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). - `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md) @@ -606,7 +606,7 @@ timestamp_sub(unit, value, date) Aliases: `timeStampSub`, `TIMESTAMP_SUB`. -**Parameters** +**Arguments** - `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). @@ -640,7 +640,7 @@ Returns the current date and time. now([timezone]) ``` -**Parameters** +**Arguments** - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). @@ -855,7 +855,7 @@ Converts a [Proleptic Gregorian calendar](https://en.wikipedia.org/wiki/Prolepti toModifiedJulianDay(date) ``` -**Parameters** +**Arguments** - `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). @@ -891,7 +891,7 @@ Similar to [toModifiedJulianDay()](#tomodifiedjulianday), but instead of raising toModifiedJulianDayOrNull(date) ``` -**Parameters** +**Arguments** - `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). @@ -927,7 +927,7 @@ Converts a [Modified Julian Day](https://en.wikipedia.org/wiki/Julian_day#Varian fromModifiedJulianDay(day) ``` -**Parameters** +**Arguments** - `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). @@ -963,7 +963,7 @@ Similar to [fromModifiedJulianDayOrNull()](#frommodifiedjuliandayornull), but in fromModifiedJulianDayOrNull(day) ``` -**Parameters** +**Arguments** - `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index bc3f5ca4345..31e84c08b39 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -15,7 +15,7 @@ Returns the string with the length as the number of passed arguments and each by char(number_1, [number_2, ..., number_n]); ``` -**Parameters** +**Arguments** - `number_1, number_2, ..., number_n` — Numerical arguments interpreted as integers. Types: [Int](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md). @@ -107,7 +107,7 @@ For `String` and `FixedString`, all bytes are simply encoded as two hexadecimal Values of floating point and Decimal types are encoded as their representation in memory. As we support little endian architecture, they are encoded in little endian. Zero leading/trailing bytes are not omitted. -**Parameters** +**Arguments** - `arg` — A value to convert to hexadecimal. Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md index 9e360abfe26..0dd7469b25e 100644 --- a/docs/en/sql-reference/functions/encryption-functions.md +++ b/docs/en/sql-reference/functions/encryption-functions.md @@ -31,7 +31,7 @@ This function encrypts data using these modes: encrypt('mode', 'plaintext', 'key' [, iv, aad]) ``` -**Parameters** +**Arguments** - `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). - `plaintext` — Text thats need to be encrypted. [String](../../sql-reference/data-types/string.md#string). @@ -127,7 +127,7 @@ Supported encryption modes: aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv]) ``` -**Parameters** +**Arguments** - `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). - `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string). @@ -238,7 +238,7 @@ This function decrypts ciphertext into a plaintext using these modes: decrypt('mode', 'ciphertext', 'key' [, iv, aad]) ``` -**Parameters** +**Arguments** - `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string). - `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string). @@ -317,7 +317,7 @@ Supported decryption modes: aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) ``` -**Parameters** +**Arguments** - `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string). - `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string). diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md index 7df6ef54f2a..834fcdf8282 100644 --- a/docs/en/sql-reference/functions/ext-dict-functions.md +++ b/docs/en/sql-reference/functions/ext-dict-functions.md @@ -19,7 +19,7 @@ dictGet('dict_name', 'attr_name', id_expr) dictGetOrDefault('dict_name', 'attr_name', id_expr, default_value_expr) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). @@ -108,7 +108,7 @@ Checks whether a key is present in a dictionary. dictHas('dict_name', id_expr) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. @@ -130,7 +130,7 @@ Creates an array, containing all the parents of a key in the [hierarchical dicti dictGetHierarchy('dict_name', key) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. @@ -149,7 +149,7 @@ Checks the ancestor of a key through the whole hierarchical chain in the diction dictIsIn('dict_name', child_id_expr, ancestor_id_expr) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `child_id_expr` — Key to be checked. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. @@ -185,7 +185,7 @@ dictGet[Type]('dict_name', 'attr_name', id_expr) dictGet[Type]OrDefault('dict_name', 'attr_name', id_expr, default_value_expr) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index c32af7194fb..df75e96c8fb 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -13,7 +13,7 @@ Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal isNull(x) ``` -**Parameters** +**Arguments** - `x` — A value with a non-compound data type. @@ -53,7 +53,7 @@ Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal isNotNull(x) ``` -**Parameters:** +**Arguments:** - `x` — A value with a non-compound data type. @@ -93,7 +93,7 @@ Checks from left to right whether `NULL` arguments were passed and returns the f coalesce(x,...) ``` -**Parameters:** +**Arguments:** - Any number of parameters of a non-compound type. All parameters must be compatible by data type. @@ -136,7 +136,7 @@ Returns an alternative value if the main argument is `NULL`. ifNull(x,alt) ``` -**Parameters:** +**Arguments:** - `x` — The value to check for `NULL`. - `alt` — The value that the function returns if `x` is `NULL`. @@ -176,7 +176,7 @@ Returns `NULL` if the arguments are equal. nullIf(x, y) ``` -**Parameters:** +**Arguments:** `x`, `y` — Values for comparison. They must be compatible types, or ClickHouse will generate an exception. @@ -215,7 +215,7 @@ Results in a value of type [Nullable](../../sql-reference/data-types/nullable.md assumeNotNull(x) ``` -**Parameters:** +**Arguments:** - `x` — The original value. @@ -277,7 +277,7 @@ Converts the argument type to `Nullable`. toNullable(x) ``` -**Parameters:** +**Arguments:** - `x` — The value of any non-compound type. diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md index 6f288a7687d..c27eab0b421 100644 --- a/docs/en/sql-reference/functions/geo/geohash.md +++ b/docs/en/sql-reference/functions/geo/geohash.md @@ -72,7 +72,7 @@ Returns an array of [geohash](#geohash)-encoded strings of given precision that geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precision) ``` -**Parameters** +**Arguments** - `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md). - `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md). diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 4ed651e4e9e..9dda947b3a7 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -162,7 +162,7 @@ Returns [H3](#h3index) point index `(lon, lat)` with specified resolution. geoToH3(lon, lat, resolution) ``` -**Parameters** +**Arguments** - `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md). - `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -201,7 +201,7 @@ Result: h3kRing(h3index, k) ``` -**Parameters** +**Arguments** - `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). - `k` — Raduis. Type: [integer](../../../sql-reference/data-types/int-uint.md) @@ -315,7 +315,7 @@ Returns whether or not the provided [H3](#h3index) indexes are neighbors. h3IndexesAreNeighbors(index1, index2) ``` -**Parameters** +**Arguments** - `index1` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). - `index2` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -353,7 +353,7 @@ Returns an array of child indexes for the given [H3](#h3index) index. h3ToChildren(index, resolution) ``` -**Parameters** +**Arguments** - `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). - `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -390,7 +390,7 @@ Returns the parent (coarser) index containing the given [H3](#h3index) index. h3ToParent(index, resolution) ``` -**Parameters** +**Arguments** - `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). - `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 9394426b20b..465ad01527f 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -18,9 +18,9 @@ halfMD5(par1, ...) The function is relatively slow (5 million short strings per second per processor core). Consider using the [sipHash64](#hash_functions-siphash64) function instead. -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -61,9 +61,9 @@ Function [interprets](../../sql-reference/functions/type-conversion-functions.md 3. Then the function takes the hash value, calculated at the previous step, and the third element of the initial hash array, and calculates a hash for the array of them. 4. The previous step is repeated for all the remaining elements of the initial hash array. -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -97,9 +97,9 @@ cityHash64(par1,...) This is a fast non-cryptographic hash function. It uses the CityHash algorithm for string parameters and implementation-specific fast non-cryptographic hash function for parameters with other data types. The function uses the CityHash combinator to get the final results. -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -166,9 +166,9 @@ farmHash64(par1, ...) These functions use the `Fingerprint64` and `Hash64` methods respectively from all [available methods](https://github.com/google/farmhash/blob/master/src/farmhash.h). -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -226,7 +226,7 @@ Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add97 javaHashUTF16LE(stringUtf16le) ``` -**Parameters** +**Arguments** - `stringUtf16le` — a string in UTF-16LE encoding. @@ -292,9 +292,9 @@ Produces a 64-bit [MetroHash](http://www.jandrewrogers.com/2015/05/27/metrohash/ metroHash64(par1, ...) ``` -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -327,9 +327,9 @@ murmurHash2_32(par1, ...) murmurHash2_64(par1, ...) ``` -**Parameters** +**Arguments** -Both functions take a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -358,7 +358,7 @@ Calculates a 64-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash val gccMurmurHash(par1, ...); ``` -**Parameters** +**Arguments** - `par1, ...` — A variable number of parameters that can be any of the [supported data types](../../sql-reference/data-types/index.md#data_types). @@ -395,9 +395,9 @@ murmurHash3_32(par1, ...) murmurHash3_64(par1, ...) ``` -**Parameters** +**Arguments** -Both functions take a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -424,7 +424,7 @@ Produces a 128-bit [MurmurHash3](https://github.com/aappleby/smhasher) hash valu murmurHash3_128( expr ) ``` -**Parameters** +**Arguments** - `expr` — [Expressions](../../sql-reference/syntax.md#syntax-expressions) returning a [String](../../sql-reference/data-types/string.md)-type value. diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index bfa1998d68a..964265a461b 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -32,7 +32,7 @@ If you use official ClickHouse packages, you need to install the `clickhouse-com addressToLine(address_of_binary_instruction) ``` -**Parameters** +**Arguments** - `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. @@ -123,7 +123,7 @@ Converts virtual memory address inside ClickHouse server process to the symbol f addressToSymbol(address_of_binary_instruction) ``` -**Parameters** +**Arguments** - `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. @@ -220,7 +220,7 @@ Converts a symbol that you can get using the [addressToSymbol](#addresstosymbol) demangle(symbol) ``` -**Parameters** +**Arguments** - `symbol` ([String](../../sql-reference/data-types/string.md)) — Symbol from an object file. @@ -345,7 +345,7 @@ Emits trace log message to server log for each [Block](https://clickhouse.tech/d logTrace('message') ``` -**Parameters** +**Arguments** - `message` — Message that is emitted to server log. [String](../../sql-reference/data-types/string.md#string). diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index 0c1f675304b..eaea5e250fb 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -275,7 +275,7 @@ Determines whether the input string is an IPv4 address or not. If `string` is IP isIPv4String(string) ``` -**Parameters** +**Arguments** - `string` — IP address. [String](../../sql-reference/data-types/string.md). @@ -313,7 +313,7 @@ Determines whether the input string is an IPv6 address or not. If `string` is IP isIPv6String(string) ``` -**Parameters** +**Arguments** - `string` — IP address. [String](../../sql-reference/data-types/string.md). diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index 05e755eaddc..edee048eb77 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -236,7 +236,7 @@ Extracts raw data from a JSON object. JSONExtractKeysAndValuesRaw(json[, p, a, t, h]) ``` -**Parameters** +**Arguments** - `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. - `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [string](../../sql-reference/data-types/string.md) to get the field by the key or an [integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. diff --git a/docs/en/sql-reference/functions/machine-learning-functions.md b/docs/en/sql-reference/functions/machine-learning-functions.md index 8627fc26bad..f103a4ea421 100644 --- a/docs/en/sql-reference/functions/machine-learning-functions.md +++ b/docs/en/sql-reference/functions/machine-learning-functions.md @@ -27,7 +27,7 @@ Compares test groups (variants) and calculates for each group the probability to bayesAB(distribution_name, higher_is_better, variant_names, x, y) ``` -**Parameters** +**Arguments** - `distribution_name` — Name of the probability distribution. [String](../../sql-reference/data-types/string.md). Possible values: diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 8dc287593c7..f56a721c0c0 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -121,7 +121,7 @@ Accepts a numeric argument and returns a UInt64 number close to 10 to the power cosh(x) ``` -**Parameters** +**Arguments** - `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -157,7 +157,7 @@ Result: acosh(x) ``` -**Parameters** +**Arguments** - `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -197,7 +197,7 @@ Result: sinh(x) ``` -**Parameters** +**Arguments** - `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -233,7 +233,7 @@ Result: asinh(x) ``` -**Parameters** +**Arguments** - `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -273,7 +273,7 @@ Result: atanh(x) ``` -**Parameters** +**Arguments** - `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -309,7 +309,7 @@ The [function](https://en.wikipedia.org/wiki/Atan2) calculates the angle in the atan2(y, x) ``` -**Parameters** +**Arguments** - `y` — y-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64). - `x` — x-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -346,7 +346,7 @@ Calculates the length of the hypotenuse of a right-angle triangle. The [function hypot(x, y) ``` -**Parameters** +**Arguments** - `x` — The first cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64). - `y` — The second cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -383,7 +383,7 @@ Calculates `log(1+x)`. The [function](https://en.wikipedia.org/wiki/Natural_loga log1p(x) ``` -**Parameters** +**Arguments** - `x` — Values from the interval: `-1 < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -423,7 +423,7 @@ The `sign` function can extract the sign of a real number. sign(x) ``` -**Parameters** +**Arguments** - `x` — Values from `-∞` to `+∞`. Support all numeric types in ClickHouse. diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 8f25ce023df..dcbb7d1ffeb 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -19,7 +19,7 @@ Gets a named value from the [macros](../../operations/server-configuration-param getMacro(name); ``` -**Parameters** +**Arguments** - `name` — Name to retrieve from the `macros` section. [String](../../sql-reference/data-types/string.md#string). @@ -108,7 +108,7 @@ Extracts the trailing part of a string after the last slash or backslash. This f basename( expr ) ``` -**Parameters** +**Arguments** - `expr` — Expression resulting in a [String](../../sql-reference/data-types/string.md) type value. All the backslashes must be escaped in the resulting value. @@ -192,7 +192,7 @@ Returns estimation of uncompressed byte size of its arguments in memory. byteSize(argument [, ...]) ``` -**Parameters** +**Arguments** - `argument` — Value. @@ -349,7 +349,7 @@ The function is intended for development, debugging and demonstration. isConstant(x) ``` -**Parameters** +**Arguments** - `x` — Expression to check. @@ -420,7 +420,7 @@ Checks whether floating point value is finite. ifNotFinite(x,y) -**Parameters** +**Arguments** - `x` — Value to be checked for infinity. Type: [Float\*](../../sql-reference/data-types/float.md). - `y` — Fallback value. Type: [Float\*](../../sql-reference/data-types/float.md). @@ -460,7 +460,7 @@ Allows building a unicode-art diagram. `bar(x, min, max, width)` draws a band with a width proportional to `(x - min)` and equal to `width` characters when `x = max`. -Parameters: +Arguments: - `x` — Size to display. - `min, max` — Integer constants. The value must fit in `Int64`. @@ -645,7 +645,7 @@ Accepts the time delta in seconds. Returns a time delta with (year, month, day, formatReadableTimeDelta(column[, maximum_unit]) ``` -**Parameters** +**Arguments** - `column` — A column with numeric time delta. - `maximum_unit` — Optional. Maximum unit to show. Acceptable values seconds, minutes, hours, days, months, years. @@ -730,7 +730,7 @@ The result of the function depends on the affected data blocks and the order of The rows order used during the calculation of `neighbor` can differ from the order of rows returned to the user. To prevent that you can make a subquery with ORDER BY and call the function from outside the subquery. -**Parameters** +**Arguments** - `column` — A column name or scalar expression. - `offset` — The number of rows forwards or backwards from the current row of `column`. [Int64](../../sql-reference/data-types/int-uint.md). @@ -924,7 +924,7 @@ The result of the function depends on the order of data in the block. It assumes runningConcurrency(begin, end) ``` -**Parameters** +**Arguments** - `begin` — A column for the beginning time of events (inclusive). [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). - `end` — A column for the ending time of events (exclusive). [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). @@ -989,7 +989,7 @@ Returns the number of fields in [Enum](../../sql-reference/data-types/enum.md). getSizeOfEnumType(value) ``` -**Parameters:** +**Arguments:** - `value` — Value of type `Enum`. @@ -1018,7 +1018,7 @@ Returns size on disk (without taking into account compression). blockSerializedSize(value[, value[, ...]]) ``` -**Parameters** +**Arguments** - `value` — Any value. @@ -1050,7 +1050,7 @@ Returns the name of the class that represents the data type of the column in RAM toColumnTypeName(value) ``` -**Parameters:** +**Arguments:** - `value` — Any type of value. @@ -1090,7 +1090,7 @@ Outputs a detailed description of data structures in RAM dumpColumnStructure(value) ``` -**Parameters:** +**Arguments:** - `value` — Any type of value. @@ -1120,7 +1120,7 @@ Does not include default values for custom columns set by the user. defaultValueOfArgumentType(expression) ``` -**Parameters:** +**Arguments:** - `expression` — Arbitrary type of value or an expression that results in a value of an arbitrary type. @@ -1162,7 +1162,7 @@ Does not include default values for custom columns set by the user. defaultValueOfTypeName(type) ``` -**Parameters:** +**Arguments:** - `type` — A string representing a type name. @@ -1204,7 +1204,7 @@ Used for internal implementation of [arrayJoin](../../sql-reference/functions/ar SELECT replicate(x, arr); ``` -**Parameters:** +**Arguments:** - `arr` — Original array. ClickHouse creates a new array of the same length as the original and fills it with the value `x`. - `x` — The value that the resulting array will be filled with. @@ -1337,7 +1337,7 @@ Takes state of aggregate function. Returns result of aggregation (or finalized s finalizeAggregation(state) ``` -**Parameters** +**Arguments** - `state` — State of aggregation. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). @@ -1441,7 +1441,7 @@ Accumulates states of an aggregate function for each row of a data block. runningAccumulate(agg_state[, grouping]); ``` -**Parameters** +**Arguments** - `agg_state` — State of the aggregate function. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). - `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../../sql-reference/data-types/index.md) for which the equality operator is defined. @@ -1547,7 +1547,7 @@ Only supports tables created with the `ENGINE = Join(ANY, LEFT, )` st joinGet(join_storage_table_name, `value_column`, join_keys) ``` -**Parameters** +**Arguments** - `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicates where search is performed. The identifier is searched in the default database (see parameter `default_database` in the config file). To override the default database, use the `USE db_name` or specify the database and the table through the separator `db_name.db_table`, see the example. - `value_column` — name of the column of the table that contains required data. @@ -1651,7 +1651,7 @@ Generates a string with a random set of [ASCII](https://en.wikipedia.org/wiki/AS randomPrintableASCII(length) ``` -**Parameters** +**Arguments** - `length` — Resulting string length. Positive integer. @@ -1687,7 +1687,7 @@ Generates a binary string of the specified length filled with random bytes (incl randomString(length) ``` -**Parameters** +**Arguments** - `length` — String length. Positive integer. @@ -1735,7 +1735,7 @@ Generates a binary string of the specified length filled with random bytes (incl randomFixedString(length); ``` -**Parameters** +**Arguments** - `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). @@ -1773,7 +1773,7 @@ Generates a random string of a specified length. Result string contains valid UT randomStringUTF8(length); ``` -**Parameters** +**Arguments** - `length` — Required length of the resulting string in code points. [UInt64](../../sql-reference/data-types/int-uint.md). @@ -1845,7 +1845,7 @@ Checks whether the [Decimal](../../sql-reference/data-types/decimal.md) value is isDecimalOverflow(d, [p]) ``` -**Parameters** +**Arguments** - `d` — value. [Decimal](../../sql-reference/data-types/decimal.md). - `p` — precision. Optional. If omitted, the initial precision of the first argument is used. Using of this paratemer could be helpful for data extraction to another DBMS or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). @@ -1882,7 +1882,7 @@ Returns number of decimal digits you need to represent the value. countDigits(x) ``` -**Parameters** +**Arguments** - `x` — [Int](../../sql-reference/data-types/int-uint.md) or [Decimal](../../sql-reference/data-types/decimal.md) value. @@ -1941,7 +1941,7 @@ Returns [native interface](../../interfaces/tcp.md) TCP port number listened by tcpPort() ``` -**Parameters** +**Arguments** - None. diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 68998928398..2b9846344e4 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -32,7 +32,7 @@ Produces a constant column with a random value. randConstant([x]) ``` -**Parameters** +**Arguments** - `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. @@ -81,7 +81,7 @@ fuzzBits([s], [prob]) Inverts bits of `s`, each with probability `prob`. -**Parameters** +**Arguments** - `s` - `String` or `FixedString` - `prob` - constant `Float32/64` diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index 922cf7374d7..83db1975366 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -35,7 +35,7 @@ The function returns the nearest number of the specified order. In case when giv round(expression [, decimal_places]) ``` -**Parameters:** +**Arguments:** - `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). - `decimal-places` — An integer value. @@ -114,7 +114,7 @@ For example, sum numbers 1.5, 2.5, 3.5, 4.5 with different rounding: roundBankers(expression [, decimal_places]) ``` -**Parameters** +**Arguments** - `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). - `decimal-places` — Decimal places. An integer number. diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 25f41211b47..c70ee20f076 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -16,7 +16,7 @@ Returns an array of selected substrings. Empty substrings may be selected if the splitByChar(, ) ``` -**Parameters** +**Arguments** - `separator` — The separator which should contain exactly one character. [String](../../sql-reference/data-types/string.md). - `s` — The string to split. [String](../../sql-reference/data-types/string.md). @@ -53,7 +53,7 @@ Splits a string into substrings separated by a string. It uses a constant string splitByString(, ) ``` -**Parameters** +**Arguments** - `separator` — The separator. [String](../../sql-reference/data-types/string.md). - `s` — The string to split. [String](../../sql-reference/data-types/string.md). @@ -121,7 +121,7 @@ Extracts all groups from non-overlapping substrings matched by a regular express extractAllGroups(text, regexp) ``` -**Parameters** +**Arguments** - `text` — [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 2b93dd924a3..3f6ffeee654 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -76,7 +76,7 @@ Replaces invalid UTF-8 characters by the `�` (U+FFFD) character. All running i toValidUTF8( input_string ) ``` -Parameters: +Arguments: - input_string — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object. @@ -104,7 +104,7 @@ Repeats a string as many times as specified and concatenates the replicated valu repeat(s, n) ``` -**Parameters** +**Arguments** - `s` — The string to repeat. [String](../../sql-reference/data-types/string.md). - `n` — The number of times to repeat the string. [UInt](../../sql-reference/data-types/int-uint.md). @@ -173,7 +173,7 @@ Concatenates the strings listed in the arguments, without a separator. concat(s1, s2, ...) ``` -**Parameters** +**Arguments** Values of type String or FixedString. @@ -211,7 +211,7 @@ The function is named “injective” if it always returns different result for concatAssumeInjective(s1, s2, ...) ``` -**Parameters** +**Arguments** Values of type String or FixedString. @@ -328,7 +328,7 @@ By default removes all consecutive occurrences of common whitespace (ASCII chara trim([[LEADING|TRAILING|BOTH] trim_character FROM] input_string) ``` -**Parameters** +**Arguments** - `trim_character` — specified characters for trim. [String](../../sql-reference/data-types/string.md). - `input_string` — string for trim. [String](../../sql-reference/data-types/string.md). @@ -367,7 +367,7 @@ trimLeft(input_string) Alias: `ltrim(input_string)`. -**Parameters** +**Arguments** - `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). @@ -405,7 +405,7 @@ trimRight(input_string) Alias: `rtrim(input_string)`. -**Parameters** +**Arguments** - `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). @@ -443,7 +443,7 @@ trimBoth(input_string) Alias: `trim(input_string)`. -**Parameters** +**Arguments** - `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). @@ -496,7 +496,7 @@ Replaces literals, sequences of literals and complex aliases with placeholders. normalizeQuery(x) ``` -**Parameters** +**Arguments** - `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). @@ -532,7 +532,7 @@ Returns identical 64bit hash values without the values of literals for similar q normalizedQueryHash(x) ``` -**Parameters** +**Arguments** - `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). @@ -570,7 +570,7 @@ The following five XML predefined entities will be replaced: `<`, `&`, `>`, `"`, encodeXMLComponent(x) ``` -**Parameters** +**Arguments** - `x` — The sequence of characters. [String](../../sql-reference/data-types/string.md). diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 92591c89a37..83b0edea438 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -24,7 +24,7 @@ position(haystack, needle[, start_pos]) Alias: `locate(haystack, needle[, start_pos])`. -**Parameters** +**Arguments** - `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -95,7 +95,7 @@ Works under the assumption that the string contains a set of bytes representing positionCaseInsensitive(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -138,7 +138,7 @@ For a case-insensitive search, use the function [positionCaseInsensitiveUTF8](#p positionUTF8(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -211,7 +211,7 @@ Works under the assumption that the string contains a set of bytes representing positionCaseInsensitiveUTF8(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -256,7 +256,7 @@ The search is performed on sequences of bytes without respect to string encoding multiSearchAllPositions(haystack, [needle1, needle2, ..., needlen]) ``` -**Parameters** +**Arguments** - `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -371,7 +371,7 @@ Matches all groups of the `haystack` string using the `pattern` regular expressi extractAllGroupsHorizontal(haystack, pattern) ``` -**Parameters** +**Arguments** - `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md). - `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). @@ -412,7 +412,7 @@ Matches all groups of the `haystack` string using the `pattern` regular expressi extractAllGroupsVertical(haystack, pattern) ``` -**Parameters** +**Arguments** - `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md). - `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). @@ -471,7 +471,7 @@ Case insensitive variant of [like](https://clickhouse.tech/docs/en/sql-reference ilike(haystack, pattern) ``` -**Parameters** +**Arguments** - `haystack` — Input string. [String](../../sql-reference/syntax.md#syntax-string-literal). - `pattern` — If `pattern` doesn't contain percent signs or underscores, then the `pattern` only represents the string itself. An underscore (`_`) in `pattern` stands for (matches) any single character. A percent sign (`%`) matches any sequence of zero or more characters. @@ -548,7 +548,7 @@ For a case-insensitive search, use [countSubstringsCaseInsensitive](../../sql-re countSubstrings(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -614,7 +614,7 @@ Returns the number of substring occurrences case-insensitive. countSubstringsCaseInsensitive(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -680,7 +680,7 @@ Returns the number of substring occurrences in `UTF-8` case-insensitive. SELECT countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -732,7 +732,7 @@ Returns the number of regular expression matches for a `pattern` in a `haystack` countMatches(haystack, pattern) ``` -**Parameters** +**Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). - `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md). diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index dcbcd3e374b..1006b68b8ee 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -45,7 +45,7 @@ untuple(x) You can use the `EXCEPT` expression to skip columns as a result of the query. -**Parameters** +**Arguments** - `x` - A `tuple` function, column, or tuple of elements. [Tuple](../../sql-reference/data-types/tuple.md). diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 18d008f11f2..2b3a9d9103f 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -15,7 +15,7 @@ Arranges `key:value` pairs into [Map(key, value)](../../sql-reference/data-types map(key1, value1[, key2, value2, ...]) ``` -**Parameters** +**Arguments** - `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md). - `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md). @@ -77,7 +77,7 @@ Collect all the keys and sum corresponding values. mapAdd(Tuple(Array, Array), Tuple(Array, Array) [, ...]) ``` -**Parameters** +**Arguments** Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. @@ -111,7 +111,7 @@ Collect all the keys and subtract corresponding values. mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...]) ``` -**Parameters** +**Arguments** Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. @@ -149,7 +149,7 @@ Generates a map, where keys are a series of numbers, from minimum to maximum key The number of elements in `keys` and `values` must be the same for each row. -**Parameters** +**Arguments** - `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). - `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 3ca36f41c78..450945a5ab9 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -22,7 +22,7 @@ Converts an input value to the [Int](../../sql-reference/data-types/int-uint.md) - `toInt128(expr)` — Results in the `Int128` data type. - `toInt256(expr)` — Results in the `Int256` data type. -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. @@ -88,7 +88,7 @@ Converts an input value to the [UInt](../../sql-reference/data-types/int-uint.md - `toUInt64(expr)` — Results in the `UInt64` data type. - `toUInt256(expr)` — Results in the `UInt256` data type. -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. @@ -154,7 +154,7 @@ Converts an input string to a [Nullable(Decimal(P,S))](../../sql-reference/data- These functions should be used instead of `toDecimal*()` functions, if you prefer to get a `NULL` value instead of an exception in the event of an input value parsing error. -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. @@ -199,7 +199,7 @@ Converts an input value to the [Decimal(P,S)](../../sql-reference/data-types/dec These functions should be used instead of `toDecimal*()` functions, if you prefer to get a `0` value instead of an exception in the event of an input value parsing error. -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. @@ -467,7 +467,7 @@ toIntervalQuarter(number) toIntervalYear(number) ``` -**Parameters** +**Arguments** - `number` — Duration of interval. Positive integer number. @@ -505,7 +505,7 @@ The function parses [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 112 parseDateTimeBestEffort(time_string [, time_zone]); ``` -**Parameters** +**Arguments** - `time_string` — String containing a date and time to convert. [String](../../sql-reference/data-types/string.md). - `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../../sql-reference/data-types/string.md). @@ -617,7 +617,7 @@ This function is similar to [‘parseDateTimeBestEffort’](#parsedatetimebestef parseDateTimeBestEffortUS(time_string [, time_zone]); ``` -**Parameters** +**Arguments** - `time_string` — String containing a date and time to convert. [String](../../sql-reference/data-types/string.md). - `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../../sql-reference/data-types/string.md). @@ -701,7 +701,7 @@ To convert data from the `LowCardinality` data type use the [CAST](#type_convers toLowCardinality(expr) ``` -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../sql-reference/data-types/index.md#data_types). @@ -741,7 +741,7 @@ Converts a `DateTime64` to a `Int64` value with fixed sub-second precision. Inpu toUnixTimestamp64Milli(value) ``` -**Parameters** +**Arguments** - `value` — DateTime64 value with any precision. @@ -793,7 +793,7 @@ Converts an `Int64` to a `DateTime64` value with fixed sub-second precision and fromUnixTimestamp64Milli(value [, ti]) ``` -**Parameters** +**Arguments** - `value` — `Int64` value with any precision. - `timezone` — `String` (optional) timezone name of the result. @@ -825,7 +825,7 @@ Converts arbitrary expressions into a string via given format. formatRow(format, x, y, ...) ``` -**Parameters** +**Arguments** - `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated). - `x`,`y`, ... — Expressions. @@ -866,7 +866,7 @@ Converts arbitrary expressions into a string via given format. The function trim formatRowNoNewline(format, x, y, ...) ``` -**Parameters** +**Arguments** - `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated). - `x`,`y`, ... — Expressions. diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index 006542f494a..3eea69c552b 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -25,7 +25,7 @@ Extracts the hostname from a URL. domain(url) ``` -**Parameters** +**Arguments** - `url` — URL. Type: [String](../../sql-reference/data-types/string.md). @@ -76,7 +76,7 @@ Extracts the the top-level domain from a URL. topLevelDomain(url) ``` -**Parameters** +**Arguments** - `url` — URL. Type: [String](../../sql-reference/data-types/string.md). @@ -242,7 +242,7 @@ Extracts network locality (`username:password@host:port`) from a URL. netloc(URL) ``` -**Parameters** +**Arguments** - `url` — URL. [String](../../sql-reference/data-types/string.md). diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index f70532252c7..56530b5e83b 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -115,7 +115,7 @@ Finds the highest continent in the hierarchy for the region. regionToTopContinent(id[, geobase]); ``` -**Parameters** +**Arguments** - `id` — Region ID from the Yandex geobase. [UInt32](../../sql-reference/data-types/int-uint.md). - `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../../sql-reference/data-types/string.md). Optional. From 97d7a53962a2279f9c0b1d5880e82f16a04b6ed0 Mon Sep 17 00:00:00 2001 From: Anna Date: Tue, 16 Feb 2021 00:33:53 +0300 Subject: [PATCH 76/94] Replacement `Parameters` to `Arguments` for aggregate functions --- .../aggregate-functions/combinators.md | 6 ++-- .../parametric-functions.md | 32 ++++++++++++------- .../aggregate-functions/reference/argmax.md | 2 +- .../aggregate-functions/reference/argmin.md | 2 +- .../aggregate-functions/reference/avg.md | 2 +- .../reference/avgweighted.md | 2 +- .../aggregate-functions/reference/count.md | 2 +- .../reference/grouparrayinsertat.md | 2 +- .../reference/grouparraymovingavg.md | 2 +- .../reference/grouparraymovingsum.md | 2 +- .../reference/grouparraysample.md | 2 +- .../reference/groupbitand.md | 2 +- .../reference/groupbitmap.md | 2 +- .../reference/groupbitmapand.md | 2 +- .../reference/groupbitmapor.md | 2 +- .../reference/groupbitmapxor.md | 2 +- .../reference/groupbitor.md | 2 +- .../reference/groupbitxor.md | 2 +- .../reference/initializeAggregation.md | 2 +- .../aggregate-functions/reference/kurtpop.md | 2 +- .../aggregate-functions/reference/kurtsamp.md | 2 +- .../reference/mannwhitneyutest.md | 2 +- .../aggregate-functions/reference/quantile.md | 2 +- .../reference/quantiledeterministic.md | 2 +- .../reference/quantileexact.md | 6 ++-- .../reference/quantileexactweighted.md | 2 +- .../reference/quantiletdigest.md | 2 +- .../reference/quantiletdigestweighted.md | 2 +- .../reference/quantiletiming.md | 2 +- .../reference/quantiletimingweighted.md | 2 +- .../aggregate-functions/reference/rankCorr.md | 2 +- .../aggregate-functions/reference/skewpop.md | 2 +- .../aggregate-functions/reference/skewsamp.md | 2 +- .../reference/studentttest.md | 2 +- .../aggregate-functions/reference/topk.md | 2 +- .../reference/topkweighted.md | 2 +- .../aggregate-functions/reference/uniq.md | 2 +- .../reference/uniqcombined.md | 2 +- .../reference/uniqexact.md | 2 +- .../reference/uniqhll12.md | 2 +- .../reference/welchttest.md | 2 +- 41 files changed, 65 insertions(+), 55 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/combinators.md b/docs/en/sql-reference/aggregate-functions/combinators.md index 431968bc629..015c90e90c7 100644 --- a/docs/en/sql-reference/aggregate-functions/combinators.md +++ b/docs/en/sql-reference/aggregate-functions/combinators.md @@ -72,7 +72,7 @@ If an aggregate function doesn’t have input values, with this combinator it re OrDefault(x) ``` -**Parameters** +**Arguments** - `x` — Aggregate function parameters. @@ -132,7 +132,7 @@ This combinator converts a result of an aggregate function to the [Nullable](../ OrNull(x) ``` -**Parameters** +**Arguments** - `x` — Aggregate function parameters. @@ -189,7 +189,7 @@ Lets you divide data into groups, and then separately aggregates the data in tho Resample(start, end, step)(, resampling_key) ``` -**Parameters** +**Arguments** - `start` — Starting value of the whole required interval for `resampling_key` values. - `stop` — Ending value of the whole required interval for `resampling_key` values. The whole interval doesn’t include the `stop` value `[start, stop)`. diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 4b3bf12aa8c..035bc91b9ed 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -17,10 +17,13 @@ histogram(number_of_bins)(values) The functions uses [A Streaming Parallel Decision Tree Algorithm](http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf). The borders of histogram bins are adjusted as new data enters a function. In common case, the widths of bins are not equal. +**Arguments** + +`values` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in input values. + **Parameters** `number_of_bins` — Upper limit for the number of bins in the histogram. The function automatically calculates the number of bins. It tries to reach the specified number of bins, but if it fails, it uses fewer bins. -`values` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in input values. **Returned values** @@ -89,14 +92,16 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...) !!! warning "Warning" Events that occur at the same second may lay in the sequence in an undefined order affecting the result. -**Parameters** - -- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). +**Arguments** - `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types. - `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them. +**Parameters** + +- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). + **Returned values** - 1, if the pattern is matched. @@ -176,14 +181,16 @@ Counts the number of event chains that matched the pattern. The function searche sequenceCount(pattern)(timestamp, cond1, cond2, ...) ``` -**Parameters** - -- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). +**Arguments** - `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types. - `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them. +**Parameters** + +- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). + **Returned values** - Number of non-overlapping event chains that are matched. @@ -239,13 +246,16 @@ The function works according to the algorithm: windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN) ``` +**Arguments** + +- `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, it’s value can’t exceed the Int64 maximum, which is 2^63 - 1). +- `cond` — Conditions or data describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md). + **Parameters** - `window` — Length of the sliding window. The unit of `window` depends on the timestamp itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`. -- `mode` - It is an optional argument. +- `mode` - It is an optional parameter. - `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values. -- `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, it’s value can’t exceed the Int64 maximum, which is 2^63 - 1). -- `cond` — Conditions or data describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md). **Returned value** @@ -324,7 +334,7 @@ The conditions, except the first, apply in pairs: the result of the second will retention(cond1, cond2, ..., cond32); ``` -**Parameters** +**Arguments** - `cond` — an expression that returns a `UInt8` result (1 or 0). diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md index 9899c731ce9..7639117042f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md @@ -20,7 +20,7 @@ or argMax(tuple(arg, val)) ``` -**Parameters** +**Arguments** - `arg` — Argument. - `val` — Value. diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md index 2fe9a313260..7ddc38cd28a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md @@ -20,7 +20,7 @@ or argMin(tuple(arg, val)) ``` -**Parameters** +**Arguments** - `arg` — Argument. - `val` — Value. diff --git a/docs/en/sql-reference/aggregate-functions/reference/avg.md b/docs/en/sql-reference/aggregate-functions/reference/avg.md index e2e6aace734..12dc4ac1e9d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avg.md @@ -12,7 +12,7 @@ Calculates the arithmetic mean. avgWeighted(x) ``` -**Parameter** +**Arguments** - `x` — Values. diff --git a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md index 7b9c0de2755..2df09e560b4 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md @@ -12,7 +12,7 @@ Calculates the [weighted arithmetic mean](https://en.wikipedia.org/wiki/Weighted avgWeighted(x, weight) ``` -**Parameters** +**Arguments** - `x` — Values. - `weight` — Weights of the values. diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md index e5d31429e12..0a5aef2fe97 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/count.md +++ b/docs/en/sql-reference/aggregate-functions/reference/count.md @@ -10,7 +10,7 @@ ClickHouse supports the following syntaxes for `count`: - `count(expr)` or `COUNT(DISTINCT expr)`. - `count()` or `COUNT(*)`. The `count()` syntax is ClickHouse-specific. -**Parameters** +**Arguments** The function can take: diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md index f4b8665a0a4..68456bf7844 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md @@ -17,7 +17,7 @@ If in one query several values are inserted into the same position, the function - If a query is executed in a single thread, the first one of the inserted values is used. - If a query is executed in multiple threads, the resulting value is an undetermined one of the inserted values. -**Parameters** +**Arguments** - `x` — Value to be inserted. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../../sql-reference/data-types/index.md). - `pos` — Position at which the specified element `x` is to be inserted. Index numbering in the array starts from zero. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges). diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md index 1cd40c2002f..c732efecf58 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md @@ -13,7 +13,7 @@ groupArrayMovingAvg(window_size)(numbers_for_summing) The function can take the window size as a parameter. If left unspecified, the function takes the window size equal to the number of rows in the column. -**Parameters** +**Arguments** - `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value. - `window_size` — Size of the calculation window. diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md index ef979cd5f6a..c3dfeda850e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md @@ -13,7 +13,7 @@ groupArrayMovingSum(window_size)(numbers_for_summing) The function can take the window size as a parameter. If left unspecified, the function takes the window size equal to the number of rows in the column. -**Parameters** +**Arguments** - `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value. - `window_size` — Size of the calculation window. diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md index 36fa6a9d661..df0b8120eef 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md @@ -12,7 +12,7 @@ Creates an array of sample argument values. The size of the resulting array is l groupArraySample(max_size[, seed])(x) ``` -**Parameters** +**Arguments** - `max_size` — Maximum size of the resulting array. [UInt64](../../data-types/int-uint.md). - `seed` — Seed for the random number generator. Optional. [UInt64](../../data-types/int-uint.md). Default value: `123456`. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md index 9be73fd54ec..1275ad7536c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md @@ -10,7 +10,7 @@ Applies bitwise `AND` for series of numbers. groupBitAnd(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `UInt*` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md index 9367652db38..9317ef98783 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md @@ -10,7 +10,7 @@ Bitmap or Aggregate calculations from a unsigned integer column, return cardinal groupBitmap(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `UInt*` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md index 7c0c89040bb..f59bb541a42 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md @@ -10,7 +10,7 @@ Calculations the AND of a bitmap column, return cardinality of type UInt64, if a groupBitmapAnd(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md index 894c6c90aab..a4d99fd29e3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md @@ -10,7 +10,7 @@ Calculations the OR of a bitmap column, return cardinality of type UInt64, if ad groupBitmapOr(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md index 5d0ec0fb097..834f088d02f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md @@ -10,7 +10,7 @@ Calculations the XOR of a bitmap column, return cardinality of type UInt64, if a groupBitmapOr(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md index 7383e620060..e427a9ad970 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md @@ -10,7 +10,7 @@ Applies bitwise `OR` for series of numbers. groupBitOr(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `UInt*` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md index 01026012b91..4b8323f92db 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md @@ -10,7 +10,7 @@ Applies bitwise `XOR` for series of numbers. groupBitXor(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `UInt*` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md b/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md index ea44d5f1ddd..313d6bf81f5 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md +++ b/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md @@ -13,7 +13,7 @@ Use it for tests or to process columns of types `AggregateFunction` and `Aggrega initializeAggregation (aggregate_function, column_1, column_2); ``` -**Parameters** +**Arguments** - `aggregate_function` — Name of the aggregation function. The state of this function — the creating one. [String](../../../sql-reference/data-types/string.md#string). - `column_n` — The column to translate it into the function as it's argument. [String](../../../sql-reference/data-types/string.md#string). diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md index 65e7e31b9b4..db402c99663 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md @@ -10,7 +10,7 @@ Computes the [kurtosis](https://en.wikipedia.org/wiki/Kurtosis) of a sequence. kurtPop(expr) ``` -**Parameters** +**Arguments** `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number. diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md index 224bbbdb9e7..4bb9f76763b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md @@ -12,7 +12,7 @@ It represents an unbiased estimate of the kurtosis of a random variable if passe kurtSamp(expr) ``` -**Parameters** +**Arguments** `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number. diff --git a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md index 012df7052aa..e6dd680c457 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md @@ -16,7 +16,7 @@ mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_ind Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population. The null hypothesis is that two populations are stochastically equal. Also one-sided hypothesises can be tested. This test does not assume that data have normal distribution. -**Parameters** +**Arguments** - `alternative` — alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md). - `'two-sided'`; diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantile.md b/docs/en/sql-reference/aggregate-functions/reference/quantile.md index 77f858a1735..d625ef4cfd9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantile.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantile.md @@ -18,7 +18,7 @@ quantile(level)(expr) Alias: `median`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md index 6046447dd10..a20ac26f599 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md @@ -18,7 +18,7 @@ quantileDeterministic(level)(expr, determinator) Alias: `medianDeterministic`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md index a39f724f368..06ef7ccfbd3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md @@ -18,7 +18,7 @@ quantileExact(level)(expr) Alias: `medianExact`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). @@ -77,7 +77,7 @@ quantileExact(level)(expr) Alias: `medianExactLow`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). @@ -128,7 +128,7 @@ quantileExactHigh(level)(expr) Alias: `medianExactHigh`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md index 3251f8298a6..210f44e7587 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md @@ -18,7 +18,7 @@ quantileExactWeighted(level)(expr, weight) Alias: `medianExactWeighted`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md index bda98ea338d..dcc665a68af 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md @@ -20,7 +20,7 @@ quantileTDigest(level)(expr) Alias: `medianTDigest`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md index 309cbe95e95..56ef598f7e7 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md @@ -20,7 +20,7 @@ quantileTDigest(level)(expr) Alias: `medianTDigest`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md index 867e8b87e74..58ce6495a96 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md @@ -18,7 +18,7 @@ quantileTiming(level)(expr) Alias: `medianTiming`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md index 817cd831d85..fb3b9dbf4d2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md @@ -18,7 +18,7 @@ quantileTimingWeighted(level)(expr, weight) Alias: `medianTimingWeighted`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). diff --git a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md index dc23029f239..55ee1b8289b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md +++ b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md @@ -8,7 +8,7 @@ Computes a rank correlation coefficient. rankCorr(x, y) ``` -**Parameters** +**Arguments** - `x` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64). - `y` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64). diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md index d15a5ffdd47..b9dfc390f9d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md @@ -10,7 +10,7 @@ Computes the [skewness](https://en.wikipedia.org/wiki/Skewness) of a sequence. skewPop(expr) ``` -**Parameters** +**Arguments** `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number. diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md index cb323f4b142..f7a6df8f507 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md @@ -12,7 +12,7 @@ It represents an unbiased estimate of the skewness of a random variable if passe skewSamp(expr) ``` -**Parameters** +**Arguments** `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number. diff --git a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md index f868e976039..ba10c1d62d9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md @@ -16,7 +16,7 @@ studentTTest(sample_data, sample_index) Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population. The null hypothesis is that means of populations are equal. Normal distribution with equal variances is assumed. -**Parameters** +**Arguments** - `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). - `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/topk.md b/docs/en/sql-reference/aggregate-functions/reference/topk.md index 004a67d33af..b3e79803ba1 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topk.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topk.md @@ -16,7 +16,7 @@ This function doesn’t provide a guaranteed result. In certain situations, erro We recommend using the `N < 10` value; performance is reduced with large `N` values. Maximum value of `N = 65536`. -**Parameters** +**Arguments** - ‘N’ is the number of elements to return. diff --git a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md index b597317f44e..02b9f77ea6f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md @@ -12,7 +12,7 @@ Similar to `topK` but takes one additional argument of integer type - `weight`. topKWeighted(N)(x, weight) ``` -**Parameters** +**Arguments** - `N` — The number of elements to return. diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniq.md b/docs/en/sql-reference/aggregate-functions/reference/uniq.md index 81d1ec6761e..7ba2cdc6cb8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniq.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniq.md @@ -10,7 +10,7 @@ Calculates the approximate number of different values of the argument. uniq(x[, ...]) ``` -**Parameters** +**Arguments** The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md index c52486bc38f..4434686ae61 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md @@ -12,7 +12,7 @@ uniqCombined(HLL_precision)(x[, ...]) The `uniqCombined` function is a good choice for calculating the number of different values. -**Parameters** +**Arguments** The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md index 9a6224533c8..eee675016ee 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md @@ -14,7 +14,7 @@ Use the `uniqExact` function if you absolutely need an exact result. Otherwise u The `uniqExact` function uses more memory than `uniq`, because the size of the state has unbounded growth as the number of different values increases. -**Parameters** +**Arguments** The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md index fcddc22cc46..5b23ea81eae 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md @@ -10,7 +10,7 @@ Calculates the approximate number of different argument values, using the [Hyper uniqHLL12(x[, ...]) ``` -**Parameters** +**Arguments** The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. diff --git a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md index 3fe1c9d58b9..18cff885867 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md @@ -16,7 +16,7 @@ welchTTest(sample_data, sample_index) Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population. The null hypothesis is that means of populations are equal. Normal distribution is assumed. Populations may have unequal variance. -**Parameters** +**Arguments** - `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). - `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md). From d7db44c9116a6b1f767d56a5cd1963a13b5a880d Mon Sep 17 00:00:00 2001 From: Anna Date: Tue, 16 Feb 2021 00:38:32 +0300 Subject: [PATCH 77/94] Other replacement --- .../aggregate-functions/reference/mannwhitneyutest.md | 8 +++++--- docs/en/sql-reference/table-functions/generate.md | 2 +- docs/en/sql-reference/table-functions/mysql.md | 2 +- docs/en/sql-reference/table-functions/view.md | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md index e6dd680c457..12982849513 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md @@ -18,14 +18,16 @@ The null hypothesis is that two populations are stochastically equal. Also one-s **Arguments** +- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Parameters** + - `alternative` — alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md). - `'two-sided'`; - `'greater'`; - `'less'`. - `continuity_correction` - if not 0 then continuity correction in the normal approximation for the p-value is applied. (Optional, default: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md). -- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). -- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md). - **Returned values** diff --git a/docs/en/sql-reference/table-functions/generate.md b/docs/en/sql-reference/table-functions/generate.md index 5bbd22dfe4e..be6ba2b8bc4 100644 --- a/docs/en/sql-reference/table-functions/generate.md +++ b/docs/en/sql-reference/table-functions/generate.md @@ -13,7 +13,7 @@ Supports all data types that can be stored in table except `LowCardinality` and generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_string_length'[, 'max_array_length']]]); ``` -**Parameters** +**Arguments** - `name` — Name of corresponding column. - `TypeName` — Type of corresponding column. diff --git a/docs/en/sql-reference/table-functions/mysql.md b/docs/en/sql-reference/table-functions/mysql.md index eec4a1d0c46..14cd4369285 100644 --- a/docs/en/sql-reference/table-functions/mysql.md +++ b/docs/en/sql-reference/table-functions/mysql.md @@ -13,7 +13,7 @@ Allows `SELECT` and `INSERT` queries to be performed on data that is stored on a mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']) ``` -**Parameters** +**Arguments** - `host:port` — MySQL server address. diff --git a/docs/en/sql-reference/table-functions/view.md b/docs/en/sql-reference/table-functions/view.md index 9997971af65..08096c2b019 100644 --- a/docs/en/sql-reference/table-functions/view.md +++ b/docs/en/sql-reference/table-functions/view.md @@ -13,7 +13,7 @@ Turns a subquery into a table. The function implements views (see [CREATE VIEW]( view(subquery) ``` -**Parameters** +**Arguments** - `subquery` — `SELECT` query. From e485d4cad8e21e721ad250f9117b5717a6d64fd7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 16 Feb 2021 09:27:48 +0300 Subject: [PATCH 78/94] Fix SIGSEGV on Unknown packet for Distributed queries On Unknown packet disconnect() will be called, which will reset the input stream, so no need to call setAsyncCallback(): [ 42015 ] {} BaseDaemon: (version 21.3.1.1, build id: 4F9644AF560F6BB6) (from thread 45051) (no query) Received signal Segmentation fault (11) [ 42015 ] {} BaseDaemon: Address: 0x90 Access: read. Address not mapped to object. [ 42015 ] {} BaseDaemon: Stack trace: 0xf82e0f4 0xf82de19 0xf83b9a5 0xf83c0e0 0xe9a6fa7 0xf95016c 0xf950939 0xf95020c 0xf950939 0xf95020c 0xf950939 0xf95020c 0xf9508f9 0xf953e40 0xf958376 0x88056af 0x8809143 0x7f4b3e1aaf27 0x7f4b3e0dc31f [ 42015 ] {} BaseDaemon: 2. ext::basic_scope_guard)::$_3>::~basic_scope_guard() @ 0xf82e0f4 in /usr/lib/debug/.build-id/4f/9644af560f6bb6.debug [ 42015 ] {} BaseDaemon: 3. DB::Connection::receivePacket(std::__1::function) @ 0xf82de19 in /usr/lib/debug/.build-id/4f/9644af560f6bb6.debug [ 42015 ] {} BaseDaemon: 4. DB::MultiplexedConnections::receivePacketUnlocked(std::__1::function) @ 0xf83b9a5 in /usr/lib/debug/.build-id/4f/9644af560f6bb6.debug [ 42015 ] {} BaseDaemon: 5. DB::MultiplexedConnections::drain() @ 0xf83c0e0 in /usr/lib/debug/.build-id/4f/9644af560f6bb6.debug [ 42015 ] {} BaseDaemon: 6. DB::RemoteQueryExecutor::finish(std::__1::unique_ptr >*) @ 0xe9a6fa7 in /usr/lib/debug/.build-id/4f/9644af560f6bb6.debug [ 42015 ] {} BaseDaemon: 7. DB::PipelineExecutor::tryAddProcessorToStackIfUpdated() @ 0xf95016c in /usr/lib/debug/.build-id/4f/9644af560f6bb6.debug ... --- src/Client/Connection.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index e38a6b240a6..164b9565633 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -756,7 +756,11 @@ std::optional Connection::checkPacket(size_t timeout_microseconds) Packet Connection::receivePacket(std::function async_callback) { in->setAsyncCallback(std::move(async_callback)); - SCOPE_EXIT(in->setAsyncCallback({})); + SCOPE_EXIT({ + /// disconnect() will reset "in". + if (in) + in->setAsyncCallback({}); + }); try { From e39215e38bb6c82fa863f1c117eded0389d7a381 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 16 Feb 2021 11:03:02 +0300 Subject: [PATCH 79/94] Fix has_some condition on CollectJoinOnKeysVisitor --- src/Interpreters/CollectJoinOnKeysVisitor.cpp | 3 ++- src/Interpreters/TreeRewriter.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp index 9033dd0f0f8..a0ea27e9905 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -49,7 +49,8 @@ void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const else throw Exception("Cannot detect left and right JOIN keys. JOIN ON section is ambiguous.", ErrorCodes::AMBIGUOUS_COLUMN_NAME); - has_some = true; + if (table_no.first != table_no.second && table_no.first > 0 && table_no.second > 0) + has_some = true; } void CollectJoinOnKeysMatcher::Data::addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 22356622f8d..cef4a0203bb 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -427,7 +427,7 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele { data.asofToJoinKeys(); } - else if (data.new_where_conditions && data.new_on_expression) + else if (data.new_on_expression) { table_join.on_expression = data.new_on_expression; new_where_conditions = data.new_where_conditions; From 3d19d0644ebbf292eebf1135aac059a08f2d6c82 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 16 Feb 2021 13:46:25 +0300 Subject: [PATCH 80/94] Update join on associativity in some tests --- tests/queries/0_stateless/00826_cross_to_inner_join.reference | 2 +- tests/queries/0_stateless/00849_multiple_comma_join_2.reference | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00826_cross_to_inner_join.reference b/tests/queries/0_stateless/00826_cross_to_inner_join.reference index e7c8d6b1ea9..84867de2849 100644 --- a/tests/queries/0_stateless/00826_cross_to_inner_join.reference +++ b/tests/queries/0_stateless/00826_cross_to_inner_join.reference @@ -95,7 +95,7 @@ SELECT t2_00826.a, t2_00826.b FROM t1_00826 -ALL INNER JOIN t2_00826 ON (a = t2_00826.a) AND (a = t2_00826.a) AND (a = t2_00826.a) AND (b = t2_00826.b) +ALL INNER JOIN t2_00826 ON (((a = t2_00826.a) AND (a = t2_00826.a)) AND (a = t2_00826.a)) AND (b = t2_00826.b) WHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) cross split conjunction SELECT diff --git a/tests/queries/0_stateless/00849_multiple_comma_join_2.reference b/tests/queries/0_stateless/00849_multiple_comma_join_2.reference index fc39ef13935..4db65b0b795 100644 --- a/tests/queries/0_stateless/00849_multiple_comma_join_2.reference +++ b/tests/queries/0_stateless/00849_multiple_comma_join_2.reference @@ -127,7 +127,7 @@ FROM ) AS `--.s` CROSS JOIN t3 ) AS `--.s` -ALL INNER JOIN t4 ON (a = `--t1.a`) AND (a = `--t2.a`) AND (a = `--t3.a`) +ALL INNER JOIN t4 ON ((a = `--t1.a`) AND (a = `--t2.a`)) AND (a = `--t3.a`) WHERE (a = `--t1.a`) AND (a = `--t2.a`) AND (a = `--t3.a`) SELECT `--t1.a` AS `t1.a` FROM From 0b0b481825ba2e71074823d2d0bbce043e6e9b4f Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Tue, 16 Feb 2021 14:02:33 +0300 Subject: [PATCH 81/94] DOCSUP-5602: Edited and translated to russian (#20302) * Edited and added translation * Minor fixes * Fix typo Co-authored-by: George Co-authored-by: Ivan <5627721+abyss7@users.noreply.github.com> --- .../sql-reference/functions/url-functions.md | 146 +++++++++++++++- .../sql-reference/functions/url-functions.md | 162 ++++++++++++++++++ 2 files changed, 299 insertions(+), 9 deletions(-) diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index 006542f494a..975695f40b3 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -133,10 +133,9 @@ For example: ### cutToFirstSignificantSubdomainCustom {#cuttofirstsignificantsubdomaincustom} -Same as `cutToFirstSignificantSubdomain` but accept custom TLD list name, useful if: +Returns the part of the domain that includes top-level subdomains up to the first significant subdomain. Accepts custom [TLD list](https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains) name. -- you need fresh TLD list, -- or you have custom. +Can be useful if you need fresh TLD list or you have custom. Configuration example: @@ -149,21 +148,150 @@ Configuration example: ``` -Example: +**Syntax** -- `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/', 'public_suffix_list') = 'yandex.com.tr'`. +``` sql +cutToFirstSignificantSubdomain(URL, TLD) +``` + +**Parameters** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Part of the domain that includes top-level subdomains up to the first significant subdomain. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +```sql +SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Result: + +```text +┌─cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo.there-is-no-such-domain │ +└───────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww} -Same as `cutToFirstSignificantSubdomainWithWWW` but accept custom TLD list name. +Returns the part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. Accepts custom TLD list name. + +Can be useful if you need fresh TLD list or you have custom. + +Configuration example: + +```xml + + + + public_suffix_list.dat + + +``` + +**Syntax** + +```sql +cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) +``` + +**Parameters** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +```sql +SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list'); +``` + +Result: + +```text +┌─cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')─┐ +│ www.foo │ +└──────────────────────────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### firstSignificantSubdomainCustom {#firstsignificantsubdomaincustom} -Same as `firstSignificantSubdomain` but accept custom TLD list name. +Returns the first significant subdomain. Accepts customs TLD list name. -### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww} +Can be useful if you need fresh TLD list or you have custom. -Same as `cutToFirstSignificantSubdomainWithWWW` but accept custom TLD list name. +Configuration example: + +```xml + + + + public_suffix_list.dat + + +``` + +**Syntax** + +```sql +firstSignificantSubdomainCustom(URL, TLD) +``` + +**Parameters** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- First significant subdomain. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +```sql +SELECT firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Result: + +```text +┌─firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo │ +└──────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### port(URL\[, default_port = 0\]) {#port} diff --git a/docs/ru/sql-reference/functions/url-functions.md b/docs/ru/sql-reference/functions/url-functions.md index 1008e2a359c..7541e16bed4 100644 --- a/docs/ru/sql-reference/functions/url-functions.md +++ b/docs/ru/sql-reference/functions/url-functions.md @@ -115,6 +115,168 @@ SELECT topLevelDomain('svn+ssh://www.some.svn-hosting.com:80/repo/trunk') Например, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`. +### cutToFirstSignificantSubdomainCustom {#cuttofirstsignificantsubdomaincustom} + +Возвращает часть домена, включающую поддомены верхнего уровня до первого существенного поддомена. Принимает имя пользовательского [списка доменов верхнего уровня](https://ru.wikipedia.org/wiki/Список_доменов_верхнего_уровня). + +Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский. + +Пример конфигурации: + +```xml + + + + public_suffix_list.dat + + +``` + +**Синтаксис** + +``` sql +cutToFirstSignificantSubdomain(URL, TLD) +``` + +**Parameters** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — имя пользовательского списка доменов верхнего уровня. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Часть домена, включающая поддомены верхнего уровня до первого существенного поддомена. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +```sql +SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Результат: + +```text +┌─cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo.there-is-no-such-domain │ +└───────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). + +### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww} + +Возвращает часть домена, включающую поддомены верхнего уровня до первого существенного поддомена, не опуская "www". Принимает имя пользовательского списка доменов верхнего уровня. + +Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский. + +Пример конфигурации: + +```xml + + + + public_suffix_list.dat + + +``` + +**Синтаксис** + +```sql +cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) +``` + +**Параметры** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — имя пользовательского списка доменов верхнего уровня. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Часть домена, включающая поддомены верхнего уровня до первого существенного поддомена, без удаления `www`. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +```sql +SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list'); +``` + +Результат: + +```text +┌─cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')─┐ +│ www.foo │ +└──────────────────────────────────────────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). + +### firstSignificantSubdomainCustom {#firstsignificantsubdomaincustom} + +Возвращает первый существенный поддомен. Принимает имя пользовательского списка доменов верхнего уровня. + +Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский. + +Пример конфигурации: + +```xml + + + + public_suffix_list.dat + + +``` + +**Синтаксис** + +```sql +firstSignificantSubdomainCustom(URL, TLD) +``` + +**Параметры** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — имя пользовательского списка доменов верхнего уровня. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Первый существенный поддомен. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +```sql +SELECT firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Результат: + +```text +┌─firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo │ +└──────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). + ### port(URL[, default_port = 0]) {#port} Возвращает порт или значение `default_port`, если в URL-адресе нет порта (или передан невалидный URL) From dc32d1fa4196d496d8433d97b7e8f199e3a8a7f2 Mon Sep 17 00:00:00 2001 From: Vladimir Date: Tue, 16 Feb 2021 14:21:23 +0300 Subject: [PATCH 82/94] Make `Arguments` bold in doc --- docs/en/sql-reference/functions/other-functions.md | 2 +- docs/en/sql-reference/functions/string-functions.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index dcbb7d1ffeb..04e921b5c55 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -460,7 +460,7 @@ Allows building a unicode-art diagram. `bar(x, min, max, width)` draws a band with a width proportional to `(x - min)` and equal to `width` characters when `x = max`. -Arguments: +**Arguments** - `x` — Size to display. - `min, max` — Integer constants. The value must fit in `Int64`. diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 3f6ffeee654..dc5304b39aa 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -76,7 +76,7 @@ Replaces invalid UTF-8 characters by the `�` (U+FFFD) character. All running i toValidUTF8( input_string ) ``` -Arguments: +**Arguments** - input_string — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object. From 7c5d8458661d644aebb607fd344c82478143ea1f Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Tue, 16 Feb 2021 15:37:49 +0300 Subject: [PATCH 83/94] refactor function --- src/Functions/FunctionFile.cpp | 175 +++++++++++------- src/IO/ReadBufferFromFile.h | 4 +- .../01658_read_file_to_stringcolumn.reference | 3 + .../01658_read_file_to_stringcolumn.sh | 6 +- 4 files changed, 113 insertions(+), 75 deletions(-) diff --git a/src/Functions/FunctionFile.cpp b/src/Functions/FunctionFile.cpp index e4327862982..f477f6123c3 100644 --- a/src/Functions/FunctionFile.cpp +++ b/src/Functions/FunctionFile.cpp @@ -11,93 +11,124 @@ namespace DB { - namespace ErrorCodes +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int NOT_IMPLEMENTED; + extern const int INCORRECT_FILE_NAME; + extern const int DATABASE_ACCESS_DENIED; + extern const int FILE_DOESNT_EXIST; +} + +/// A function to read file as a string. +class FunctionFile : public IFunction +{ +public: + static constexpr auto name = "file"; + static FunctionPtr create(const Context &context) { return std::make_shared(context); } + explicit FunctionFile(const Context &context_) : context(context_) {} + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - extern const int ILLEGAL_COLUMN; - extern const int NOT_IMPLEMENTED; - extern const int INCORRECT_FILE_NAME; - extern const int DATABASE_ACCESS_DENIED; + if (!isString(arguments[0].type)) + throw Exception(getName() + " is only implemented for types String", ErrorCodes::NOT_IMPLEMENTED); + return std::make_shared(); } - /** A function to read file as a string. - */ - class FunctionFile : public IFunction + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - public: - static constexpr auto name = "file"; - static FunctionPtr create(const Context &context) { return std::make_shared(context); } - explicit FunctionFile(const Context &context_) : context(context_) {} + const ColumnPtr column = arguments[0].column; + const ColumnString * expected = checkAndGetColumn(column.get()); + if (!expected) + throw Exception( + fmt::format("Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()), + ErrorCodes::ILLEGAL_COLUMN); - String getName() const override { return name; } + const ColumnString::Chars & chars = expected->getChars(); + const ColumnString::Offsets & offsets = expected->getOffsets(); - size_t getNumberOfArguments() const override { return 1; } - bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + std::vector checked_filenames(input_rows_count); - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + auto result = ColumnString::create(); + auto & res_chars = result->getChars(); + auto & res_offsets = result->getOffsets(); + + res_offsets.resize(input_rows_count); + + size_t source_offset = 0; + size_t result_offset = 0; + for (size_t row = 0; row < input_rows_count; ++row) { - if (!isString(arguments[0].type)) - throw Exception(getName() + " is only implemented for types String", ErrorCodes::NOT_IMPLEMENTED); - return std::make_shared(); + const char * filename = reinterpret_cast(&chars[source_offset]); + + const String user_files_path = context.getUserFilesPath(); + String user_files_absolute_path = Poco::Path(user_files_path).makeAbsolute().makeDirectory().toString(); + Poco::Path poco_filepath = Poco::Path(filename); + if (poco_filepath.isRelative()) + poco_filepath = Poco::Path(user_files_absolute_path, poco_filepath); + const String file_absolute_path = poco_filepath.absolute().toString(); + checkReadIsAllowedOrThrow(user_files_absolute_path, file_absolute_path); + + checked_filenames[row] = file_absolute_path; + auto file = Poco::File(file_absolute_path); + + if (!file.exists()) + throw Exception(fmt::format("File {} doesn't exist.", file_absolute_path), ErrorCodes::FILE_DOESNT_EXIST); + + const auto current_file_size = Poco::File(file_absolute_path).getSize(); + + result_offset += current_file_size + 1; + res_offsets[row] = result_offset; + source_offset = offsets[row]; } - bool useDefaultImplementationForConstants() const override { return true; } + res_chars.resize(result_offset); - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + size_t prev_offset = 0; + + for (size_t row = 0; row < input_rows_count; ++row) { - const auto & column = arguments[0].column; - const char * filename = nullptr; - if (const auto * column_string = checkAndGetColumn(column.get())) - { - const auto & filename_chars = column_string->getChars(); - filename = reinterpret_cast(&filename_chars[0]); - auto res = ColumnString::create(); - auto & res_chars = res->getChars(); - auto & res_offsets = res->getOffsets(); + auto file_absolute_path = checked_filenames[row]; + ReadBufferFromFile in(file_absolute_path); + char * res_buf = reinterpret_cast(&res_chars[prev_offset]); - const String user_files_path = context.getUserFilesPath(); - String user_files_absolute_path = Poco::Path(user_files_path).makeAbsolute().makeDirectory().toString(); - Poco::Path poco_filepath = Poco::Path(filename); - if (poco_filepath.isRelative()) - poco_filepath = Poco::Path(user_files_absolute_path, poco_filepath); - const String file_absolute_path = poco_filepath.absolute().toString(); - checkReadIsAllowed(user_files_absolute_path, file_absolute_path); - - ReadBufferFromFile in(file_absolute_path); - ssize_t file_len = Poco::File(file_absolute_path).getSize(); - res_chars.resize_exact(file_len + 1); - char *res_buf = reinterpret_cast(&res_chars[0]); - in.readStrict(res_buf, file_len); - res_offsets.push_back(file_len + 1); - res_buf[file_len] = '\0'; - - return res; - } - else - { - throw Exception("Bad Function arguments for file() " + std::string(filename), ErrorCodes::ILLEGAL_COLUMN); - } + const size_t file_lenght = res_offsets[row] - prev_offset - 1; + prev_offset = res_offsets[row]; + in.readStrict(res_buf, file_lenght); + res_buf[file_lenght] = '\0'; } - private: - void checkReadIsAllowed(const std::string & user_files_absolute_path, const std::string & file_absolute_path) const - { - // If run in Local mode, no need for path checking. - if (context.getApplicationType() != Context::ApplicationType::LOCAL) - if (file_absolute_path.find(user_files_absolute_path) != 0) - throw Exception("File is not inside " + user_files_absolute_path, ErrorCodes::DATABASE_ACCESS_DENIED); - - Poco::File path_poco_file = Poco::File(file_absolute_path); - if (path_poco_file.exists() && path_poco_file.isDirectory()) - throw Exception("File can't be a directory", ErrorCodes::INCORRECT_FILE_NAME); - } - - const Context & context; - }; - - - void registerFunctionFile(FunctionFactory & factory) - { - factory.registerFunction(); + return result; } +private: + + void checkReadIsAllowedOrThrow(const std::string & user_files_absolute_path, const std::string & file_absolute_path) const + { + // If run in Local mode, no need for path checking. + if (context.getApplicationType() != Context::ApplicationType::LOCAL) + if (file_absolute_path.find(user_files_absolute_path) != 0) + throw Exception("File is not inside " + user_files_absolute_path, ErrorCodes::DATABASE_ACCESS_DENIED); + + Poco::File path_poco_file = Poco::File(file_absolute_path); + if (path_poco_file.exists() && path_poco_file.isDirectory()) + throw Exception("File can't be a directory", ErrorCodes::INCORRECT_FILE_NAME); + } + + const Context & context; +}; + + +void registerFunctionFile(FunctionFactory & factory) +{ + factory.registerFunction(); +} + } diff --git a/src/IO/ReadBufferFromFile.h b/src/IO/ReadBufferFromFile.h index cebda605b21..33365bc7ceb 100644 --- a/src/IO/ReadBufferFromFile.h +++ b/src/IO/ReadBufferFromFile.h @@ -25,11 +25,11 @@ protected: CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForRead}; public: - ReadBufferFromFile(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1, + explicit ReadBufferFromFile(const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1, char * existing_memory = nullptr, size_t alignment = 0); /// Use pre-opened file descriptor. - ReadBufferFromFile( + explicit ReadBufferFromFile( int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object. const std::string & original_file_name = {}, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference index a22076de920..87659c32e39 100644 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference @@ -5,6 +5,9 @@ aaaaaaaaa bbbbbbbbb ccccccccc aaaaaaaaa bbbbbbbbb ccccccccc aaaaaaaaa bbbbbbbbb :0 +aaaaaaaaa +bbbbbbbbb +ccccccccc :107 :79 :35 diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index 43e1e11a193..0359d803a23 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -28,7 +28,11 @@ ${CLICKHOUSE_CLIENT} --query "select file('${user_files_path}/a.txt'), file('${u ${CLICKHOUSE_CLIENT} --query "insert into data select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$? ${CLICKHOUSE_CLIENT} --query "insert into data select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$? ${CLICKHOUSE_CLIENT} --query "select file('${user_files_path}/c.txt'), * from data";echo ":"$? - +${CLICKHOUSE_CLIENT} --multiquery --query " + create table filenames(name String) engine=MergeTree() order by tuple(); + insert into filenames values ('a.txt'), ('b.txt'), ('c.txt'); + select file(name) from filenames format TSV; +" # Invalid cases: (Here using sub-shell to catch exception avoiding the test quit) # Test non-exists file From b404fea18d2175c27683938291901be2bfdb4728 Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Tue, 16 Feb 2021 15:40:09 +0300 Subject: [PATCH 84/94] better --- tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index 0359d803a23..593f0e59ea7 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -32,6 +32,7 @@ ${CLICKHOUSE_CLIENT} --multiquery --query " create table filenames(name String) engine=MergeTree() order by tuple(); insert into filenames values ('a.txt'), ('b.txt'), ('c.txt'); select file(name) from filenames format TSV; + drop table if exists filenames; " # Invalid cases: (Here using sub-shell to catch exception avoiding the test quit) From e37e48b3245fb38b7f11e6b43e069c37a3ad34dc Mon Sep 17 00:00:00 2001 From: Sergi Almacellas Abellana Date: Tue, 16 Feb 2021 14:31:04 +0100 Subject: [PATCH 85/94] Fix typo and ReplicatedMergeTree link on tutorial I was reading your online documentation and I found that there was a typo on the sql command and there was some missing link. Not quite familiar with the clickhouse contribution process, I just edited the files fix directly from github, let me know if there is something else missing from my side. Hope this helps! --- docs/en/getting-started/tutorial.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/getting-started/tutorial.md b/docs/en/getting-started/tutorial.md index 64363c963c5..fe697972dff 100644 --- a/docs/en/getting-started/tutorial.md +++ b/docs/en/getting-started/tutorial.md @@ -644,7 +644,7 @@ If there are no replicas at the moment on replicated table creation, a new first ``` sql CREATE TABLE tutorial.hits_replica (...) -ENGINE = ReplcatedMergeTree( +ENGINE = ReplicatedMergeTree( '/clickhouse_perftest/tables/{shard}/hits', '{replica}' ) From 94ba4942d76773df87fd02ed5cf0acb735ee10c6 Mon Sep 17 00:00:00 2001 From: Nikita Mikhailov Date: Tue, 16 Feb 2021 19:47:45 +0300 Subject: [PATCH 86/94] empty From f83be158ba986b86df8c819b87a0b90d1009068e Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 4 Feb 2021 18:59:05 +0300 Subject: [PATCH 87/94] SHOW TABLES is now considered as one query in the quota calculations, not two queries. --- .../InterpreterShowProcesslistQuery.h | 5 +++++ src/Interpreters/InterpreterShowTablesQuery.h | 5 +++++ tests/integration/test_quota/test.py | 15 +++++++++++---- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/InterpreterShowProcesslistQuery.h b/src/Interpreters/InterpreterShowProcesslistQuery.h index 6b87fd7edc3..fa0bbf075bd 100644 --- a/src/Interpreters/InterpreterShowProcesslistQuery.h +++ b/src/Interpreters/InterpreterShowProcesslistQuery.h @@ -20,6 +20,11 @@ public: BlockIO execute() override; + /// We ignore the quota and limits here because execute() will rewrite a show query as a SELECT query and then + /// the SELECT query will checks the quota and limits. + bool ignoreQuota() const override { return true; } + bool ignoreLimits() const override { return true; } + private: ASTPtr query_ptr; Context & context; diff --git a/src/Interpreters/InterpreterShowTablesQuery.h b/src/Interpreters/InterpreterShowTablesQuery.h index fc5cb2b7505..4f720e68622 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.h +++ b/src/Interpreters/InterpreterShowTablesQuery.h @@ -20,6 +20,11 @@ public: BlockIO execute() override; + /// We ignore the quota and limits here because execute() will rewrite a show query as a SELECT query and then + /// the SELECT query will checks the quota and limits. + bool ignoreQuota() const override { return true; } + bool ignoreLimits() const override { return true; } + private: ASTPtr query_ptr; Context & context; diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index 84454159a58..9289ba47209 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -71,12 +71,12 @@ def started_cluster(): @pytest.fixture(autouse=True) def reset_quotas_and_usage_info(): try: - yield - finally: - copy_quota_xml('simpliest.xml') # To reset usage info. instance.query("DROP QUOTA IF EXISTS qA, qB") copy_quota_xml('simpliest.xml') # To reset usage info. copy_quota_xml('normal_limits.xml') + yield + finally: + pass def test_quota_from_users_xml(): @@ -379,4 +379,11 @@ def test_query_inserts(): instance.query("INSERT INTO test_table values(1)") system_quota_usage( - [["myQuota", "default", 31556952, 1, 1000, 0, 500, 1, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) \ No newline at end of file + [["myQuota", "default", 31556952, 1, 1000, 0, 500, 1, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + +def test_consumption_show_tables_quota(): + instance.query("SHOW TABLES") + + assert re.match( + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t1\\t\\\\N\\t19\\t\\\\N\\t1\\t1000\\t35\\t\\\\N\\t.*\\t\\\\N\n", + instance.query("SHOW QUOTA")) From d8d2bd885c72ae06707f0a15001f2bfb7ba21054 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 4 Feb 2021 22:14:44 +0300 Subject: [PATCH 88/94] Fix calculation of interval's end in quota consumption. --- src/Access/EnabledQuota.cpp | 43 ++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/src/Access/EnabledQuota.cpp b/src/Access/EnabledQuota.cpp index e9d586a692f..e865ffb9b25 100644 --- a/src/Access/EnabledQuota.cpp +++ b/src/Access/EnabledQuota.cpp @@ -39,35 +39,47 @@ struct EnabledQuota::Impl } + /// Returns the end of the current interval. If the passed `current_time` is greater than that end, + /// the function automatically recalculates the interval's end by adding the interval's duration + /// one or more times until the interval's end is greater than `current_time`. + /// If that recalculation occurs the function also resets amounts of resources used and sets the variable + /// `counters_were_reset`. static std::chrono::system_clock::time_point getEndOfInterval( - const Interval & interval, std::chrono::system_clock::time_point current_time, bool * counters_were_reset = nullptr) + const Interval & interval, std::chrono::system_clock::time_point current_time, bool & counters_were_reset) { auto & end_of_interval = interval.end_of_interval; auto end_loaded = end_of_interval.load(); auto end = std::chrono::system_clock::time_point{end_loaded}; if (current_time < end) { - if (counters_were_reset) - *counters_were_reset = false; + counters_were_reset = false; return end; } - const auto duration = interval.duration; + /// We reset counters only if the interval's end has been calculated before. + /// If it hasn't we just calculate the interval's end for the first time and don't reset counters yet. + bool need_reset_counters = (end_loaded.count() != 0); do { - end = end + (current_time - end + duration) / duration * duration; + /// Calculate the end of the next interval: + /// | X | + /// end current_time next_end = end + duration * n + /// where n is an integer number, n >= 1. + const auto duration = interval.duration; + UInt64 n = static_cast((current_time - end + duration) / duration); + end = end + duration * n; if (end_of_interval.compare_exchange_strong(end_loaded, end.time_since_epoch())) - { - boost::range::fill(interval.used, 0); break; - } end = std::chrono::system_clock::time_point{end_loaded}; } while (current_time >= end); - if (counters_were_reset) - *counters_were_reset = true; + if (need_reset_counters) + { + boost::range::fill(interval.used, 0); + counters_were_reset = true; + } return end; } @@ -89,7 +101,7 @@ struct EnabledQuota::Impl if (used > max) { bool counters_were_reset = false; - auto end_of_interval = getEndOfInterval(interval, current_time, &counters_were_reset); + auto end_of_interval = getEndOfInterval(interval, current_time, counters_were_reset); if (counters_were_reset) { used = (interval.used[resource_type] += amount); @@ -116,9 +128,9 @@ struct EnabledQuota::Impl continue; if (used > max) { - bool used_counters_reset = false; - std::chrono::system_clock::time_point end_of_interval = getEndOfInterval(interval, current_time, &used_counters_reset); - if (!used_counters_reset) + bool counters_were_reset = false; + std::chrono::system_clock::time_point end_of_interval = getEndOfInterval(interval, current_time, counters_were_reset); + if (!counters_were_reset) throwQuotaExceed(user_name, intervals.quota_name, resource_type, used, max, interval.duration, end_of_interval); } } @@ -177,7 +189,8 @@ std::optional EnabledQuota::Intervals::getUsage(std::chrono::system_ auto & out = usage.intervals.back(); out.duration = in.duration; out.randomize_interval = in.randomize_interval; - out.end_of_interval = Impl::getEndOfInterval(in, current_time); + bool counters_were_reset = false; + out.end_of_interval = Impl::getEndOfInterval(in, current_time, counters_were_reset); for (auto resource_type : ext::range(MAX_RESOURCE_TYPE)) { if (in.max[resource_type]) From 298130402ebd2327af746ba2785a6c1cf1e684ea Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 5 Feb 2021 22:38:19 +0300 Subject: [PATCH 89/94] SYSTEM queries now consume quota. --- src/Interpreters/InterpreterSystemQuery.h | 3 --- ...myquota.xml => assign_myquota_to_default_user.xml} | 0 .../configs/users.d/{quota.xml => myquota.xml} | 0 .../test_quota/configs/users.d/user_with_no_quota.xml | 10 ++++++++++ tests/integration/test_quota/test.py | 11 +++++++---- 5 files changed, 17 insertions(+), 7 deletions(-) rename tests/integration/test_quota/configs/users.d/{assign_myquota.xml => assign_myquota_to_default_user.xml} (100%) rename tests/integration/test_quota/configs/users.d/{quota.xml => myquota.xml} (100%) create mode 100644 tests/integration/test_quota/configs/users.d/user_with_no_quota.xml diff --git a/src/Interpreters/InterpreterSystemQuery.h b/src/Interpreters/InterpreterSystemQuery.h index 6fd96c15a2e..6fa0a432191 100644 --- a/src/Interpreters/InterpreterSystemQuery.h +++ b/src/Interpreters/InterpreterSystemQuery.h @@ -37,9 +37,6 @@ public: BlockIO execute() override; - bool ignoreQuota() const override { return true; } - bool ignoreLimits() const override { return true; } - private: ASTPtr query_ptr; Context & context; diff --git a/tests/integration/test_quota/configs/users.d/assign_myquota.xml b/tests/integration/test_quota/configs/users.d/assign_myquota_to_default_user.xml similarity index 100% rename from tests/integration/test_quota/configs/users.d/assign_myquota.xml rename to tests/integration/test_quota/configs/users.d/assign_myquota_to_default_user.xml diff --git a/tests/integration/test_quota/configs/users.d/quota.xml b/tests/integration/test_quota/configs/users.d/myquota.xml similarity index 100% rename from tests/integration/test_quota/configs/users.d/quota.xml rename to tests/integration/test_quota/configs/users.d/myquota.xml diff --git a/tests/integration/test_quota/configs/users.d/user_with_no_quota.xml b/tests/integration/test_quota/configs/users.d/user_with_no_quota.xml new file mode 100644 index 00000000000..70f51cfff43 --- /dev/null +++ b/tests/integration/test_quota/configs/users.d/user_with_no_quota.xml @@ -0,0 +1,10 @@ + + + + + + ::/0 + + + + diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index 9289ba47209..353d776c0f3 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -7,9 +7,10 @@ from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry, TSV cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance('instance', user_configs=["configs/users.d/assign_myquota.xml", +instance = cluster.add_instance('instance', user_configs=["configs/users.d/assign_myquota_to_default_user.xml", "configs/users.d/drop_default_quota.xml", - "configs/users.d/quota.xml"]) + "configs/users.d/myquota.xml", + "configs/users.d/user_with_no_quota.xml"]) def check_system_quotas(canonical): @@ -49,9 +50,11 @@ def system_quotas_usage(canonical): def copy_quota_xml(local_file_name, reload_immediately=True): script_dir = os.path.dirname(os.path.realpath(__file__)) instance.copy_file_to_container(os.path.join(script_dir, local_file_name), - '/etc/clickhouse-server/users.d/quota.xml') + '/etc/clickhouse-server/users.d/myquota.xml') if reload_immediately: - instance.query("SYSTEM RELOAD CONFIG") + # We use the special user 'user_with_no_quota' here because + # we don't want SYSTEM RELOAD CONFIG to mess our quota consuming checks. + instance.query("SYSTEM RELOAD CONFIG", user='user_with_no_quota') @pytest.fixture(scope="module", autouse=True) From d357fb9129b09a1749e6055bd19ef57f4187ffb1 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 5 Feb 2021 22:39:08 +0300 Subject: [PATCH 90/94] Fix reading from the table system.quota_usage. --- src/Storages/System/StorageSystemQuotaUsage.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/System/StorageSystemQuotaUsage.cpp b/src/Storages/System/StorageSystemQuotaUsage.cpp index 002ab081bcf..6d6e22e7be6 100644 --- a/src/Storages/System/StorageSystemQuotaUsage.cpp +++ b/src/Storages/System/StorageSystemQuotaUsage.cpp @@ -137,6 +137,9 @@ void StorageSystemQuotaUsage::fillDataImpl( column_quota_name.insertData(quota_name.data(), quota_name.length()); column_quota_key.insertData(quota_key.data(), quota_key.length()); + if (add_column_is_current) + column_is_current->push_back(quota_id == current_quota_id); + if (!interval) { column_start_time.insertDefault(); @@ -171,9 +174,6 @@ void StorageSystemQuotaUsage::fillDataImpl( addValue(*column_max[resource_type], *column_max_null_map[resource_type], interval->max[resource_type], type_info); addValue(*column_usage[resource_type], *column_usage_null_map[resource_type], interval->used[resource_type], type_info); } - - if (add_column_is_current) - column_is_current->push_back(quota_id == current_quota_id); }; auto add_rows = [&](const String & quota_name, const UUID & quota_id, const String & quota_key, const std::vector & intervals) From 5f8a6ab9c109a82ab044b6ee573f86320175839a Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 9 Feb 2021 12:29:33 +0300 Subject: [PATCH 91/94] remove probably useless code --- src/Access/EnabledQuota.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Access/EnabledQuota.cpp b/src/Access/EnabledQuota.cpp index e865ffb9b25..4a77426004d 100644 --- a/src/Access/EnabledQuota.cpp +++ b/src/Access/EnabledQuota.cpp @@ -26,10 +26,6 @@ struct EnabledQuota::Impl std::chrono::seconds duration, std::chrono::system_clock::time_point end_of_interval) { - std::function amount_to_string = [](UInt64 amount) { return std::to_string(amount); }; - if (resource_type == Quota::EXECUTION_TIME) - amount_to_string = [&](UInt64 amount) { return ext::to_string(std::chrono::nanoseconds(amount)); }; - const auto & type_info = Quota::ResourceTypeInfo::get(resource_type); throw Exception( "Quota for user " + backQuote(user_name) + " for " + ext::to_string(duration) + " has been exceeded: " From 29362bb483a9f8390e9e2016a9ed6b6c4acf116a Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 16 Feb 2021 21:48:26 +0000 Subject: [PATCH 92/94] Support vhost --- .../en/engines/table-engines/integrations/rabbitmq.md | 11 ++++++++++- .../ru/engines/table-engines/integrations/rabbitmq.md | 11 ++++++++++- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 7 +++++-- src/Storages/RabbitMQ/StorageRabbitMQ.h | 1 + .../RabbitMQ/WriteBufferToRabbitMQProducer.cpp | 6 +++++- src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h | 2 ++ 6 files changed, 33 insertions(+), 5 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index b0901ee6f6e..c73876fdebe 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -59,10 +59,11 @@ Optional parameters: - `rabbitmq_max_block_size` - `rabbitmq_flush_interval_ms` -Required configuration: The RabbitMQ server configuration should be added using the ClickHouse config file. +Required configuration: + ``` xml root @@ -70,6 +71,14 @@ The RabbitMQ server configuration should be added using the ClickHouse config fi ``` +Additional configuration: + +``` xml + + clickhouse + +``` + Example: ``` sql diff --git a/docs/ru/engines/table-engines/integrations/rabbitmq.md b/docs/ru/engines/table-engines/integrations/rabbitmq.md index dedb5842d68..2a44e085ede 100644 --- a/docs/ru/engines/table-engines/integrations/rabbitmq.md +++ b/docs/ru/engines/table-engines/integrations/rabbitmq.md @@ -52,10 +52,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] - `rabbitmq_max_block_size` - `rabbitmq_flush_interval_ms` -Требуемая конфигурация: Конфигурация сервера RabbitMQ добавляется с помощью конфигурационного файла ClickHouse. +Требуемая конфигурация: + ``` xml root @@ -63,6 +64,14 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ``` +Дополнительная конфигурация: + +``` xml + + clickhouse + +``` + Example: ``` sql diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 3ee9dda2bf3..d14f11c4a29 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -94,6 +94,7 @@ StorageRabbitMQ::StorageRabbitMQ( , login_password(std::make_pair( global_context.getConfigRef().getString("rabbitmq.username"), global_context.getConfigRef().getString("rabbitmq.password"))) + , vhost(global_context.getConfigRef().getString("rabbitmq.vhost", "/")) , semaphore(0, num_consumers) , unique_strbase(getRandomName()) , queue_size(std::max(QUEUE_SIZE, static_cast(getMaxBlockSize()))) @@ -483,7 +484,9 @@ bool StorageRabbitMQ::restoreConnection(bool reconnecting) } connection = std::make_unique(event_handler.get(), - AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login(login_password.first, login_password.second), "/")); + AMQP::Address( + parsed_address.first, parsed_address.second, + AMQP::Login(login_password.first, login_password.second), vhost)); cnt_retries = 0; while (!connection->ready() && !stream_cancelled && ++cnt_retries != RETRIES_MAX) @@ -702,7 +705,7 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() { return std::make_shared( - parsed_address, global_context, login_password, routing_keys, exchange_name, exchange_type, + parsed_address, global_context, login_password, vhost, routing_keys, exchange_name, exchange_type, producer_id.fetch_add(1), persistent, wait_confirm, log, row_delimiter ? std::optional{row_delimiter} : std::nullopt, 1, 1024); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 893c5167a97..aa316e7a842 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -94,6 +94,7 @@ private: String address; std::pair parsed_address; std::pair login_password; + String vhost; std::unique_ptr loop; std::shared_ptr event_handler; diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 08b95d46115..ac1b253b4bb 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -29,6 +29,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( std::pair & parsed_address_, const Context & global_context, const std::pair & login_password_, + const String & vhost_, const Names & routing_keys_, const String & exchange_name_, const AMQP::ExchangeType exchange_type_, @@ -42,6 +43,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( : WriteBuffer(nullptr, 0) , parsed_address(parsed_address_) , login_password(login_password_) + , vhost(vhost_) , routing_keys(routing_keys_) , exchange_name(exchange_name_) , exchange_type(exchange_type_) @@ -149,7 +151,9 @@ bool WriteBufferToRabbitMQProducer::setupConnection(bool reconnecting) } connection = std::make_unique(event_handler.get(), - AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login(login_password.first, login_password.second), "/")); + AMQP::Address( + parsed_address.first, parsed_address.second, + AMQP::Login(login_password.first, login_password.second), vhost)); cnt_retries = 0; while (!connection->ready() && ++cnt_retries != RETRIES_MAX) diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index 2897e20b21d..e88f92239ca 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -21,6 +21,7 @@ public: std::pair & parsed_address_, const Context & global_context, const std::pair & login_password_, + const String & vhost_, const Names & routing_keys_, const String & exchange_name_, const AMQP::ExchangeType exchange_type_, @@ -53,6 +54,7 @@ private: std::pair parsed_address; const std::pair login_password; + const String vhost; const Names routing_keys; const String exchange_name; AMQP::ExchangeType exchange_type; From c809af5dc251cd4087002534ffab9f08dbd63daa Mon Sep 17 00:00:00 2001 From: tison Date: Wed, 17 Feb 2021 12:56:57 +0800 Subject: [PATCH 93/94] ignore data store files --- .gitignore | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.gitignore b/.gitignore index 1e9765dca9e..d33dbf0600d 100644 --- a/.gitignore +++ b/.gitignore @@ -137,3 +137,9 @@ website/package-lock.json /prof *.iml + +# data store +/programs/server/data +/programs/server/metadata +/programs/server/store + From 42c22475e31a1a94731825987d7ef6c77f22ecbc Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Wed, 17 Feb 2021 18:55:24 +0300 Subject: [PATCH 94/94] Don't backport base commit of branch in the same branch (#20628) --- utils/github/backport.py | 2 +- utils/github/local.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/github/backport.py b/utils/github/backport.py index 576e3b069c2..7fddbbee241 100644 --- a/utils/github/backport.py +++ b/utils/github/backport.py @@ -62,7 +62,7 @@ class Backport: RE_NO_BACKPORT = re.compile(r'^v(\d+\.\d+)-no-backport$') RE_BACKPORTED = re.compile(r'^v(\d+\.\d+)-backported$') - # pull-requests are sorted by ancestry from the least recent. + # pull-requests are sorted by ancestry from the most recent. for pr in pull_requests: while repo.comparator(branches[-1][1]) >= repo.comparator(pr['mergeCommit']['oid']): logging.info("PR #{} is already inside {}. Dropping this branch for further PRs".format(pr['number'], branches[-1][0])) diff --git a/utils/github/local.py b/utils/github/local.py index a997721bc76..2ad8d4b8b71 100644 --- a/utils/github/local.py +++ b/utils/github/local.py @@ -6,15 +6,15 @@ import os import re -class RepositoryBase(object): +class RepositoryBase: def __init__(self, repo_path): import git self._repo = git.Repo(repo_path, search_parent_directories=(not repo_path)) - # commit comparator + # comparator of commits def cmp(x, y): - if x == y: + if str(x) == str(y): return 0 if self._repo.is_ancestor(x, y): return -1