Merge branch 'master' into minor-cgroup-improvements

This commit is contained in:
Sergei Trifonov 2023-06-02 18:14:24 +02:00 committed by GitHub
commit 6243a16824
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
112 changed files with 2884 additions and 771 deletions

View File

@ -3,5 +3,5 @@
set -x
service zookeeper start && sleep 7 && /usr/share/zookeeper/bin/zkCli.sh -server localhost:2181 -create create /clickhouse_test '';
gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt
timeout 40m gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt
./process_unit_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv

View File

@ -119,7 +119,7 @@ When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree](
The data of TIME type in MySQL is converted to microseconds in ClickHouse.
Other types are not supported. If MySQL table contains a column of such type, ClickHouse throws exception "Unhandled data type" and stops replication.
Other types are not supported. If MySQL table contains a column of such type, ClickHouse throws an exception and stops replication.
## Specifics and Recommendations {#specifics-and-recommendations}

View File

@ -55,7 +55,7 @@ ATTACH TABLE postgres_database.new_table;
```
:::warning
Before version 22.1, adding a table to replication left an unremoved temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. This issue is fixed in 22.1.
Before version 22.1, adding a table to replication left a non-removed temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. This issue is fixed in 22.1.
:::
## Dynamically removing tables from replication {#dynamically-removing-table-from-replication}
@ -257,7 +257,7 @@ Please note that this should be used only if it is actually needed. If there is
1. [CREATE PUBLICATION](https://postgrespro.ru/docs/postgresql/14/sql-createpublication) -- create query privilege.
2. [CREATE_REPLICATION_SLOT](https://postgrespro.ru/docs/postgrespro/10/protocol-replication#PROTOCOL-REPLICATION-CREATE-SLOT) -- replication privelege.
2. [CREATE_REPLICATION_SLOT](https://postgrespro.ru/docs/postgrespro/10/protocol-replication#PROTOCOL-REPLICATION-CREATE-SLOT) -- replication privilege.
3. [pg_drop_replication_slot](https://postgrespro.ru/docs/postgrespro/9.5/functions-admin#functions-replication) -- replication privilege or superuser.

View File

@ -30,7 +30,7 @@ Allows to connect to [SQLite](https://www.sqlite.org/index.html) database and pe
## Specifics and Recommendations {#specifics-and-recommendations}
SQLite stores the entire database (definitions, tables, indices, and the data itself) as a single cross-platform file on a host machine. During writing SQLite locks the entire database file, therefore write operations are performed sequentially. Read operations can be multitasked.
SQLite stores the entire database (definitions, tables, indices, and the data itself) as a single cross-platform file on a host machine. During writing SQLite locks the entire database file, therefore write operations are performed sequentially. Read operations can be multi-tasked.
SQLite does not require service management (such as startup scripts) or access control based on `GRANT` and passwords. Access control is handled by means of file-system permissions given to the database file itself.
## Usage Example {#usage-example}

View File

@ -156,7 +156,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us
| rpc\_client\_connect\_timeout | 600 * 1000 |
| rpc\_client\_read\_timeout | 3600 * 1000 |
| rpc\_client\_write\_timeout | 3600 * 1000 |
| rpc\_client\_socekt\_linger\_timeout | -1 |
| rpc\_client\_socket\_linger\_timeout | -1 |
| rpc\_client\_connect\_retry | 10 |
| rpc\_client\_timeout | 3600 * 1000 |
| dfs\_default\_replica | 3 |
@ -176,7 +176,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us
| output\_write\_timeout | 3600 * 1000 |
| output\_close\_timeout | 3600 * 1000 |
| output\_packetpool\_size | 1024 |
| output\_heeartbeat\_interval | 10 * 1000 |
| output\_heartbeat\_interval | 10 * 1000 |
| dfs\_client\_failover\_max\_attempts | 15 |
| dfs\_client\_read\_shortcircuit\_streams\_cache\_size | 256 |
| dfs\_client\_socketcache\_expiryMsec | 3000 |

View File

@ -6,7 +6,7 @@ sidebar_label: Hive
# Hive
The Hive engine allows you to perform `SELECT` quries on HDFS Hive table. Currently it supports input formats as below:
The Hive engine allows you to perform `SELECT` queries on HDFS Hive table. Currently it supports input formats as below:
- Text: only supports simple scalar column types except `binary`

View File

@ -10,7 +10,7 @@ This engine allows integrating ClickHouse with [NATS](https://nats.io/).
`NATS` lets you:
- Publish or subcribe to message subjects.
- Publish or subscribe to message subjects.
- Process new messages as they become available.
## Creating a Table {#table_engine-redisstreams-creating-a-table}
@ -46,7 +46,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Required parameters:
- `nats_url` host:port (for example, `localhost:5672`).
- `nats_subjects` List of subject for NATS table to subscribe/publsh to. Supports wildcard subjects like `foo.*.bar` or `baz.>`
- `nats_subjects` List of subjects for NATS table to subscribe/publish to. Supports wildcard subjects like `foo.*.bar` or `baz.>`
- `nats_format` Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section.
Optional parameters:

View File

@ -57,7 +57,7 @@ or via config (since version 21.11):
</named_collections>
```
Some parameters can be overriden by key value arguments:
Some parameters can be overridden by key value arguments:
``` sql
SELECT * FROM postgresql(postgres1, schema='schema1', table='table1');
```

View File

@ -23,7 +23,7 @@ CREATE TABLE s3_engine_table (name String, value UInt32)
- `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed.
- `format` — The [format](../../../interfaces/formats.md#formats) of the file.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension.
- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will auto-detect compression by file extension.
### PARTITION BY
@ -140,8 +140,8 @@ The following settings can be set before query execution or placed into configur
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
- `s3_upload_part_size_multiply_factor` - Multiply `s3_min_upload_part_size` by this factor each time `s3_upload_part_size_multiply_parts_count_threshold` parts were uploaded from a single write to S3. Default value is `2`.
- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3 `s3_min_upload_part_size multiplied` by `s3_upload_part_size_multiply_factor`. DEfault value us `500`.
- `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurenly for one object. Its number should be limited. The value `0` means unlimited. Default value is `20`. Each inflight part has a buffer with size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts and more when file is big enought, see `upload_part_size_multiply_factor`. With default settings one uploaded file consumes not more than `320Mb` for a file which is less than `8G`. The consumption is greater for a larger file.
- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3, `s3_min_upload_part_size` is multiplied by `s3_upload_part_size_multiply_factor`. Default value is `500`.
- `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. Its number should be limited. The value `0` means unlimited. Default value is `20`. Each in-flight part has a buffer with size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts and more when file is big enough, see `upload_part_size_multiply_factor`. With default settings one uploaded file consumes not more than `320Mb` for a file which is less than `8G`. The consumption is greater for a larger file.
Security consideration: if a malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in the server configuration.

View File

@ -78,7 +78,7 @@ ENGINE = MergeTree
ORDER BY id;
```
With greater `GRANULARITY` indexes remember the data structure better. The `GRANULARITY` indicates how many granules will be used to construct the index. The more data is provided for the index, the more of it can be handled by one index and the more chances that with the right hyperparameters the index will remember the data structure better. But some indexes can't be built if they don't have enough data, so this granule will always participate in the query. For more information, see the description of indexes.
With greater `GRANULARITY` indexes remember the data structure better. The `GRANULARITY` indicates how many granules will be used to construct the index. The more data is provided for the index, the more of it can be handled by one index and the more chances that with the right hyper parameters the index will remember the data structure better. But some indexes can't be built if they don't have enough data, so this granule will always participate in the query. For more information, see the description of indexes.
As the indexes are built only during insertions into table, `INSERT` and `OPTIMIZE` queries are slower than for ordinary table. At this stage indexes remember all the information about the given data. ANNIndexes should be used if you have immutable or rarely changed data and many read requests.
@ -135,7 +135,7 @@ ORDER BY id;
Annoy supports `L2Distance` and `cosineDistance`.
In the `SELECT` in the settings (`ann_index_select_query_params`) you can specify the size of the internal buffer (more details in the description above or in the [original repository](https://github.com/spotify/annoy)). During the query it will inspect up to `search_k` nodes which defaults to `n_trees * n` if not provided. `search_k` gives you a run-time tradeoff between better accuracy and speed.
In the `SELECT` in the settings (`ann_index_select_query_params`) you can specify the size of the internal buffer (more details in the description above or in the [original repository](https://github.com/spotify/annoy)). During the query it will inspect up to `search_k` nodes which defaults to `n_trees * n` if not provided. `search_k` gives you a run-time trade-off between better accuracy and speed.
__Example__:
``` sql

View File

@ -165,7 +165,7 @@ Performance of such a query heavily depends on the table layout. Because of that
The key factors for a good performance:
- number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise query will underutilize the machine
- number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise query will under-utilize the machine
- partitions shouldn't be too small, so batch processing won't degenerate into row-by-row processing
- partitions should be comparable in size, so all threads will do roughly the same amount of work

View File

@ -779,7 +779,7 @@ Disks, volumes and storage policies should be declared inside the `<storage_conf
:::tip
Disks can also be declared in the `SETTINGS` section of a query. This is useful
for adhoc analysis to temporarily attach a disk that is, for example, hosted at a URL.
for ad-hoc analysis to temporarily attach a disk that is, for example, hosted at a URL.
See [dynamic storage](#dynamic-storage) for more details.
:::
@ -856,7 +856,7 @@ Tags:
- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default, if we insert a data part that has already expired by the TTL move rule, it immediately goes to a volume/disk declared in the move rule. This can significantly slow down inserts if the destination volume/disk is slow (e.g. S3).
- `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`.
Cofiguration examples:
Configuration examples:
``` xml
<storage_configuration>
@ -1224,7 +1224,7 @@ Limit parameters (mainly for internal usage):
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurenly for one object.
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
Other parameters:
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.

View File

@ -65,7 +65,7 @@ if __name__ == "__main__":
main()
```
The following `my_executable_table` is built from the output of `my_script.py`, which will generate 10 random strings everytime you run a `SELECT` from `my_executable_table`:
The following `my_executable_table` is built from the output of `my_script.py`, which will generate 10 random strings every time you run a `SELECT` from `my_executable_table`:
```sql
CREATE TABLE my_executable_table (

View File

@ -72,7 +72,7 @@ Additionally, number of keys will have a soft limit of 4 for the number of keys.
If multiple tables are created on the same ZooKeeper path, the values are persisted until there exists at least 1 table using it.
As a result, it is possible to use `ON CLUSTER` clause when creating the table and sharing the data from multiple ClickHouse instances.
Of course, it's possible to manually run `CREATE TABLE` with same path on nonrelated ClickHouse instances to have same data sharing effect.
Of course, it's possible to manually run `CREATE TABLE` with same path on unrelated ClickHouse instances to have same data sharing effect.
## Supported operations {#table_engine-KeeperMap-supported-operations}

View File

@ -87,7 +87,7 @@ ORDER BY (marketplace, review_date, product_category);
3. We are now ready to insert the data into ClickHouse. Before we do, check out the [list of files in the dataset](https://s3.amazonaws.com/amazon-reviews-pds/tsv/index.txt) and decide which ones you want to include.
4. We will insert all of the US reviews - which is about 151M rows. The following `INSERT` command uses the `s3Cluster` table function, which allows the processing of mulitple S3 files in parallel using all the nodes of your cluster. We also use a wildcard to insert any file that starts with the name `https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_`:
4. We will insert all of the US reviews - which is about 151M rows. The following `INSERT` command uses the `s3Cluster` table function, which allows the processing of multiple S3 files in parallel using all the nodes of your cluster. We also use a wildcard to insert any file that starts with the name `https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_`:
```sql
INSERT INTO amazon_reviews

View File

@ -317,7 +317,7 @@ To build a Superset dashboard using the OpenCelliD dataset you should:
Make sure that you set **SSL** on when connecting to ClickHouse Cloud or other ClickHouse systems that enforce the use of SSL.
:::
![Add ClickHouse as a Superset datasource](@site/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png)
![Add ClickHouse as a Superset data source](@site/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png)
### Add the table **cell_towers** as a Superset **dataset**
@ -364,5 +364,5 @@ The data is also available for interactive queries in the [Playground](https://p
This [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=) will populate the username and even the query for you.
Although you cannot create tables in the Playground, you can run all of the queries and even use Superset (adjust the hostname and port number).
Although you cannot create tables in the Playground, you can run all of the queries and even use Superset (adjust the host name and port number).
:::

View File

@ -806,7 +806,7 @@ FROM
31 rows in set. Elapsed: 0.043 sec. Processed 7.54 million rows, 40.53 MB (176.71 million rows/s., 950.40 MB/s.)
```
Maybe a little more near the end of the month, but overall we keep a good even distribution. Again this is unrealiable due to the filtering of the docs filter during data insertion.
Maybe a little more near the end of the month, but overall we keep a good even distribution. Again this is unreliable due to the filtering of the docs filter during data insertion.
## Authors with the most diverse impact
@ -940,7 +940,7 @@ LIMIT 10
10 rows in set. Elapsed: 0.106 sec. Processed 798.15 thousand rows, 13.97 MB (7.51 million rows/s., 131.41 MB/s.)
```
This makes sense because Alexey has been responsible for maintaining the Change log. But what if we use the basename of the file to identify his popular files - this allows for renames and should focus on code contributions.
This makes sense because Alexey has been responsible for maintaining the Change log. But what if we use the base name of the file to identify his popular files - this allows for renames and should focus on code contributions.
[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBiYXNlLAogICAgY291bnQoKSBBUyBjCkZST00gZ2l0X2NsaWNraG91c2UuZmlsZV9jaGFuZ2VzCldIRVJFIChhdXRob3IgPSAnQWxleGV5IE1pbG92aWRvdicpIEFORCAoZmlsZV9leHRlbnNpb24gSU4gKCdoJywgJ2NwcCcsICdzcWwnKSkKR1JPVVAgQlkgYmFzZW5hbWUocGF0aCkgQVMgYmFzZQpPUkRFUiBCWSBjIERFU0MKTElNSVQgMTA=)

View File

@ -9,7 +9,7 @@ The data in this dataset is derived and cleaned from the full OpenSky dataset to
Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd
Martin Strohmeier, Xavier Olive, Jannis Lübbe, Matthias Schäfer, and Vincent Lenders
Martin Strohmeier, Xavier Olive, Jannis Luebbe, Matthias Schaefer, and Vincent Lenders
"Crowdsourced air traffic data from the OpenSky Network 2019–2020"
Earth System Science Data 13(2), 2021
https://doi.org/10.5194/essd-13-357-2021

View File

@ -542,7 +542,7 @@ LIMIT 10;
10 rows in set. Elapsed: 5.956 sec. Processed 14.69 billion rows, 126.19 GB (2.47 billion rows/s., 21.19 GB/s.)
```
11. Let's see which subreddits had the biggest increase in commnents from 2018 to 2019:
11. Let's see which subreddits had the biggest increase in comments from 2018 to 2019:
```sql
SELECT
@ -718,4 +718,3 @@ ORDER BY quarter ASC;
└────────────┴────────────┴───────────┴──────────┘
70 rows in set. Elapsed: 325.835 sec. Processed 14.69 billion rows, 2.57 TB (45.08 million rows/s., 7.87 GB/s.)
```

View File

@ -22,7 +22,7 @@ The steps below will easily work on a local install of ClickHouse too. The only
## Step-by-step instructions
1. Let's see what the data looks like. The `s3cluster` table function returns a table, so we can `DESCRIBE` the reult:
1. Let's see what the data looks like. The `s3Cluster` table function returns a table, so we can `DESCRIBE` the result:
```sql
DESCRIBE s3Cluster(
@ -322,7 +322,7 @@ ORDER BY month ASC;
A spike of uploaders [around covid is noticeable](https://www.theverge.com/2020/3/27/21197642/youtube-with-me-style-videos-views-coronavirus-cook-workout-study-home-beauty).
### More subtitiles over time and when
### More subtitles over time and when
With advances in speech recognition, it's easier than ever to create subtitles for video with YouTube adding auto-captioning in late 2009 - was the jump then?

View File

@ -275,9 +275,9 @@ Type: UInt64
Default: 1000
## max_concurrent_insert_queries
## max_concurrent_queries
Limit on total number of concurrent insert queries. Zero means Unlimited.
Limit on total number of concurrently executed queries. Zero means unlimited. Note that limits on insert and select queries, and on the maximum number of queries for users must also be considered. See also max_concurrent_insert_queries, max_concurrent_select_queries, max_concurrent_queries_for_all_users.
:::note
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
@ -287,9 +287,9 @@ Type: UInt64
Default: 0
## max_concurrent_queries
## max_concurrent_insert_queries
Limit on total number of concurrently executed queries. Zero means Unlimited. Note that limits on insert and select queries, and on the maximum number of queries for users must also be considered. See also max_concurrent_insert_queries, max_concurrent_select_queries, max_concurrent_queries_for_all_users. Zero means unlimited.
Limit on total number of concurrent insert queries. Zero means Unlimited.
:::note
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
@ -1277,49 +1277,6 @@ For more information, see the section [Creating replicated tables](../../engines
<macros incl="macros" optional="true" />
```
## max_concurrent_queries_for_user {#max-concurrent-queries-for-user}
The maximum number of simultaneously processed queries related to MergeTree table per user.
Possible values:
- Positive integer.
- 0 — No limit.
Default value: `0`.
**Example**
``` xml
<max_concurrent_queries_for_user>5</max_concurrent_queries_for_user>
```
## max_concurrent_queries_for_all_users {#max-concurrent-queries-for-all-users}
Throw exception if the value of this setting is less or equal than the current number of simultaneously processed queries.
Example: `max_concurrent_queries_for_all_users` can be set to 99 for all users and database administrator can set it to 100 for itself to run queries for investigation even when the server is overloaded.
Modifying the setting for one query or user does not affect other queries.
Possible values:
- Positive integer.
- 0 — No limit.
Default value: `0`.
**Example**
``` xml
<max_concurrent_queries_for_all_users>99</max_concurrent_queries_for_all_users>
```
**See Also**
- [max_concurrent_queries](#max-concurrent-queries)
## max_open_files {#max-open-files}
The maximum number of open files.
@ -1947,7 +1904,7 @@ Config fields:
- `regexp` - RE2 compatible regular expression (mandatory)
- `replace` - substitution string for sensitive data (optional, by default - six asterisks)
The masking rules are applied to the whole query (to prevent leaks of sensitive data from malformed / non-parsable queries).
The masking rules are applied to the whole query (to prevent leaks of sensitive data from malformed / non-parseable queries).
The `system.events` table has a counter `QueryMaskingRulesMatch`, which holds the overall number of query masking rule matches.

View File

@ -1182,7 +1182,7 @@ Possible values:
- `bin` - as 16-bytes binary.
- `str` - as a string of 36 bytes.
- `ext` - as extention with ExtType = 2.
- `ext` - as extension with ExtType = 2.
Default value: `ext`.

View File

@ -646,6 +646,48 @@ Used for the same purpose as `max_block_size`, but it sets the recommended block
However, the block size cannot be more than `max_block_size` rows.
By default: 1,000,000. It only works when reading from MergeTree engines.
## max_concurrent_queries_for_user {#max-concurrent-queries-for-user}
The maximum number of simultaneously processed queries related to MergeTree table per user.
Possible values:
- Positive integer.
- 0 — No limit.
Default value: `0`.
**Example**
``` xml
<max_concurrent_queries_for_user>5</max_concurrent_queries_for_user>
```
## max_concurrent_queries_for_all_users {#max-concurrent-queries-for-all-users}
Throws an exception if the value of this setting is less than or equal to the current number of simultaneously processed queries.
Example: `max_concurrent_queries_for_all_users` can be set to 99 for all users and database administrator can set it to 100 for itself to run queries for investigation even when the server is overloaded.
Modifying the setting for one query or user does not affect other queries.
Possible values:
- Positive integer.
- 0 — No limit.
Default value: `0`.
**Example**
``` xml
<max_concurrent_queries_for_all_users>99</max_concurrent_queries_for_all_users>
```
**See Also**
- [max_concurrent_queries](/docs/en/operations/server-configuration-parameters/settings.md/#max_concurrent_queries)
## merge_tree_min_rows_for_concurrent_read {#setting-merge-tree-min-rows-for-concurrent-read}
If the number of rows to be read from a file of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `merge_tree_min_rows_for_concurrent_read` then ClickHouse tries to perform a concurrent reading from this file on several threads.
@ -1050,6 +1092,12 @@ Timeouts in seconds on the socket used for communicating with the client.
Default value: 10, 300, 300.
## handshake_timeout_ms {#handshake-timeout-ms}
Timeout in milliseconds for receiving Hello packet from replicas during handshake.
Default value: 10000.
## cancel_http_readonly_queries_on_client_close {#cancel-http-readonly-queries-on-client-close}
Cancels HTTP read-only queries (e.g. SELECT) when a client closes the connection without waiting for the response.
@ -1107,7 +1155,7 @@ Default value: `0`.
Could be used for throttling speed when replicating the data to add or replace new nodes.
:::note
60000000 bytes/s approximatly corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
60000000 bytes/s approximately corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
:::
## max_replicated_sends_network_bandwidth_for_server {#max_replicated_sends_network_bandwidth_for_server}
@ -1128,7 +1176,7 @@ Default value: `0`.
Could be used for throttling speed when replicating the data to add or replace new nodes.
:::note
60000000 bytes/s approximatly corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
60000000 bytes/s approximately corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
:::
## connect_timeout_with_failover_ms {#connect-timeout-with-failover-ms}
@ -2030,7 +2078,7 @@ FORMAT PrettyCompactMonoBlock
## distributed_push_down_limit {#distributed-push-down-limit}
Enables or disables [LIMIT](#limit) applying on each shard separatelly.
Enables or disables [LIMIT](#limit) applying on each shard separately.
This will allow to avoid:
- Sending extra rows over network;
@ -2431,7 +2479,7 @@ Default value: 0.
## allow_introspection_functions {#settings-allow_introspection_functions}
Enables or disables [introspections functions](../../sql-reference/functions/introspection.md) for query profiling.
Enables or disables [introspection functions](../../sql-reference/functions/introspection.md) for query profiling.
Possible values:
@ -3492,7 +3540,7 @@ Default value: `0`.
## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}
Sets how long initial DDL query should wait for Replicated database to precess previous DDL queue entries in seconds.
Sets how long initial DDL query should wait for Replicated database to process previous DDL queue entries in seconds.
Possible values:

View File

@ -28,7 +28,7 @@ The `system.columns` table contains the following columns (the column type is sh
- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sampling key expression.
- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — Compression codec name.
- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bit width for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system is the accuracy of approximate numeric data, exact numeric data, integer data or monetary data. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned.
- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned.

View File

@ -12,7 +12,7 @@ Columns:
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid.
- `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name.
- `metadata_dropped_path` ([String](../../sql-reference/data-types/string.md)) — Path of table's metadata file in metadate_dropped directory.
- `metadata_dropped_path` ([String](../../sql-reference/data-types/string.md)) — Path of table's metadata file in metadata_dropped directory.
- `table_dropped_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time when the next attempt to remove table's data is scheduled. Usually it's the time when the table was dropped plus `database_atomic_delay_before_drop_table_sec`.
**Example**

View File

@ -43,7 +43,7 @@ Columns:
- `data_type` ([String](../../sql-reference/data-types/string.md)) — Column type.
- `character_maximum_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned.
- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bit width for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system in which the precision of approximate numeric data, exact numeric data, integer data, or monetary data is expressed. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned.
- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned.

View File

@ -3,7 +3,7 @@ slug: /en/operations/system-tables/licenses
---
# licenses
Сontains licenses of third-party libraries that are located in the [contrib](https://github.com/ClickHouse/ClickHouse/tree/master/contrib) directory of ClickHouse sources.
Contains licenses of third-party libraries that are located in the [contrib](https://github.com/ClickHouse/ClickHouse/tree/master/contrib) directory of ClickHouse sources.
Columns:

View File

@ -100,7 +100,7 @@ Columns:
- `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
:::note
The `move_ttl_info.expression` array is kept mostly for backward compatibility, now the simpliest way to check `TTL MOVE` rule is to use the `move_ttl_info.min` and `move_ttl_info.max` fields.
The `move_ttl_info.expression` array is kept mostly for backward compatibility, now the simplest way to check `TTL MOVE` rule is to use the `move_ttl_info.min` and `move_ttl_info.max` fields.
:::
- `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the minimum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).

View File

@ -14,8 +14,8 @@ Columns:
- `['user_name']` — Connections with the same user name share the same quota.
- `['ip_address']` — Connections from the same IP share the same quota.
- `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota_key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header.
- `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isnt provided by a client, the qouta is tracked for `user_name`.
- `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isnt provided by a client, the qouta is tracked for `ip_address`.
- `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn't provided by a client, the quota is tracked for `user_name`.
- `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn't provided by a client, the quota is tracked for `ip_address`.
- `durations` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Time interval lengths in seconds.
- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows which users the quota is applied to. Values:
- `0` — The quota applies to users specified in the `apply_to_list`.

View File

@ -50,7 +50,7 @@ Columns:
- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
- [Distributed](../../engines/table-engines/special/distributed.md#distributed)
- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `NULL` (including underying `Buffer` table).
- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `NULL` (including underlying `Buffer` table).
- `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes, if it is possible to quickly determine exact number of bytes for the table on storage, otherwise `NULL` (does not include any underlying storage).

View File

@ -43,7 +43,7 @@ Columns:
- `event` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) - For trace type `ProfileEvent` is the name of updated profile event, for other trace types is an empty string.
- `increment` ([UInt64](../../sql-reference/data-types/int-uint.md)) - For trace type `ProfileEvent` is the amount of incremnt of profile event, for other trace types is 0.
- `increment` ([UInt64](../../sql-reference/data-types/int-uint.md)) - For trace type `ProfileEvent` is the amount of increment of profile event, for other trace types is 0.
**Example**

View File

@ -33,7 +33,7 @@ Columns with request response parameters:
- `zxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — ZooKeeper transaction ID. The serial number issued by the ZooKeeper server in response to a successfully executed request (`0` if the request was not executed/returned an error/the client does not know whether the request was executed).
- `error` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — Error code. Can have many values, here are just some of them:
- `ZOK` — The request was executed seccessfully.
- `ZOK` — The request was executed successfully.
- `ZCONNECTIONLOSS` — The connection was lost.
- `ZOPERATIONTIMEOUT` — The request execution timeout has expired.
- `ZSESSIONEXPIRED` — The session has expired.
@ -43,7 +43,7 @@ Columns with request response parameters:
- `path_created` ([String](../../sql-reference/data-types/string.md)) — The path to the created ZooKeeper node (for responses to the `CREATE` request), may differ from the `path` if the node is created as a `sequential`.
- `stat_czxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that caused this ZooKeeper node to be created.
- `stat_mzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that last modified this ZooKeeper node.
- `stat_pzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The transaction ID of the change that last modified childern of this ZooKeeper node.
- `stat_pzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The transaction ID of the change that last modified children of this ZooKeeper node.
- `stat_version` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the data of this ZooKeeper node.
- `stat_cversion` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the children of this ZooKeeper node.
- `stat_dataLength` ([Int32](../../sql-reference/data-types/int-uint.md)) — The length of the data field of this ZooKeeper node.

View File

@ -24,7 +24,7 @@ It is designed to retain the following properties of data:
Most of the properties above are viable for performance testing:
reading data, filtering, aggregatio, and sorting will work at almost the same speed
reading data, filtering, aggregation, and sorting will work at almost the same speed
as on original data due to saved cardinalities, magnitudes, compression ratios, etc.
It works in a deterministic fashion: you define a seed value and the transformation is determined by input data and by seed.

View File

@ -356,7 +356,7 @@ Type: `UInt8`.
Let's consider an example of calculating the `retention` function to determine site traffic.
**1.** Сreate a table to illustrate an example.
**1.** Create a table to illustrate an example.
``` sql
CREATE TABLE retention_test(date Date, uid Int32) ENGINE = Memory;

View File

@ -5,7 +5,7 @@ sidebar_position: 351
# cramersV
[Cramér's V](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V) (sometimes referred to as Cramér's phi) is a measure of association between two columns in a table. The result of the `cramersV` function ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. It may be viewed as the association between two variables as a percentage of their maximum possible variation.
[Cramer's V](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V) (sometimes referred to as Cramer's phi) is a measure of association between two columns in a table. The result of the `cramersV` function ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. It may be viewed as the association between two variables as a percentage of their maximum possible variation.
**Syntax**

View File

@ -6,7 +6,7 @@ sidebar_position: 352
# cramersVBiasCorrected
Cramér's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv.md) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramér's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction).
Cramer's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv.md) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramer's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction).

View File

@ -6,7 +6,7 @@ sidebar_title: exponentialMovingAverage
## exponentialMovingAverage
Сalculates the exponential moving average of values for the determined time.
Calculates the exponential moving average of values for the determined time.
**Syntax**
@ -27,7 +27,7 @@ Each `value` corresponds to the determinate `timeunit`. The half-life `x` is the
**Returned values**
- Returnes an [exponentially smoothed moving average](https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average) of the values for the past `x` time at the latest point of time.
- Returns an [exponentially smoothed moving average](https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average) of the values for the past `x` time at the latest point of time.
Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64).

View File

@ -5,7 +5,7 @@ sidebar_position: 125
# groupBitAnd
Applies bitwise `AND` for series of numbers.
Applies bit-wise `AND` for series of numbers.
``` sql
groupBitAnd(expr)

View File

@ -5,7 +5,7 @@ sidebar_position: 126
# groupBitOr
Applies bitwise `OR` for series of numbers.
Applies bit-wise `OR` for series of numbers.
``` sql
groupBitOr(expr)

View File

@ -5,7 +5,7 @@ sidebar_position: 127
# groupBitXor
Applies bitwise `XOR` for series of numbers.
Applies bit-wise `XOR` for series of numbers.
``` sql
groupBitXor(expr)

View File

@ -30,11 +30,11 @@ Samples must belong to continuous, one-dimensional probability distributions.
The null hypothesis is that samples come from the same distribution, e.g. F(x) = G(x) for all x.
And the alternative is that the distributions are not identical.
- `'greater'`
The null hypothesis is that values in the first sample are *stohastically smaller* than those in the second one,
The null hypothesis is that values in the first sample are *stochastically smaller* than those in the second one,
e.g. the CDF of first distribution lies above and hence to the left of that for the second one.
Which in fact means that F(x) >= G(x) for all x. And the alternative in this case is that F(x) < G(x) for at least one x.
- `'less'`.
The null hypothesis is that values in the first sample are *stohastically greater* than those in the second one,
The null hypothesis is that values in the first sample are *stochastically greater* than those in the second one,
e.g. the CDF of first distribution lies below and hence to the right of that for the second one.
Which in fact means that F(x) <= G(x) for all x. And the alternative in this case is that F(x) > G(x) for at least one x.
- `computation_method` — the method used to compute p-value. (Optional, default: `'auto'`.) [String](../../../sql-reference/data-types/string.md).

View File

@ -14,7 +14,7 @@ The result depends on the order of running the query, and is nondeterministic.
When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function.
:::note
Using `quantileTDigestWeighted` [is not recommended for tiny data sets](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) and can lead to significat error. In this case, consider possibility of using [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) instead.
Using `quantileTDigestWeighted` [is not recommended for tiny data sets](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) and can lead to significant error. In this case, consider possibility of using [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) instead.
:::
**Syntax**

View File

@ -18,7 +18,7 @@ stochasticLinearRegression(1.0, 1.0, 10, 'SGD')
1. `learning rate` is the coefficient on step length, when gradient descent step is performed. Too big learning rate may cause infinite weights of the model. Default is `0.00001`.
2. `l2 regularization coefficient` which may help to prevent overfitting. Default is `0.1`.
3. `mini-batch size` sets the number of elements, which gradients will be computed and summed to perform one step of gradient descent. Pure stochastic descent uses one element, however having small batches(about 10 elements) make gradient steps more stable. Default is `15`.
4. `method for updating weights`, they are: `Adam` (by default), `SGD`, `Momentum`, `Nesterov`. `Momentum` and `Nesterov` require little bit more computations and memory, however they happen to be useful in terms of speed of convergance and stability of stochastic gradient methods.
4. `method for updating weights`, they are: `Adam` (by default), `SGD`, `Momentum`, `Nesterov`. `Momentum` and `Nesterov` require a little more computation and memory, however they happen to be useful in terms of speed of convergence and stability of stochastic gradient methods.
### Usage

View File

@ -22,7 +22,7 @@ Resolution: 1 second.
The point in time is saved as a [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time), regardless of the time zone or daylight saving time. The time zone affects how values of the `DateTime` type are displayed in text format and how the values specified as strings are parsed (2020-01-01 05:00:01).
Timezone agnostic unix timestamp is stored in tables, and the timezone is used to transform it to text format or back during data import/export or to make calendar calculations on the values (example: `toDate`, `toHour` functions et cetera). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata.
Timezone agnostic Unix timestamp is stored in tables, and the timezone is used to transform it to text format or back during data import/export or to make calendar calculations on the values (example: `toDate`, `toHour` functions etc.). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata.
A list of supported time zones can be found in the [IANA Time Zone Database](https://www.iana.org/time-zones) and also can be queried by `SELECT * FROM system.time_zones`. [The list](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) is also available at Wikipedia.
@ -30,7 +30,7 @@ You can explicitly set a time zone for `DateTime`-type columns when creating a t
The [clickhouse-client](../../interfaces/cli.md) applies the server time zone by default if a time zone isn't explicitly set when initializing the data type. To use the client time zone, run `clickhouse-client` with the `--use_client_time_zone` parameter.
ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionaly you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.
ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.
When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format) setting.
@ -120,9 +120,9 @@ FROM dt
As timezone conversion only changes the metadata, the operation has no computation cost.
## Limitations on timezones support
## Limitations on time zones support
Some timezones may not be supported completely. There are a few cases:
Some time zones may not be supported completely. There are a few cases:
If the offset from UTC is not a multiple of 15 minutes, the calculation of hours and minutes can be incorrect. For example, the time zone in Monrovia, Liberia has offset UTC -0:44:30 before 7 Jan 1972. If you are doing calculations on the historical time in Monrovia timezone, the time processing functions may give incorrect results. The results after 7 Jan 1972 will be correct nevertheless.

View File

@ -27,7 +27,7 @@ ClickHouse data types include:
- **Aggregation function types**: use [`SimpleAggregateFunction`](./simpleaggregatefunction.md) and [`AggregateFunction`](./aggregatefunction.md) for storing the intermediate status of aggregate function results
- **Nested data structures**: A [`Nested` data structure](./nested-data-structures/index.md) is like a table inside a cell
- **Tuples**: A [`Tuple` of elements](./tuple.md), each having an individual type.
- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column gettings its default value for the data type)
- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column getting its default value for the data type)
- **IP addresses**: use [`IPv4`](./domains/ipv4.md) and [`IPv6`](./domains/ipv6.md) to efficiently store IP addresses
- **Geo types**: for [geographical data](./geo.md), including `Point`, `Ring`, `Polygon` and `MultiPolygon`
- **Special data types**: including [`Expression`](./special-data-types/expression.md), [`Set`](./special-data-types/set.md), [`Nothing`](./special-data-types/nothing.md) and [`Interval`](./special-data-types/interval.md)

View File

@ -247,7 +247,7 @@ LAYOUT(FLAT(INITIAL_ARRAY_SIZE 50000 MAX_ARRAY_SIZE 5000000))
### hashed
The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers In practice, the number of keys can reach tens of millions of items.
The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items.
The dictionary key has the [UInt64](../../sql-reference/data-types/int-uint.md) type.
@ -984,7 +984,7 @@ SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source wher
...
```
For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronious and asynchronous updates are supported.
For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronous and asynchronous updates are supported.
It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to only request data that was changed after the previous update. If `update_field` is specified as part of the dictionary source configuration, value of the previous update time in seconds will be added to the data request. Depending on the source type (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, or ODBC), different logic will be applied to `update_field` before requesting data from an external source.
@ -1243,8 +1243,8 @@ Setting fields:
- `password` Password required for the authentication.
- `headers` All custom HTTP headers entries used for the HTTP request. Optional parameter.
- `header` Single HTTP header entry.
- `name` Identifiant name used for the header send on the request.
- `value` Value set for a specific identifiant name.
- `name` Identifier name used for the header sent in the request.
- `value` Value set for a specific identifier name.
When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`) remote hosts for HTTP dictionaries are checked against the contents of `remote_url_allow_hosts` section from config to prevent database users from accessing arbitrary HTTP servers.

View File

@ -140,7 +140,7 @@ range([start, ] end [, step])
**Implementation details**
- All arguments `start`, `end`, `step` must be below data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, which's type is a super type of all arguments's.
- All arguments `start`, `end`, `step` must be of one of the following data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`. The same applies to the elements of the returned array, whose type is a supertype of the types of all arguments.
- An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting.
**Examples**
@ -1236,7 +1236,7 @@ arrayAUC(arr_scores, arr_labels)
**Arguments**
- `arr_scores` — scores prediction model gives.
- `arr_labels` — labels of samples, usually 1 for positive sample and 0 for negtive sample.
- `arr_labels` — labels of samples, usually 1 for positive sample and 0 for negative sample.
**Returned value**

View File

@ -226,7 +226,7 @@ Result:
Returns result of [logical conjunction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. The countdown starts from 0 from the right to the left.
The conjuction for bitwise operations:
The conjunction for bit-wise operations:
0 AND 0 = 0
@ -291,7 +291,7 @@ Result:
Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. The countdown starts from 0 from the right to the left.
The disjunction for bitwise operations:
The disjunction for bit-wise operations:
0 OR 0 = 0

View File

@ -487,7 +487,7 @@ cosineDistance(vector1, vector2)
**Returned value**
- Cosine of the angle between two vectors substracted from one.
- Cosine of the angle between two vectors subtracted from one.
Type: [Float](../../sql-reference/data-types/float.md).

View File

@ -31,9 +31,9 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad])
**Arguments**
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
- `plaintext` — Text thats need to be encrypted. [String](../../sql-reference/data-types/string.md#string).
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Required for `-gcm` modes, optinal for others. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
@ -165,7 +165,7 @@ Received exception from server (version 22.6.1):
Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-ofb', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123').
```
While `aes_encrypt_mysql` produces MySQL-compatitalbe output:
While `aes_encrypt_mysql` produces MySQL-compatible output:
Query:
@ -233,7 +233,7 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad])
- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Required for `-gcm` modes, optinal for others. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
- `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
@ -364,7 +364,7 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Optinal. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string).
**Returned value**

View File

@ -6,7 +6,7 @@ sidebar_label: Files
## file
Reads file as string and loads the data into the specified column. The actual file content is not interpreted.
Reads a file as string and loads the data into the specified column. The file content is not interpreted.
Also see table function [file](../table-functions/file.md).
@ -18,15 +18,13 @@ file(path[, default])
**Arguments**
- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports the following wildcards: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings.
- `default` — The value that will be returned in the case the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal).
- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings.
- `default` — The value returned if the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal).
**Example**
Inserting data from files a.txt and b.txt into a table as strings:
Query:
``` sql
INSERT INTO table SELECT file('a.txt'), file('b.txt');
```

View File

@ -8,7 +8,7 @@ sidebar_label: Nullable
## isNull
Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal).
Returns whether the argument is [NULL](../../sql-reference/syntax.md#null-literal).
``` sql
isNull(x)
@ -18,7 +18,7 @@ Alias: `ISNULL`.
**Arguments**
- `x` — A value with a non-compound data type.
- `x` — A value of non-compound data type.
**Returned value**
@ -27,7 +27,7 @@ Alias: `ISNULL`.
**Example**
Input table
Table:
``` text
┌─x─┬────y─┐
@ -36,12 +36,14 @@ Input table
└───┴──────┘
```
Query
Query:
``` sql
SELECT x FROM t_null WHERE isNull(y);
```
Result:
``` text
┌─x─┐
│ 1 │
@ -50,7 +52,7 @@ SELECT x FROM t_null WHERE isNull(y);
## isNotNull
Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal).
Returns whether the argument is not [NULL](../../sql-reference/syntax.md#null-literal).
``` sql
isNotNull(x)
@ -58,16 +60,16 @@ isNotNull(x)
**Arguments:**
- `x` — A value with a non-compound data type.
- `x` — A value of non-compound data type.
**Returned value**
- `0` if `x` is `NULL`.
- `1` if `x` is not `NULL`.
- `0` if `x` is `NULL`.
**Example**
Input table
Table:
``` text
┌─x─┬────y─┐
@ -76,12 +78,14 @@ Input table
└───┴──────┘
```
Query
Query:
``` sql
SELECT x FROM t_null WHERE isNotNull(y);
```
Result:
``` text
┌─x─┐
│ 2 │
@ -90,7 +94,7 @@ SELECT x FROM t_null WHERE isNotNull(y);
## coalesce
Checks from left to right whether `NULL` arguments were passed and returns the first non-`NULL` argument.
Returns the leftmost non-`NULL` argument.
``` sql
coalesce(x,...)
@ -98,11 +102,11 @@ coalesce(x,...)
**Arguments:**
- Any number of parameters of a non-compound type. All parameters must be compatible by data type.
- Any number of parameters of non-compound type. All parameters must be of mutually compatible data types.
**Returned values**
- The first non-`NULL` argument.
- The first non-`NULL` argument.
- `NULL`, if all arguments are `NULL`.
**Example**
@ -110,10 +114,10 @@ coalesce(x,...)
Consider a list of contacts that may specify multiple ways to contact a customer.
``` text
┌─name─────┬─mail─┬─phone─────┬──icq─┐
┌─name─────┬─mail─┬─phone─────┬──telegram─┐
│ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │
│ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
└──────────┴──────┴───────────┴──────┘
└──────────┴──────┴───────────┴───────────┘
```
The `mail` and `phone` fields are of type String, but the `telegram` field is `UInt32`, so it needs to be converted to `String`.
@ -121,22 +125,22 @@ The `mail` and `phone` fields are of type String, but the `icq` field is `UInt32
Get the first available contact method for the customer from the contact list:
``` sql
SELECT name, coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook;
SELECT name, coalesce(mail, phone, CAST(telegram,'Nullable(String)')) FROM aBook;
```
``` text
┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐
┌─name─────┬─coalesce(mail, phone, CAST(telegram, 'Nullable(String)'))─┐
│ client 1 │ 123-45-67 │
│ client 2 │ ᴺᵁᴸᴸ │
└──────────┴──────────────────────────────────────────────────────┘
└──────────┴───────────────────────────────────────────────────────────┘
```
## ifNull
Returns an alternative value if the main argument is `NULL`.
Returns an alternative value if the argument is `NULL`.
``` sql
ifNull(x,alt)
ifNull(x, alt)
```
**Arguments:**
@ -146,25 +150,33 @@ ifNull(x,alt)
**Returned values**
- The value `x`, if `x` is not `NULL`.
- The value `alt`, if `x` is `NULL`.
- `x` if `x` is not `NULL`.
- `alt` if `x` is `NULL`.
**Example**
Query:
``` sql
SELECT ifNull('a', 'b');
```
Result:
``` text
┌─ifNull('a', 'b')─┐
│ a │
└──────────────────┘
```
Query:
``` sql
SELECT ifNull(NULL, 'b');
```
Result:
``` text
┌─ifNull(NULL, 'b')─┐
│ b │
@ -173,7 +185,7 @@ SELECT ifNull(NULL, 'b');
## nullIf
Returns `NULL` if the arguments are equal.
Returns `NULL` if both arguments are equal.
``` sql
nullIf(x, y)
@ -181,29 +193,37 @@ nullIf(x, y)
**Arguments:**
`x`, `y` — Values for comparison. They must be compatible types, or ClickHouse will generate an exception.
`x`, `y` — Values to compare. Must be of compatible types.
**Returned values**
- `NULL`, if the arguments are equal.
- The `x` value, if the arguments are not equal.
- `NULL` if the arguments are equal.
- `x` if the arguments are not equal.
**Example**
Query:
``` sql
SELECT nullIf(1, 1);
```
Result:
``` text
┌─nullIf(1, 1)─┐
│ ᴺᵁᴸᴸ │
└──────────────┘
```
Query:
``` sql
SELECT nullIf(1, 2);
```
Result:
``` text
┌─nullIf(1, 2)─┐
│ 1 │
@ -212,7 +232,7 @@ SELECT nullIf(1, 2);
## assumeNotNull
Results in an equivalent non-`Nullable` value for a [Nullable](../../sql-reference/data-types/nullable.md) type. In case the original value is `NULL` the result is undetermined. See also `ifNull` and `coalesce` functions.
Returns the corresponding non-`Nullable` value for a value of [Nullable](../../sql-reference/data-types/nullable.md) type. If the original value is `NULL`, an arbitrary result can be returned. See also functions `ifNull` and `coalesce`.
``` sql
assumeNotNull(x)
@ -224,36 +244,29 @@ assumeNotNull(x)
**Returned values**
- The original value from the non-`Nullable` type, if it is not `NULL`.
- Implementation specific result if the original value was `NULL`.
- The input value as non-`Nullable` type, if it is not `NULL`.
- An arbitrary value, if the input value is `NULL`.
**Example**
Consider the `t_null` table.
``` sql
SHOW CREATE TABLE t_null;
```
Table:
``` text
┌─statement─────────────────────────────────────────────────────────────────┐
│ CREATE TABLE default.t_null ( x Int8, y Nullable(Int8)) ENGINE = TinyLog │
└───────────────────────────────────────────────────────────────────────────┘
```
``` text
┌─x─┬────y─┐
│ 1 │ ᴺᵁᴸᴸ │
│ 2 │ 3 │
└───┴──────┘
```
Apply the `assumeNotNull` function to the `y` column.
Query:
``` sql
SELECT assumeNotNull(y) FROM t_null;
SELECT assumeNotNull(y) FROM t_null;
```
Result:
``` text
┌─assumeNotNull(y)─┐
│ 0 │
@ -261,10 +274,14 @@ SELECT assumeNotNull(y) FROM t_null;
└──────────────────┘
```
Query:
``` sql
SELECT toTypeName(assumeNotNull(y)) FROM t_null;
```
Result:
``` text
┌─toTypeName(assumeNotNull(y))─┐
│ Int8 │
@ -282,28 +299,36 @@ toNullable(x)
**Arguments:**
- `x`The value of any non-compound type.
- `x` — A value of non-compound type.
**Returned value**
- The input value with a `Nullable` type.
- The input value but of `Nullable` type.
**Example**
Query:
``` sql
SELECT toTypeName(10);
```
Result:
``` text
┌─toTypeName(10)─┐
│ UInt8 │
└────────────────┘
```
Query:
``` sql
SELECT toTypeName(toNullable(10));
```
Result:
``` text
┌─toTypeName(toNullable(10))─┐
│ Nullable(UInt8) │

View File

@ -12,7 +12,7 @@ A latitude and longitude pair can be transformed to a 64-bit H3 index, identifyi
The H3 index is used primarily for bucketing locations and other geospatial manipulations.
The full description of the H3 system is available at [the Uber Engeneering site](https://eng.uber.com/h3/).
The full description of the H3 system is available at [the Uber Engineering site](https://eng.uber.com/h3/).
## h3IsValid

View File

@ -249,7 +249,7 @@ s2RectAdd(s2pointLow, s2pointHigh, s2Point)
**Returned values**
- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` — Hight S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/float.md).
- `s2PointHigh` — High S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**

View File

@ -697,7 +697,7 @@ SELECT murmurHash2_64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:
## gccMurmurHash
Calculates a 64-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash value using the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191). It is portable between CLang and GCC builds.
Calculates a 64-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash value using the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191). It is portable between Clang and GCC builds.
**Syntax**
@ -1161,7 +1161,7 @@ wordShingleSimHashUTF8(string[, shinglesize])
**Arguments**
- `string` — String. [String](/docs/en/sql-reference/data-types/string.md).
- `shinglesize` — The size of a word shingle. Optinal. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
**Returned value**

View File

@ -12,7 +12,9 @@ Zero as an argument is considered `false`, non-zero values are considered `true`
## and
Calculates the logical conjunction between two or more values.
Calculates the logical conjunction of two or more values.
Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `(val_1 AND val_2 AND ... AND val_{i-1})` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT and(number = 2, intDiv(1, number)) FROM numbers(5)`.
**Syntax**
@ -20,9 +22,7 @@ Calculates the logical conjunction between two or more values.
and(val1, val2...)
```
Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `(val_1 AND val_2 AND ... AND val_{i-1})` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT and(number = 2, intDiv(1, number)) FROM numbers(5)`.
Alias: The [AND Operator](../../sql-reference/operators/index.md#logical-and-operator).
Alias: The [AND operator](../../sql-reference/operators/index.md#logical-and-operator).
**Arguments**
@ -30,8 +30,8 @@ Alias: The [AND Operator](../../sql-reference/operators/index.md#logical-and-ope
**Returned value**
- `0`, if there at least one argument evaluates to `false`,
- `NULL`, if no argumetn evaluates to `false` and at least one argument is `NULL`,
- `0`, if at least one argument evaluates to `false`,
- `NULL`, if no argument evaluates to `false` and at least one argument is `NULL`,
- `1`, otherwise.
Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
@ -66,7 +66,9 @@ Result:
## or
Calculates the logical disjunction between two or more values.
Calculates the logical disjunction of two or more values.
Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `((NOT val_1) AND (NOT val_2) AND ... AND (NOT val_{i-1}))` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT or(number = 0, intDiv(1, number) != 0) FROM numbers(5)`.
**Syntax**
@ -74,9 +76,7 @@ Calculates the logical disjunction between two or more values.
or(val1, val2...)
```
Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `((NOT val_1) AND (NOT val_2) AND ... AND (NOT val_{i-1}))` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT or(number = 0, intDiv(1, number) != 0) FROM numbers(5)`.
Alias: The [OR Operator](../../sql-reference/operators/index.md#logical-or-operator).
Alias: The [OR operator](../../sql-reference/operators/index.md#logical-or-operator).
**Arguments**
@ -120,7 +120,7 @@ Result:
## not
Calculates logical negation of a value.
Calculates the logical negation of a value.
**Syntax**
@ -128,7 +128,7 @@ Calculates logical negation of a value.
not(val);
```
Alias: The [Negation Operator](../../sql-reference/operators/index.md#logical-negation-operator).
Alias: The [Negation operator](../../sql-reference/operators/index.md#logical-negation-operator).
**Arguments**
@ -158,7 +158,7 @@ Result:
## xor
Calculates the logical exclusive disjunction between two or more values. For more than two values the function first xor-s the first two values, then xor-s the result with the third value etc.
Calculates the logical exclusive disjunction of two or more values. For more than two input values, the function first xor-s the first two values, then xor-s the result with the third value etc.
**Syntax**

View File

@ -52,7 +52,7 @@ Alias: `ln(x)`
## exp2
Returns 2 to the power of the given argumetn
Returns 2 to the power of the given argument.
**Syntax**
@ -82,7 +82,7 @@ log2(x)
## exp10
Returns 10 to the power of the given argumetn
Returns 10 to the power of the given argument.
**Syntax**

File diff suppressed because it is too large Load Diff

View File

@ -31,7 +31,7 @@ Uses a linear congruential generator.
## randCanonical
Returns a Float64 value, evenly distributed in [0, 1).
Returns a random Float64 value, evenly distributed in interval [0, 1).
## randConstant
@ -54,11 +54,9 @@ Result:
└────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘
```
# Functions for Generating Random Numbers based on a Distribution
## randUniform
Returns a Float64 drawn uniformly from the interval between `min` and `max` ([continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution)).
Returns a random Float64 drawn uniformly from interval [`min`, `max`) ([continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution)).
**Syntax**
@ -68,8 +66,8 @@ randUniform(min, max)
**Arguments**
- `min` - `Float64` - min value of the range,
- `max` - `Float64` - max value of the range.
- `min` - `Float64` - left boundary of the range,
- `max` - `Float64` - right boundary of the range.
**Returned value**
@ -97,7 +95,7 @@ Result:
## randNormal
Returns a Float64 drawn from a [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution).
Returns a random Float64 drawn from a [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution).
**Syntax**
@ -108,7 +106,7 @@ randNormal(mean, variance)
**Arguments**
- `mean` - `Float64` - mean value of distribution,
- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance).
- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance) of the distribution.
**Returned value**
@ -136,7 +134,7 @@ Result:
## randLogNormal
Returns a Float64 drawn from a [log-normal distribution](https://en.wikipedia.org/wiki/Log-normal_distribution).
Returns a random Float64 drawn from a [log-normal distribution](https://en.wikipedia.org/wiki/Log-normal_distribution).
**Syntax**
@ -147,7 +145,7 @@ randLogNormal(mean, variance)
**Arguments**
- `mean` - `Float64` - mean value of distribution,
- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance).
- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance) of the distribution.
**Returned value**
@ -175,7 +173,7 @@ Result:
## randBinomial
Returns a UInt64 drawn from a [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution).
Returns a random UInt64 drawn from a [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution).
**Syntax**
@ -186,7 +184,7 @@ randBinomial(experiments, probability)
**Arguments**
- `experiments` - `UInt64` - number of experiments,
- `probability` - `Float64` - probability of success in each experiment (values in `0...1` range only).
- `probability` - `Float64` - probability of success in each experiment, a value between 0 and 1.
**Returned value**
@ -214,7 +212,7 @@ Result:
## randNegativeBinomial
Returns a UInt64 drawn from a [negative binomial distribution](https://en.wikipedia.org/wiki/Negative_binomial_distribution).
Returns a random UInt64 drawn from a [negative binomial distribution](https://en.wikipedia.org/wiki/Negative_binomial_distribution).
**Syntax**
@ -225,7 +223,7 @@ randNegativeBinomial(experiments, probability)
**Arguments**
- `experiments` - `UInt64` - number of experiments,
- `probability` - `Float64` - probability of failure in each experiment (values in `0...1` range only).
- `probability` - `Float64` - probability of failure in each experiment, a value between 0 and 1.
**Returned value**
@ -253,7 +251,7 @@ Result:
## randPoisson
Returns a UInt64 drawn from a [Poisson distribution](https://en.wikipedia.org/wiki/Poisson_distribution).
Returns a random UInt64 drawn from a [Poisson distribution](https://en.wikipedia.org/wiki/Poisson_distribution).
**Syntax**
@ -291,7 +289,7 @@ Result:
## randBernoulli
Returns a UInt64 drawn from a [Bernoulli distribution](https://en.wikipedia.org/wiki/Bernoulli_distribution).
Returns a random UInt64 drawn from a [Bernoulli distribution](https://en.wikipedia.org/wiki/Bernoulli_distribution).
**Syntax**
@ -301,7 +299,7 @@ randBernoulli(probability)
**Arguments**
- `probability` - `Float64` - probability of success (values in `0...1` range only).
- `probability` - `Float64` - probability of success, a value between 0 and 1.
**Returned value**
@ -329,7 +327,7 @@ Result:
## randExponential
Returns a Float64 drawn from a [exponential distribution](https://en.wikipedia.org/wiki/Exponential_distribution).
Returns a random Float64 drawn from an [exponential distribution](https://en.wikipedia.org/wiki/Exponential_distribution).
**Syntax**
@ -367,7 +365,7 @@ Result:
## randChiSquared
Returns a Float64 drawn from a [Chi-square distribution](https://en.wikipedia.org/wiki/Chi-squared_distribution) - a distribution of a sum of the squares of k independent standard normal random variables.
Returns a random Float64 drawn from a [Chi-square distribution](https://en.wikipedia.org/wiki/Chi-squared_distribution) - a distribution of a sum of the squares of k independent standard normal random variables.
**Syntax**
@ -405,7 +403,7 @@ Result:
## randStudentT
Returns a Float64 drawn from a [Student's t-distribution](https://en.wikipedia.org/wiki/Student%27s_t-distribution).
Returns a random Float64 drawn from a [Student's t-distribution](https://en.wikipedia.org/wiki/Student%27s_t-distribution).
**Syntax**
@ -443,7 +441,7 @@ Result:
## randFisherF
Returns a Float64 drawn from a [F-distribution](https://en.wikipedia.org/wiki/F-distribution).
Returns a random Float64 drawn from an [F-distribution](https://en.wikipedia.org/wiki/F-distribution).
**Syntax**
@ -480,47 +478,160 @@ Result:
└─────────────────────┘
```
# Functions for Generating Random Strings
## randomString
Returns a random String of specified `length`. Not all characters may be printable.
Generates a string of the specified length filled with random bytes (including zero bytes). Not all characters may be printable.
**Syntax**
```sql
``` sql
randomString(length)
```
**Arguments**
- `length` — String length in bytes. Positive integer.
**Returned value**
- String filled with random bytes.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
``` sql
SELECT randomString(30) AS str, length(str) AS len FROM numbers(2) FORMAT Vertical;
```
Result:
``` text
Row 1:
──────
str: 3 G : pT ?w тi k aV f6
len: 30
Row 2:
──────
str: 9 ,] ^ ) ]?? 8
len: 30
```
## randomFixedString
Like `randomString` but returns a FixedString.
## randomPrintableASCII
Returns a random String of specified `length`. All characters are printable.
Generates a binary string of the specified length filled with random bytes (including zero bytes). Not all characters may be printable.
**Syntax**
``` sql
randomFixedString(length);
```
**Arguments**
- `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md).
**Returned value(s)**
- String filled with random bytes.
Type: [FixedString](../../sql-reference/data-types/fixedstring.md).
**Example**
Query:
```sql
SELECT randomFixedString(13) as rnd, toTypeName(rnd)
```
Result:
```text
┌─rnd──────┬─toTypeName(randomFixedString(13))─┐
│ j▒h㋖HɨZ'▒ │ FixedString(13) │
└──────────┴───────────────────────────────────┘
```
## randomPrintableASCII
Generates a string with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) characters. All characters are printable.
If you pass `length < 0`, the behavior of the function is undefined.
**Syntax**
``` sql
randomPrintableASCII(length)
```
**Arguments**
- `length` — String length in bytes. Positive integer.
**Returned value**
- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
``` sql
SELECT number, randomPrintableASCII(30) as str, length(str) FROM system.numbers LIMIT 3
```
``` text
┌─number─┬─str────────────────────────────┬─length(randomPrintableASCII(30))─┐
│ 0 │ SuiCOSTvC0csfABSw=UcSzp2.`rv8x │ 30 │
│ 1 │ 1Ag NlJ &RCN:*>HVPG;PE-nO"SUFD │ 30 │
│ 2 │ /"+<"wUTh:=LjJ Vm!c&hI*m#XTfzz │ 30 │
└────────┴────────────────────────────────┴──────────────────────────────────┘
```
## randomStringUTF8
Returns a random String containing `length` many UTF8 codepoints. Not all characters may be printable
Generates a random string of a specified length. Result string contains valid UTF-8 code points. The value of code points may be outside of the range of assigned Unicode.
**Syntax**
``` sql
randomStringUTF8(length);
```
**Arguments**
- `length` — Length of the string in code points. [UInt64](../../sql-reference/data-types/int-uint.md).
**Returned value(s)**
- UTF-8 random string.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
```sql
randomStringUTF8(length)
SELECT randomStringUTF8(13)
```
Result:
```text
┌─randomStringUTF8(13)─┐
│ 𘤗𙉝д兠庇󡅴󱱎󦐪􂕌𔊹𓰛 │
└──────────────────────┘
```
## fuzzBits
**Syntax**
Inverts the bits of String or FixedString `s`, each with probability `prob`.
Flips the bits of String or FixedString `s`, each with probability `prob`.
**Syntax**
@ -529,8 +640,8 @@ fuzzBits(s, prob)
```
**Arguments**
- `s` - `String` or `FixedString`
- `prob` - constant `Float32/64`
- `s` - `String` or `FixedString`,
- `prob` - constant `Float32/64` between 0.0 and 1.0.
**Returned value**

View File

@ -393,7 +393,7 @@ Reverses a sequence of Unicode code points in a string. Assumes that the string
## format
Format the `pattern` string with the strings listed in the arguments, similar to formatting in Python. The pattern string can contain replacement fields surrounded by curly braces `{}`. Anything not contained in braces is considered literal text and copied verbatim into the output. Literal brace character can be escaped by two braces: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are implicitely given monotonically increasing numbers).
Format the `pattern` string with the strings listed in the arguments, similar to formatting in Python. The pattern string can contain replacement fields surrounded by curly braces `{}`. Anything not contained in braces is considered literal text and copied verbatim into the output. Literal brace character can be escaped by two braces: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are implicitly given monotonically increasing numbers).
**Syntax**

View File

@ -6,7 +6,7 @@ sidebar_label: Replacing in Strings
# Functions for Replacing in Strings
[General strings functions](string-functions.md) and [functions for searchin in strings](string-search-functions.md) are described separately.
[General strings functions](string-functions.md) and [functions for searching in strings](string-search-functions.md) are described separately.
## replaceOne

View File

@ -793,7 +793,7 @@ toDecimalString(number, scale)
**Returned value**
- Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale).
The number is rounded up or down according to common arithmetics in case requested scale is smaller than original number's scale.
The number is rounded up or down according to common arithmetic in case requested scale is smaller than original number's scale.
**Example**

View File

@ -19,7 +19,7 @@ A function configuration contains the following settings:
- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number.
- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command.
- `return_type` - the type of a returned value.
- `return_name` - name of retuned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
- `return_name` - name of returned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created.
- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`.

View File

@ -222,7 +222,7 @@ It also makes sense to specify a local table in the `GLOBAL IN` clause, in case
### Distributed Subqueries and max_rows_in_set
You can use [`max_rows_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) and [`max_bytes_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) to control how much data is tranferred during distributed queries.
You can use [`max_rows_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) and [`max_bytes_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) to control how much data is transferred during distributed queries.
This is especially important if the `global in` query returns a large amount of data. Consider the following SQL:
```sql

View File

@ -24,7 +24,7 @@ For tuple negation: [tupleNegate](../../sql-reference/functions/tuple-functions.
`a * b` The `multiply (a, b)` function.
For multiplying tuple by number: [tupleMultiplyByNumber](../../sql-reference/functions/tuple-functions.md#tuplemultiplybynumber), for scalar profuct: [dotProduct](../../sql-reference/functions/tuple-functions.md#dotproduct).
For multiplying tuple by number: [tupleMultiplyByNumber](../../sql-reference/functions/tuple-functions.md#tuplemultiplybynumber), for scalar product: [dotProduct](../../sql-reference/functions/tuple-functions.md#dotproduct).
`a / b` The `divide(a, b)` function.

View File

@ -32,7 +32,7 @@ Limit the maximum number of queries for the current user with 123 queries in 15
ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
```
For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quaters:
For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quarters:
``` sql
ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;

View File

@ -32,7 +32,7 @@ Limit the maximum number of queries for the current user with 123 queries in 15
CREATE QUOTA qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
```
For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quaters:
For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quarters:
``` sql
CREATE QUOTA qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;

View File

@ -127,7 +127,7 @@ CROSS JOIN system.numbers AS c
Settings:
- `run_passes` — Run all query tree passes before dumping the query tree. Defaul: `1`.
- `run_passes` — Run all query tree passes before dumping the query tree. Default: `1`.
- `dump_passes` — Dump information about used passes before dumping the query tree. Default: `0`.
- `passes` — Specifies how many passes to run. If set to `-1`, runs all the passes. Default: `-1`.
@ -475,5 +475,5 @@ Result:
```
:::note
The validation is not complete, so a successfull query does not guarantee that the override would not cause issues.
The validation is not complete, so a successful query does not guarantee that the override would not cause issues.
:::

View File

@ -34,7 +34,7 @@ Queries that use `FINAL` are executed slightly slower than similar queries that
- Data is merged during query execution.
- Queries with `FINAL` read primary key columns in addition to the columns specified in the query.
**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine havet happened yet and deal with it by applying aggregation (for example, to discard duplicates).
**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine haven't happened yet and deal with it by applying aggregation (for example, to discard duplicates).
`FINAL` can be applied automatically using [FINAL](../../../operations/settings/settings.md#final) setting to all tables in a query using a session or a user profile.

View File

@ -289,7 +289,7 @@ When `FROM const_expr` not defined sequence of filling use minimal `expr` field
When `TO const_expr` not defined sequence of filling use maximum `expr` field value from `ORDER BY`.
When `STEP const_numeric_expr` defined then `const_numeric_expr` interprets `as is` for numeric types, as `days` for Date type, as `seconds` for DateTime type. It also supports [INTERVAL](https://clickhouse.com/docs/en/sql-reference/data-types/special-data-types/interval/) data type representing time and date intervals.
When `STEP const_numeric_expr` omitted then sequence of filling use `1.0` for numeric type, `1 day` for Date type and `1 second` for DateTime type.
`INTERPOLATE` can be applied to columns not participating in `ORDER BY WITH FILL`. Such columns are filled based on previous fields values by applying `expr`. If `expr` is not present will repeate previous value. Omitted list will result in including all allowed columns.
`INTERPOLATE` can be applied to columns not participating in `ORDER BY WITH FILL`. Such columns are filled based on previous fields values by applying `expr`. If `expr` is not present, the previous value is repeated. An omitted list results in including all allowed columns.
Example of a query without `WITH FILL`:

View File

@ -6,7 +6,7 @@ sidebar_label: urlCluster
# urlCluster Table Function
Allows processing files from URL in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterics in URL file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
Allows processing files from URL in parallel from many nodes in a specified cluster. On the initiator it creates a connection to all nodes in the cluster, discloses asterisks in the URL file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
**Syntax**

View File

@ -21,7 +21,7 @@ ClickHouse supports the standard grammar for defining windows and window functio
| `lag/lead(value, offset)` | Not supported. Workarounds: |
| | 1) replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` |
| | 2) use `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
| ntile(buckets) | Supported. Specify window like, (partition by x order by y rows between unbounded preceding and unounded following). |
| ntile(buckets) | Supported. Specify window like, (partition by x order by y rows between unbounded preceding and unbounded following). |
## ClickHouse-specific Window Functions
@ -39,7 +39,7 @@ The computed value is the following for each row:
The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097).
All GitHub issues related to window funtions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag.
All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag.
### Tests

View File

@ -19,7 +19,7 @@ $ echo '{"foo":"bar"}' | curl 'http://localhost:8123/?query=INSERT%20INTO%20test
При помощи [интерфейса CLI](../../interfaces/cli.md):
``` bash
$ echo '{"foo":"bar"}' | clickhouse-client ---query="INSERT INTO test FORMAT JSONEachRow"
$ echo '{"foo":"bar"}' | clickhouse-client --query="INSERT INTO test FORMAT JSONEachRow"
```
Чтобы не вставлять данные вручную, используйте одну из [готовых библиотек](../../interfaces/index.md).

View File

@ -138,7 +138,7 @@ void LocalServer::initialize(Poco::Util::Application & self)
OutdatedPartsLoadingThreadPool::initialize(
config().getUInt("max_outdated_parts_loading_thread_pool_size", 16),
0, // We don't need any threads one all the parts will be loaded
config().getUInt("outdated_part_loading_thread_pool_queue_size", 10000));
config().getUInt("max_outdated_parts_loading_thread_pool_size", 16));
}

View File

@ -696,7 +696,7 @@ try
OutdatedPartsLoadingThreadPool::initialize(
server_settings.max_outdated_parts_loading_thread_pool_size,
0, // We don't need any threads one all the parts will be loaded
server_settings.outdated_part_loading_thread_pool_queue_size);
server_settings.max_outdated_parts_loading_thread_pool_size);
/// Initialize global local cache for remote filesystem.
if (config().has("local_cache_for_remote_fs"))

View File

@ -190,7 +190,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
connected = true;
sendHello();
receiveHello();
receiveHello(timeouts.handshake_timeout);
if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM)
sendAddendum();
@ -321,8 +321,10 @@ void Connection::sendAddendum()
}
void Connection::receiveHello()
void Connection::receiveHello(const Poco::Timespan & handshake_timeout)
{
TimeoutSetter timeout_setter(*socket, socket->getSendTimeout(), handshake_timeout);
/// Receive hello packet.
UInt64 packet_type = 0;
@ -375,6 +377,10 @@ void Connection::receiveHello()
receiveException()->rethrow();
else
{
/// Reset timeout_setter before disconnect,
/// because after disconnect socket will be invalid.
timeout_setter.reset();
/// Close connection, to not stay in unsynchronised state.
disconnect();
throwUnexpectedPacket(packet_type, "Hello or Exception");

View File

@ -256,7 +256,7 @@ private:
void connect(const ConnectionTimeouts & timeouts);
void sendHello();
void sendAddendum();
void receiveHello();
void receiveHello(const Poco::Timespan & handshake_timeout);
#if USE_SSL
void sendClusterNameAndSalt();

View File

@ -67,7 +67,8 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0),
Poco::Timespan(config.getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0),
Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0),
Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0));
Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0),
/// "handshake_timeout_ms" is expressed in milliseconds, but the two-argument Poco::Timespan constructor
/// takes (seconds, microseconds). Convert explicitly to microseconds and use the single-argument constructor;
/// Poco::Timespan::MILLISECONDS is a 64-bit constant, so the multiplication is done in 64 bits.
Poco::Timespan(config.getInt("handshake_timeout_ms", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC * 1000) * Poco::Timespan::MILLISECONDS));
timeouts.sync_request_timeout = Poco::Timespan(config.getInt("sync_request_timeout", DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC), 0);
}

View File

@ -9,6 +9,7 @@
#include "hasLinuxCapability.h"
#include <base/unaligned.h>
#include <base/getThreadId.h>
#include <Common/logger_useful.h>
#include <cerrno>
@ -202,10 +203,12 @@ bool checkPermissionsImpl()
/// Check that we can successfully initialize TaskStatsInfoGetter.
/// It will ask about family id through Netlink.
/// On some LXC containers we have capability but we still cannot use Netlink.
/// There is evidence that Linux fedora-riscv 6.1.22 gives something strange instead of the expected result.
try
{
TaskStatsInfoGetter();
::taskstats stats{};
TaskStatsInfoGetter().getStat(stats, static_cast<pid_t>(getThreadId()));
}
catch (const Exception & e)
{

View File

@ -37,7 +37,6 @@ class QueryThreadLog;
class TasksStatsCounters;
struct RUsageCounters;
struct PerfEventsCounters;
class TaskStatsInfoGetter;
class InternalTextLogsQueue;
struct ViewRuntimeData;
class QueryViewsLog;

View File

@ -272,7 +272,8 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req
}
catch (...)
{
rollbackRequest(request_for_session, true);
tryLogCurrentException(__PRETTY_FUNCTION__);
rollbackRequestNoLock(request_for_session, true);
throw;
}
@ -411,6 +412,14 @@ void KeeperStateMachine::rollbackRequest(const KeeperStorage::RequestForSession
storage->rollbackRequest(request_for_session.zxid, allow_missing);
}
/// Rolls back the storage changes made for `request_for_session` (identified by its zxid).
/// This function takes no lock itself; NOTE(review): presumably the caller already holds
/// the lock protecting `storage` — confirm against the call sites.
void KeeperStateMachine::rollbackRequestNoLock(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing)
{
    /// SessionID requests are skipped: no storage rollback is issued for them.
    const bool is_session_id_request = request_for_session.request->getOpNum() == Coordination::OpNum::SessionID;
    if (!is_session_id_request)
        storage->rollbackRequest(request_for_session.zxid, allow_missing);
}
nuraft::ptr<nuraft::snapshot> KeeperStateMachine::last_snapshot()
{
/// Just return the latest snapshot.

View File

@ -68,6 +68,8 @@ public:
// (can happen in case of exception during preprocessing)
void rollbackRequest(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing);
void rollbackRequestNoLock(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing);
uint64_t last_commit_index() override { return last_committed_idx; }
/// Apply preliminarily saved (save_logical_snp_obj) snapshot to our state.

View File

@ -22,7 +22,6 @@ namespace DB
M(UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0) \
M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \
M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The maximum number of threads that would be used for loading outdated data parts on startup", 0) \
M(UInt64, outdated_part_loading_thread_pool_queue_size, 10000, "Queue size for parts loading thread pool.", 0) \
M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \
M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \
M(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0) \

View File

@ -55,6 +55,7 @@ class IColumn;
M(UInt64, max_query_size, DBMS_DEFAULT_MAX_QUERY_SIZE, "The maximum number of bytes of a query string parsed by the SQL parser. Data in the VALUES clause of INSERT queries is processed by a separate stream parser (that consumes O(1) RAM) and not affected by this restriction.", 0) \
M(UInt64, interactive_delay, 100000, "The interval in microseconds to check if the request is cancelled, and to send progress info.", 0) \
M(Seconds, connect_timeout, DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, "Connection timeout if there are no replicas.", 0) \
M(Milliseconds, handshake_timeout_ms, 10000, "Timeout for receiving HELLO packet from replicas.", 0) \
M(Milliseconds, connect_timeout_with_failover_ms, 1000, "Connection timeout for selecting first healthy replica.", 0) \
M(Milliseconds, connect_timeout_with_failover_secure_ms, 1000, "Connection timeout for selecting first healthy replica (for secure connections).", 0) \
M(Seconds, receive_timeout, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, "Timeout for receiving data from network, in seconds. If no bytes were received in this interval, exception is thrown. If you set this setting on client, the 'send_timeout' for the socket will be also set on the corresponding connection end on the server.", 0) \

View File

@ -1073,56 +1073,73 @@ private:
size_t size = vec_from.size();
for (size_t i = 0; i < size; ++i)
{
ToType h;
ToType hash;
if constexpr (Impl::use_int_hash_for_pods)
{
if constexpr (std::is_same_v<ToType, UInt64>)
h = IntHash64Impl::apply(bit_cast<UInt64>(vec_from[i]));
hash = IntHash64Impl::apply(bit_cast<UInt64>(vec_from[i]));
else
h = IntHash32Impl::apply(bit_cast<UInt32>(vec_from[i]));
hash = IntHash32Impl::apply(bit_cast<UInt32>(vec_from[i]));
}
else
{
if constexpr (std::is_same_v<Impl, JavaHashImpl>)
h = JavaHashImpl::apply(vec_from[i]);
hash = JavaHashImpl::apply(vec_from[i]);
else
{
FromType v = vec_from[i];
FromType value = vec_from[i];
if constexpr (std::endian::native == std::endian::big)
{
FromType tmp_v;
reverseMemcpy(&tmp_v, &v, sizeof(v));
v = tmp_v;
FromType value_reversed;
reverseMemcpy(&value_reversed, &value, sizeof(value));
value = value_reversed;
}
h = apply(key, reinterpret_cast<const char *>(&v), sizeof(v));
hash = apply(key, reinterpret_cast<const char *>(&value), sizeof(value));
}
}
if constexpr (first)
vec_to[i] = h;
vec_to[i] = hash;
else
vec_to[i] = combineHashes(key, vec_to[i], h);
vec_to[i] = combineHashes(key, vec_to[i], hash);
}
}
else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
{
auto value = col_from_const->template getValue<FromType>();
ToType hash;
if constexpr (Impl::use_int_hash_for_pods)
{
if constexpr (std::is_same_v<ToType, UInt64>)
hash = IntHash64Impl::apply(bit_cast<UInt64>(value));
else
hash = IntHash32Impl::apply(bit_cast<UInt32>(value));
}
else
{
if constexpr (std::is_same_v<Impl, JavaHashImpl>)
hash = JavaHashImpl::apply(value);
else
{
if constexpr (std::endian::native == std::endian::big)
{
FromType value_reversed;
reverseMemcpy(&value_reversed, &value, sizeof(value));
value = value_reversed;
}
hash = apply(key, reinterpret_cast<const char *>(&value), sizeof(value));
}
}
size_t size = vec_to.size();
if constexpr (first)
vec_to.assign(size, hash);
else
{
for (size_t i = 0; i < size; ++i)
vec_to[i] = combineHashes(key, vec_to[i], hash);
}
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
column->getName(), getName());
@ -1139,46 +1156,40 @@ private:
size_t size = vec_from.size();
for (size_t i = 0; i < size; ++i)
{
ToType h;
ToType hash;
if constexpr (std::endian::native == std::endian::little)
{
h = apply(key, reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
}
hash = apply(key, reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
else
{
char tmp_buffer[sizeof(vec_from[i])];
reverseMemcpy(tmp_buffer, &vec_from[i], sizeof(vec_from[i]));
h = apply(key, reinterpret_cast<const char *>(tmp_buffer), sizeof(vec_from[i]));
hash = apply(key, reinterpret_cast<const char *>(tmp_buffer), sizeof(vec_from[i]));
}
if constexpr (first)
vec_to[i] = h;
vec_to[i] = hash;
else
vec_to[i] = combineHashes(key, vec_to[i], h);
vec_to[i] = combineHashes(key, vec_to[i], hash);
}
}
else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
{
auto value = col_from_const->template getValue<FromType>();
ToType h;
ToType hash;
if constexpr (std::endian::native == std::endian::little)
{
h = apply(key, reinterpret_cast<const char *>(&value), sizeof(value));
}
hash = apply(key, reinterpret_cast<const char *>(&value), sizeof(value));
else
{
char tmp_buffer[sizeof(value)];
reverseMemcpy(tmp_buffer, &value, sizeof(value));
h = apply(key, reinterpret_cast<const char *>(tmp_buffer), sizeof(value));
hash = apply(key, reinterpret_cast<const char *>(tmp_buffer), sizeof(value));
}
size_t size = vec_to.size();
if constexpr (first)
vec_to.assign(size, h);
vec_to.assign(size, hash);
else
{
for (size_t i = 0; i < size; ++i)
vec_to[i] = combineHashes(key, vec_to[i], h);
}
vec_to[i] = combineHashes(key, vec_to[i], hash);
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
@ -1191,11 +1202,11 @@ private:
for (size_t i = 0, size = column->size(); i < size; ++i)
{
StringRef bytes = column->getDataAt(i);
const ToType h = apply(key, bytes.data, bytes.size);
const ToType hash = apply(key, bytes.data, bytes.size);
if constexpr (first)
vec_to[i] = h;
vec_to[i] = hash;
else
vec_to[i] = combineHashes(key, vec_to[i], h);
vec_to[i] = combineHashes(key, vec_to[i], hash);
}
}
@ -1211,14 +1222,14 @@ private:
ColumnString::Offset current_offset = 0;
for (size_t i = 0; i < size; ++i)
{
const ToType h = apply(key,
const ToType hash = apply(key,
reinterpret_cast<const char *>(&data[current_offset]),
offsets[i] - current_offset - 1);
if constexpr (first)
vec_to[i] = h;
vec_to[i] = hash;
else
vec_to[i] = combineHashes(key, vec_to[i], h);
vec_to[i] = combineHashes(key, vec_to[i], hash);
current_offset = offsets[i];
}
@ -1231,11 +1242,11 @@ private:
for (size_t i = 0; i < size; ++i)
{
const ToType h = apply(key, reinterpret_cast<const char *>(&data[i * n]), n);
const ToType hash = apply(key, reinterpret_cast<const char *>(&data[i * n]), n);
if constexpr (first)
vec_to[i] = h;
vec_to[i] = hash;
else
vec_to[i] = combineHashes(key, vec_to[i], h);
vec_to[i] = combineHashes(key, vec_to[i], hash);
}
}
else if (const ColumnConst * col_from_const = checkAndGetColumnConstStringOrFixedString(column))
@ -1245,17 +1256,11 @@ private:
const size_t size = vec_to.size();
if constexpr (first)
{
vec_to.assign(size, hash);
}
else
{
for (size_t i = 0; i < size; ++i)
{
vec_to[i] = combineHashes(key, vec_to[i], hash);
}
}
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
column->getName(), getName());
@ -1283,16 +1288,16 @@ private:
{
ColumnArray::Offset next_offset = offsets[i];
ToType h;
ToType hash;
if constexpr (std::is_same_v<ToType, UInt64>)
h = IntHash64Impl::apply(next_offset - current_offset);
hash = IntHash64Impl::apply(next_offset - current_offset);
else
h = IntHash32Impl::apply(next_offset - current_offset);
hash = IntHash32Impl::apply(next_offset - current_offset);
if constexpr (first)
vec_to[i] = h;
vec_to[i] = hash;
else
vec_to[i] = combineHashes(key, vec_to[i], h);
vec_to[i] = combineHashes(key, vec_to[i], hash);
for (size_t j = current_offset; j < next_offset; ++j)
vec_to[i] = combineHashes(key, vec_to[i], vec_temp[j]);

View File

@ -17,22 +17,7 @@ ConnectionTimeouts::ConnectionTimeouts(
, secure_connection_timeout(connection_timeout)
, hedged_connection_timeout(receive_timeout_)
, receive_data_timeout(receive_timeout_)
{
}
ConnectionTimeouts::ConnectionTimeouts(
Poco::Timespan connection_timeout_,
Poco::Timespan send_timeout_,
Poco::Timespan receive_timeout_,
Poco::Timespan tcp_keep_alive_timeout_)
: connection_timeout(connection_timeout_)
, send_timeout(send_timeout_)
, receive_timeout(receive_timeout_)
, tcp_keep_alive_timeout(tcp_keep_alive_timeout_)
, http_keep_alive_timeout(0)
, secure_connection_timeout(connection_timeout)
, hedged_connection_timeout(receive_timeout_)
, receive_data_timeout(receive_timeout_)
, handshake_timeout(receive_timeout_)
{
}
@ -41,7 +26,26 @@ ConnectionTimeouts::ConnectionTimeouts(
Poco::Timespan send_timeout_,
Poco::Timespan receive_timeout_,
Poco::Timespan tcp_keep_alive_timeout_,
Poco::Timespan http_keep_alive_timeout_)
Poco::Timespan handshake_timeout_)
: connection_timeout(connection_timeout_)
, send_timeout(send_timeout_)
, receive_timeout(receive_timeout_)
, tcp_keep_alive_timeout(tcp_keep_alive_timeout_)
, http_keep_alive_timeout(0)
, secure_connection_timeout(connection_timeout)
, hedged_connection_timeout(receive_timeout_)
, receive_data_timeout(receive_timeout_)
, handshake_timeout(handshake_timeout_)
{
}
ConnectionTimeouts::ConnectionTimeouts(
Poco::Timespan connection_timeout_,
Poco::Timespan send_timeout_,
Poco::Timespan receive_timeout_,
Poco::Timespan tcp_keep_alive_timeout_,
Poco::Timespan http_keep_alive_timeout_,
Poco::Timespan handshake_timeout_)
: connection_timeout(connection_timeout_)
, send_timeout(send_timeout_)
, receive_timeout(receive_timeout_)
@ -50,6 +54,7 @@ ConnectionTimeouts::ConnectionTimeouts(
, secure_connection_timeout(connection_timeout)
, hedged_connection_timeout(receive_timeout_)
, receive_data_timeout(receive_timeout_)
, handshake_timeout(handshake_timeout_)
{
}
@ -60,16 +65,18 @@ ConnectionTimeouts::ConnectionTimeouts(
Poco::Timespan tcp_keep_alive_timeout_,
Poco::Timespan http_keep_alive_timeout_,
Poco::Timespan secure_connection_timeout_,
Poco::Timespan receive_hello_timeout_,
Poco::Timespan receive_data_timeout_)
Poco::Timespan hedged_connection_timeout_,
Poco::Timespan receive_data_timeout_,
Poco::Timespan handshake_timeout_)
: connection_timeout(connection_timeout_)
, send_timeout(send_timeout_)
, receive_timeout(receive_timeout_)
, tcp_keep_alive_timeout(tcp_keep_alive_timeout_)
, http_keep_alive_timeout(http_keep_alive_timeout_)
, secure_connection_timeout(secure_connection_timeout_)
, hedged_connection_timeout(receive_hello_timeout_)
, hedged_connection_timeout(hedged_connection_timeout_)
, receive_data_timeout(receive_data_timeout_)
, handshake_timeout(handshake_timeout_)
{
}
@ -90,13 +97,14 @@ ConnectionTimeouts ConnectionTimeouts::getSaturated(Poco::Timespan limit) const
saturate(http_keep_alive_timeout, limit),
saturate(secure_connection_timeout, limit),
saturate(hedged_connection_timeout, limit),
saturate(receive_data_timeout, limit));
saturate(receive_data_timeout, limit),
saturate(handshake_timeout, limit));
}
/// Timeouts for the case when we have just single attempt to connect.
ConnectionTimeouts ConnectionTimeouts::getTCPTimeoutsWithoutFailover(const Settings & settings)
{
return ConnectionTimeouts(settings.connect_timeout, settings.send_timeout, settings.receive_timeout, settings.tcp_keep_alive_timeout);
return ConnectionTimeouts(settings.connect_timeout, settings.send_timeout, settings.receive_timeout, settings.tcp_keep_alive_timeout, settings.handshake_timeout_ms);
}
/// Timeouts for the case when we will try many addresses in a loop.
@ -110,7 +118,8 @@ ConnectionTimeouts ConnectionTimeouts::getTCPTimeoutsWithFailover(const Settings
0,
settings.connect_timeout_with_failover_secure_ms,
settings.hedged_connection_timeout_ms,
settings.receive_data_timeout_ms);
settings.receive_data_timeout_ms,
settings.handshake_timeout_ms);
}
ConnectionTimeouts ConnectionTimeouts::getHTTPTimeouts(const Settings & settings, Poco::Timespan http_keep_alive_timeout)
@ -120,7 +129,8 @@ ConnectionTimeouts ConnectionTimeouts::getHTTPTimeouts(const Settings & settings
settings.http_send_timeout,
settings.http_receive_timeout,
settings.tcp_keep_alive_timeout,
http_keep_alive_timeout);
http_keep_alive_timeout,
settings.http_receive_timeout);
}
}

View File

@ -23,6 +23,9 @@ struct ConnectionTimeouts
Poco::Timespan hedged_connection_timeout;
Poco::Timespan receive_data_timeout;
/// Timeout for receiving HELLO packet
Poco::Timespan handshake_timeout;
/// Timeout for synchronous request-result protocol call (like Ping or TablesStatus)
Poco::Timespan sync_request_timeout = Poco::Timespan(DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC, 0);
@ -35,13 +38,15 @@ struct ConnectionTimeouts
ConnectionTimeouts(Poco::Timespan connection_timeout_,
Poco::Timespan send_timeout_,
Poco::Timespan receive_timeout_,
Poco::Timespan tcp_keep_alive_timeout_);
Poco::Timespan tcp_keep_alive_timeout_,
Poco::Timespan handshake_timeout_);
ConnectionTimeouts(Poco::Timespan connection_timeout_,
Poco::Timespan send_timeout_,
Poco::Timespan receive_timeout_,
Poco::Timespan tcp_keep_alive_timeout_,
Poco::Timespan http_keep_alive_timeout_);
Poco::Timespan http_keep_alive_timeout_,
Poco::Timespan handshake_timeout_);
ConnectionTimeouts(Poco::Timespan connection_timeout_,
Poco::Timespan send_timeout_,
@ -49,8 +54,9 @@ struct ConnectionTimeouts
Poco::Timespan tcp_keep_alive_timeout_,
Poco::Timespan http_keep_alive_timeout_,
Poco::Timespan secure_connection_timeout_,
Poco::Timespan receive_hello_timeout_,
Poco::Timespan receive_data_timeout_);
Poco::Timespan hedged_connection_timeout_,
Poco::Timespan receive_data_timeout_,
Poco::Timespan handshake_timeout_);
static Poco::Timespan saturate(Poco::Timespan timespan, Poco::Timespan limit);
ConnectionTimeouts getSaturated(Poco::Timespan limit) const;

View File

@ -29,14 +29,12 @@ TimeoutSetter::TimeoutSetter(Poco::Net::StreamSocket & socket_, Poco::Timespan t
TimeoutSetter::~TimeoutSetter()
{
try
{
bool connected = socket.impl()->initialized();
if (!connected)
if (was_reset)
return;
socket.setSendTimeout(old_send_timeout);
socket.setReceiveTimeout(old_receive_timeout);
try
{
reset();
}
catch (...)
{
@ -44,4 +42,15 @@ TimeoutSetter::~TimeoutSetter()
}
}
/// Restores the send/receive timeouts that were saved in old_send_timeout / old_receive_timeout.
/// If the socket implementation is not initialized, nothing is restored and was_reset stays unchanged.
void TimeoutSetter::reset()
{
    if (!socket.impl()->initialized())
        return;

    socket.setSendTimeout(old_send_timeout);
    socket.setReceiveTimeout(old_receive_timeout);

    /// Remember that the timeouts were restored, so the destructor can skip doing it again.
    was_reset = true;
}
}

View File

@ -6,7 +6,7 @@
namespace DB
{
/// Temporarily overrides socket send/receive timeouts and reset them back into destructor
/// Temporarily overrides socket send/receive timeouts and resets them back in the destructor (or manually by calling the reset method)
/// If "limit_max_timeout" is true, timeouts could be only decreased (maxed by previous value).
struct TimeoutSetter
{
@ -19,6 +19,9 @@ struct TimeoutSetter
~TimeoutSetter();
/// Reset timeouts back.
void reset();
Poco::Net::StreamSocket & socket;
Poco::Timespan send_timeout;
@ -26,5 +29,6 @@ struct TimeoutSetter
Poco::Timespan old_send_timeout;
Poco::Timespan old_receive_timeout;
bool was_reset = false;
};
}

View File

@ -543,13 +543,17 @@ namespace
template <typename Map, typename KeyGetter>
struct Inserter
{
static ALWAYS_INLINE void insertOne(const HashJoin & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i,
static ALWAYS_INLINE bool insertOne(const HashJoin & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i,
Arena & pool)
{
auto emplace_result = key_getter.emplaceKey(map, i, pool);
if (emplace_result.isInserted() || join.anyTakeLastRow())
{
new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, i);
return true;
}
return false;
}
static ALWAYS_INLINE void insertAll(const HashJoin &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool)
@ -582,7 +586,7 @@ namespace
template <JoinStrictness STRICTNESS, typename KeyGetter, typename Map, bool has_null_map>
size_t NO_INLINE insertFromBlockImplTypeCase(
HashJoin & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns,
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool)
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted)
{
[[maybe_unused]] constexpr bool mapped_one = std::is_same_v<typename Map::mapped_type, RowRef>;
constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof;
@ -593,10 +597,18 @@ namespace
auto key_getter = createKeyGetter<KeyGetter, is_asof_join>(key_columns, key_sizes);
/// For ALL and ASOF join always insert values
is_inserted = !mapped_one || is_asof_join;
for (size_t i = 0; i < rows; ++i)
{
if (has_null_map && (*null_map)[i])
{
/// nulls are not inserted into hash table,
/// keep them for RIGHT and FULL joins
is_inserted = true;
continue;
}
/// Check condition for right table from ON section
if (join_mask && !(*join_mask)[i])
@ -605,7 +617,7 @@ namespace
if constexpr (is_asof_join)
Inserter<Map, KeyGetter>::insertAsof(join, map, key_getter, stored_block, i, pool, *asof_column);
else if constexpr (mapped_one)
Inserter<Map, KeyGetter>::insertOne(join, map, key_getter, stored_block, i, pool);
is_inserted |= Inserter<Map, KeyGetter>::insertOne(join, map, key_getter, stored_block, i, pool);
else
Inserter<Map, KeyGetter>::insertAll(join, map, key_getter, stored_block, i, pool);
}
@ -616,32 +628,37 @@ namespace
template <JoinStrictness STRICTNESS, typename KeyGetter, typename Map>
size_t insertFromBlockImplType(
HashJoin & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns,
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool)
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted)
{
if (null_map)
return insertFromBlockImplTypeCase<STRICTNESS, KeyGetter, Map, true>(
join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool);
join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted);
else
return insertFromBlockImplTypeCase<STRICTNESS, KeyGetter, Map, false>(
join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool);
join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted);
}
template <JoinStrictness STRICTNESS, typename Maps>
size_t insertFromBlockImpl(
HashJoin & join, HashJoin::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & key_columns,
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool)
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted)
{
switch (type)
{
case HashJoin::Type::EMPTY: return 0;
case HashJoin::Type::CROSS: return 0; /// Do nothing. We have already saved block, and it is enough.
case HashJoin::Type::EMPTY:
[[fallthrough]];
case HashJoin::Type::CROSS:
/// Do nothing. We will only save block, and it is enough
is_inserted = true;
return 0;
#define M(TYPE) \
case HashJoin::Type::TYPE: \
return insertFromBlockImplType<STRICTNESS, typename KeyGetterForType<HashJoin::Type::TYPE, std::remove_reference_t<decltype(*maps.TYPE)>>::Type>(\
join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool); \
join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); \
break;
APPLY_FOR_JOIN_VARIANTS(M)
#undef M
}
@ -816,6 +833,7 @@ bool HashJoin::addJoinedBlock(const Block & source_block_, bool check_limits)
}
}
bool is_inserted = false;
if (kind != JoinKind::Cross)
{
joinDispatch(kind, strictness, data->maps[onexpr_idx], [&](auto kind_, auto strictness_, auto & map)
@ -824,28 +842,35 @@ bool HashJoin::addJoinedBlock(const Block & source_block_, bool check_limits)
*this, data->type, map, rows, key_columns, key_sizes[onexpr_idx], stored_block, null_map,
/// If mask is false constant, rows are added to hashmap anyway. It's not a happy-flow, so this case is not optimized
join_mask_col.getData(),
data->pool);
data->pool, is_inserted);
if (multiple_disjuncts)
used_flags.reinit<kind_, strictness_>(stored_block);
else
else if (is_inserted)
/// Number of buckets + 1 value from zero storage
used_flags.reinit<kind_, strictness_>(size + 1);
});
}
if (!multiple_disjuncts && save_nullmap)
if (!multiple_disjuncts && save_nullmap && is_inserted)
{
data->blocks_nullmaps_allocated_size += null_map_holder->allocatedBytes();
data->blocks_nullmaps.emplace_back(stored_block, null_map_holder);
}
if (!multiple_disjuncts && not_joined_map)
if (!multiple_disjuncts && not_joined_map && is_inserted)
{
data->blocks_nullmaps_allocated_size += not_joined_map->allocatedBytes();
data->blocks_nullmaps.emplace_back(stored_block, std::move(not_joined_map));
}
if (!multiple_disjuncts && !is_inserted)
{
LOG_TRACE(log, "Skipping inserting block with {} rows", rows);
data->blocks_allocated_size -= stored_block->allocatedBytes();
data->blocks.pop_back();
}
if (!check_limits)
return true;

View File

@ -13,7 +13,7 @@ namespace DB
template <typename Result, typename Callback = std::function<Result()>>
using ThreadPoolCallbackRunner = std::function<std::future<Result>(Callback &&, Priority)>;
/// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrow()'.
/// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrowOnError()'.
template <typename Result, typename Callback = std::function<Result()>>
ThreadPoolCallbackRunner<Result, Callback> threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name)
{
@ -44,7 +44,7 @@ ThreadPoolCallbackRunner<Result, Callback> threadPoolCallbackRunner(ThreadPool &
auto future = task->get_future();
my_pool->scheduleOrThrow([my_task = std::move(task)]{ (*my_task)(); }, priority);
my_pool->scheduleOrThrowOnError([my_task = std::move(task)]{ (*my_task)(); }, priority);
return future;
};

View File

@ -272,7 +272,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
{
/// If totals step has HAVING expression, skip it for now.
/// TODO:
/// We can merge HAVING expression with current filter.
/// We can merge HAVING expression with current filter.
/// Also, we can push down part of HAVING which depend only on aggregation keys.
if (totals_having->getActions())
return 0;
@ -323,9 +323,9 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
{
const auto & table_join = join ? join->getJoin()->getTableJoin() : filled_join->getJoin()->getTableJoin();
/// Only inner, cross and left(/right) join are supported. Other types may generate default values for left table keys.
/// Only inner and left(/right) join are supported. Other types may generate default values for left table keys.
/// So, if we push down a condition like `key != 0`, not all rows may be filtered.
if (table_join.kind() != JoinKind::Inner && table_join.kind() != JoinKind::Cross && table_join.kind() != kind)
if (table_join.kind() != JoinKind::Inner && table_join.kind() != kind)
return 0;
bool is_left = kind == JoinKind::Left;

View File

@ -4854,6 +4854,9 @@ void MergeTreeData::checkAlterPartitionIsPossible(
void MergeTreeData::checkPartitionCanBeDropped(const ASTPtr & partition, ContextPtr local_context)
{
if (!supportsReplication() && isStaticStorage())
return;
DataPartsVector parts_to_remove;
const auto * partition_ast = partition->as<ASTPartition>();
if (partition_ast && partition_ast->all)
@ -4874,6 +4877,9 @@ void MergeTreeData::checkPartitionCanBeDropped(const ASTPtr & partition, Context
void MergeTreeData::checkPartCanBeDropped(const String & part_name)
{
if (!supportsReplication() && isStaticStorage())
return;
auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active});
if (!part)
throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "No part {} in committed state", part_name);

View File

@ -865,7 +865,7 @@ public:
DiskPtr tryGetDiskForDetachedPart(const String & part_name) const;
DiskPtr getDiskForDetachedPart(const String & part_name) const;
bool storesDataOnDisk() const override { return true; }
bool storesDataOnDisk() const override { return !isStaticStorage(); }
Strings getDataPaths() const override;
/// Reserves space at least 1MB.

View File

@ -114,7 +114,7 @@ StorageMergeTree::StorageMergeTree(
loadDataParts(has_force_restore_data_flag);
if (!attach && !getDataPartsForInternalUsage().empty())
if (!attach && !getDataPartsForInternalUsage().empty() && !isStaticStorage())
throw Exception(ErrorCodes::INCORRECT_DATA,
"Data directory for table already containing data parts - probably "
"it was unclean DROP table or manual intervention. "
@ -283,6 +283,9 @@ StorageMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & met
void StorageMergeTree::checkTableCanBeDropped() const
{
if (!supportsReplication() && isStaticStorage())
return;
auto table_id = getStorageID();
getContext()->checkTableCanBeDropped(table_id.database_name, table_id.table_name, getTotalActiveSizeInBytes());
}

View File

@ -10,16 +10,22 @@ def cluster():
try:
cluster = ClickHouseCluster(__file__)
cluster.add_instance(
"node1", main_configs=["configs/storage_conf.xml"], with_nginx=True
"node1",
main_configs=["configs/storage_conf.xml"],
with_nginx=True,
)
cluster.add_instance(
"node2",
main_configs=["configs/storage_conf_web.xml"],
with_nginx=True,
stay_alive=True,
with_zookeeper=True,
)
cluster.add_instance(
"node3", main_configs=["configs/storage_conf_web.xml"], with_nginx=True
"node3",
main_configs=["configs/storage_conf_web.xml"],
with_nginx=True,
with_zookeeper=True,
)
cluster.add_instance(
@ -95,7 +101,7 @@ def test_usage(cluster, node_name):
for i in range(3):
node2.query(
"""
ATTACH TABLE test{} UUID '{}'
CREATE TABLE test{} UUID '{}'
(id Int32) ENGINE = MergeTree() ORDER BY id
SETTINGS storage_policy = 'web';
""".format(
@ -140,7 +146,7 @@ def test_incorrect_usage(cluster):
global uuids
node2.query(
"""
ATTACH TABLE test0 UUID '{}'
CREATE TABLE test0 UUID '{}'
(id Int32) ENGINE = MergeTree() ORDER BY id
SETTINGS storage_policy = 'web';
""".format(
@ -173,7 +179,7 @@ def test_cache(cluster, node_name):
for i in range(3):
node2.query(
"""
ATTACH TABLE test{} UUID '{}'
CREATE TABLE test{} UUID '{}'
(id Int32) ENGINE = MergeTree() ORDER BY id
SETTINGS storage_policy = 'cached_web';
""".format(
@ -238,7 +244,7 @@ def test_unavailable_server(cluster):
global uuids
node2.query(
"""
ATTACH TABLE test0 UUID '{}'
CREATE TABLE test0 UUID '{}'
(id Int32) ENGINE = MergeTree() ORDER BY id
SETTINGS storage_policy = 'web';
""".format(
@ -276,3 +282,35 @@ def test_unavailable_server(cluster):
)
node2.start_clickhouse()
node2.query("DROP TABLE test0 SYNC")
def test_replicated_database(cluster):
    """Check a web-disk MergeTree table inside a Replicated database.

    Creates database ``rdb`` as replica ``r1`` on instance ``node3`` and
    creates ``rdb.table0`` there with ``storage_policy = 'web'``. Then creates
    the same database as replica ``r2`` on ``node2``, syncs that replica and
    asserts that both instances return the same row count for the table.

    Cleanup runs in a ``finally`` block so that a failed query or assertion
    does not leave ``rdb`` behind on either instance. ``IF EXISTS`` keeps the
    cleanup from raising when a database was never created.
    """
    # Kept for consistency with the other tests in this file; ``uuids`` is
    # defined outside this function and is only read here.
    global uuids

    # The original bound this instance to a local named ``node1``.
    node3 = cluster.instances["node3"]
    node2 = cluster.instances["node2"]

    try:
        node3.query(
            "CREATE DATABASE rdb ENGINE=Replicated('/test/rdb', 's1', 'r1')",
            settings={"allow_experimental_database_replicated": 1},
        )
        node3.query(
            """
CREATE TABLE rdb.table0 UUID '{}'
(id Int32) ENGINE = MergeTree() ORDER BY id
SETTINGS storage_policy = 'web';
""".format(
                uuids[0]
            )
        )

        node2.query(
            "CREATE DATABASE rdb ENGINE=Replicated('/test/rdb', 's1', 'r2')",
            settings={"allow_experimental_database_replicated": 1},
        )
        # Wait until the second replica has applied the replicated DDL log.
        node2.query("SYSTEM SYNC DATABASE REPLICA rdb")

        assert node3.query("SELECT count() FROM rdb.table0") == "5000000\n"
        assert node2.query("SELECT count() FROM rdb.table0") == "5000000\n"
    finally:
        # Same drop order as before: first replica r1, then replica r2.
        node3.query("DROP DATABASE IF EXISTS rdb SYNC")
        node2.query("DROP DATABASE IF EXISTS rdb SYNC")

View File

@ -169,7 +169,9 @@ test_config3 = """
def send_repeated_query(table, count=5):
for i in range(count):
node.query_and_get_error(
"SELECT count() FROM {} SETTINGS receive_timeout=1".format(table)
"SELECT count() FROM {} SETTINGS receive_timeout=1, handshake_timeout_ms=1".format(
table
)
)

View File

@ -40,8 +40,8 @@ def test(started_cluster):
cluster.pause_container("node_1")
node.query("SYSTEM RELOAD CONFIG")
node.query_and_get_error(
"SELECT count() FROM distributed SETTINGS receive_timeout=1"
error = node.query_and_get_error(
"SELECT count() FROM distributed SETTINGS receive_timeout=1, handshake_timeout_ms=1"
)
result = node.query(

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: no-tsan, no-debug
# Tags: no-tsan, no-debug, no-msan
# Tag no-tsan: Too long for TSan
# shellcheck disable=SC2016

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: no-tsan, no-debug, no-fasttest
# Tags: no-tsan, no-debug, no-fasttest, no-msan
# Tag no-tsan: Too long for TSan
# shellcheck disable=SC2016

Some files were not shown because too many files have changed in this diff Show More