mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Merge branch 'master' into minor-cgroup-improvements
This commit is contained in:
commit
6243a16824
@ -3,5 +3,5 @@
|
||||
set -x
|
||||
|
||||
service zookeeper start && sleep 7 && /usr/share/zookeeper/bin/zkCli.sh -server localhost:2181 -create create /clickhouse_test '';
|
||||
gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt
|
||||
timeout 40m gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt
|
||||
./process_unit_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
|
||||
|
@ -119,7 +119,7 @@ When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree](
|
||||
|
||||
The data of TIME type in MySQL is converted to microseconds in ClickHouse.
|
||||
|
||||
Other types are not supported. If MySQL table contains a column of such type, ClickHouse throws exception "Unhandled data type" and stops replication.
|
||||
Other types are not supported. If MySQL table contains a column of such type, ClickHouse throws an exception and stops replication.
|
||||
|
||||
## Specifics and Recommendations {#specifics-and-recommendations}
|
||||
|
||||
|
@ -55,7 +55,7 @@ ATTACH TABLE postgres_database.new_table;
|
||||
```
|
||||
|
||||
:::warning
|
||||
Before version 22.1, adding a table to replication left an unremoved temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. This issue is fixed in 22.1.
|
||||
Before version 22.1, adding a table to replication left a non-removed temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. This issue is fixed in 22.1.
|
||||
:::
|
||||
|
||||
## Dynamically removing tables from replication {#dynamically-removing-table-from-replication}
|
||||
@ -257,7 +257,7 @@ Please note that this should be used only if it is actually needed. If there is
|
||||
|
||||
1. [CREATE PUBLICATION](https://postgrespro.ru/docs/postgresql/14/sql-createpublication) -- create query privilege.
|
||||
|
||||
2. [CREATE_REPLICATION_SLOT](https://postgrespro.ru/docs/postgrespro/10/protocol-replication#PROTOCOL-REPLICATION-CREATE-SLOT) -- replication privelege.
|
||||
2. [CREATE_REPLICATION_SLOT](https://postgrespro.ru/docs/postgrespro/10/protocol-replication#PROTOCOL-REPLICATION-CREATE-SLOT) -- replication privilege.
|
||||
|
||||
3. [pg_drop_replication_slot](https://postgrespro.ru/docs/postgrespro/9.5/functions-admin#functions-replication) -- replication privilege or superuser.
|
||||
|
||||
|
@ -30,7 +30,7 @@ Allows to connect to [SQLite](https://www.sqlite.org/index.html) database and pe
|
||||
|
||||
## Specifics and Recommendations {#specifics-and-recommendations}
|
||||
|
||||
SQLite stores the entire database (definitions, tables, indices, and the data itself) as a single cross-platform file on a host machine. During writing SQLite locks the entire database file, therefore write operations are performed sequentially. Read operations can be multitasked.
|
||||
SQLite stores the entire database (definitions, tables, indices, and the data itself) as a single cross-platform file on a host machine. During writing SQLite locks the entire database file, therefore write operations are performed sequentially. Read operations can be multi-tasked.
|
||||
SQLite does not require service management (such as startup scripts) or access control based on `GRANT` and passwords. Access control is handled by means of file-system permissions given to the database file itself.
|
||||
|
||||
## Usage Example {#usage-example}
|
||||
|
@ -156,7 +156,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us
|
||||
| rpc\_client\_connect\_timeout | 600 * 1000 |
|
||||
| rpc\_client\_read\_timeout | 3600 * 1000 |
|
||||
| rpc\_client\_write\_timeout | 3600 * 1000 |
|
||||
| rpc\_client\_socekt\_linger\_timeout | -1 |
|
||||
| rpc\_client\_socket\_linger\_timeout | -1 |
|
||||
| rpc\_client\_connect\_retry | 10 |
|
||||
| rpc\_client\_timeout | 3600 * 1000 |
|
||||
| dfs\_default\_replica | 3 |
|
||||
@ -176,7 +176,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us
|
||||
| output\_write\_timeout | 3600 * 1000 |
|
||||
| output\_close\_timeout | 3600 * 1000 |
|
||||
| output\_packetpool\_size | 1024 |
|
||||
| output\_heeartbeat\_interval | 10 * 1000 |
|
||||
| output\_heartbeat\_interval | 10 * 1000 |
|
||||
| dfs\_client\_failover\_max\_attempts | 15 |
|
||||
| dfs\_client\_read\_shortcircuit\_streams\_cache\_size | 256 |
|
||||
| dfs\_client\_socketcache\_expiryMsec | 3000 |
|
||||
|
@ -6,7 +6,7 @@ sidebar_label: Hive
|
||||
|
||||
# Hive
|
||||
|
||||
The Hive engine allows you to perform `SELECT` quries on HDFS Hive table. Currently it supports input formats as below:
|
||||
The Hive engine allows you to perform `SELECT` queries on HDFS Hive table. Currently it supports input formats as below:
|
||||
|
||||
- Text: only supports simple scalar column types except `binary`
|
||||
|
||||
|
@ -10,7 +10,7 @@ This engine allows integrating ClickHouse with [NATS](https://nats.io/).
|
||||
|
||||
`NATS` lets you:
|
||||
|
||||
- Publish or subcribe to message subjects.
|
||||
- Publish or subscribe to message subjects.
|
||||
- Process new messages as they become available.
|
||||
|
||||
## Creating a Table {#table_engine-redisstreams-creating-a-table}
|
||||
@ -46,7 +46,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
Required parameters:
|
||||
|
||||
- `nats_url` – host:port (for example, `localhost:5672`).
|
||||
- `nats_subjects` – List of subject for NATS table to subscribe/publsh to. Supports wildcard subjects like `foo.*.bar` or `baz.>`
|
||||
- `nats_subjects` – List of subjects for the NATS table to subscribe/publish to. Supports wildcard subjects like `foo.*.bar` or `baz.>`
|
||||
- `nats_format` – Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section.
|
||||
|
||||
Optional parameters:
|
||||
|
@ -57,7 +57,7 @@ or via config (since version 21.11):
|
||||
</named_collections>
|
||||
```
|
||||
|
||||
Some parameters can be overriden by key value arguments:
|
||||
Some parameters can be overridden by key value arguments:
|
||||
``` sql
|
||||
SELECT * FROM postgresql(postgres1, schema='schema1', table='table1');
|
||||
```
|
||||
|
@ -23,7 +23,7 @@ CREATE TABLE s3_engine_table (name String, value UInt32)
|
||||
- `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed.
|
||||
- `format` — The [format](../../../interfaces/formats.md#formats) of the file.
|
||||
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
|
||||
- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension.
|
||||
- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will auto-detect compression by file extension.
|
||||
|
||||
### PARTITION BY
|
||||
|
||||
@ -140,8 +140,8 @@ The following settings can be set before query execution or placed into configur
|
||||
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
|
||||
- `s3_upload_part_size_multiply_factor` - Multiply `s3_min_upload_part_size` by this factor each time `s3_upload_part_size_multiply_parts_count_threshold` parts were uploaded from a single write to S3. Default value is `2`.
|
||||
- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3 `s3_min_upload_part_size multiplied` by `s3_upload_part_size_multiply_factor`. DEfault value us `500`.
|
||||
- `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurenly for one object. Its number should be limited. The value `0` means unlimited. Default value is `20`. Each inflight part has a buffer with size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts and more when file is big enought, see `upload_part_size_multiply_factor`. With default settings one uploaded file consumes not more than `320Mb` for a file which is less than `8G`. The consumption is greater for a larger file.
|
||||
- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3, `s3_min_upload_part_size` is multiplied by `s3_upload_part_size_multiply_factor`. Default value is `500`.
|
||||
- `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. Its number should be limited. The value `0` means unlimited. Default value is `20`. Each in-flight part has a buffer with size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts and more when file is big enough, see `upload_part_size_multiply_factor`. With default settings one uploaded file consumes not more than `320Mb` for a file which is less than `8G`. The consumption is greater for a larger file.
|
||||
|
||||
Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.
|
||||
|
||||
|
@ -135,7 +135,7 @@ ORDER BY id;
|
||||
|
||||
Annoy supports `L2Distance` and `cosineDistance`.
|
||||
|
||||
In the `SELECT` in the settings (`ann_index_select_query_params`) you can specify the size of the internal buffer (more details in the description above or in the [original repository](https://github.com/spotify/annoy)). During the query it will inspect up to `search_k` nodes which defaults to `n_trees * n` if not provided. `search_k` gives you a run-time tradeoff between better accuracy and speed.
|
||||
In the `SELECT` in the settings (`ann_index_select_query_params`) you can specify the size of the internal buffer (more details in the description above or in the [original repository](https://github.com/spotify/annoy)). During the query it will inspect up to `search_k` nodes which defaults to `n_trees * n` if not provided. `search_k` gives you a run-time trade-off between better accuracy and speed.
|
||||
|
||||
__Example__:
|
||||
``` sql
|
||||
|
@ -165,7 +165,7 @@ Performance of such a query heavily depends on the table layout. Because of that
|
||||
|
||||
The key factors for a good performance:
|
||||
|
||||
- number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise query will underutilize the machine
|
||||
- number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise query will under-utilize the machine
|
||||
- partitions shouldn't be too small, so batch processing won't degenerate into row-by-row processing
|
||||
- partitions should be comparable in size, so all threads will do roughly the same amount of work
|
||||
|
||||
|
@ -779,7 +779,7 @@ Disks, volumes and storage policies should be declared inside the `<storage_conf
|
||||
|
||||
:::tip
|
||||
Disks can also be declared in the `SETTINGS` section of a query. This is useful
|
||||
for adhoc analysis to temporarily attach a disk that is, for example, hosted at a URL.
|
||||
for ad-hoc analysis to temporarily attach a disk that is, for example, hosted at a URL.
|
||||
See [dynamic storage](#dynamic-storage) for more details.
|
||||
:::
|
||||
|
||||
@ -856,7 +856,7 @@ Tags:
|
||||
- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. This can significantly slow down insert in case the destination volume/disk is slow (e.g. S3).
|
||||
- `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`.
|
||||
|
||||
Cofiguration examples:
|
||||
Configuration examples:
|
||||
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
@ -1224,7 +1224,7 @@ Limit parameters (mainly for internal usage):
|
||||
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
|
||||
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
|
||||
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
|
||||
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurenly for one object.
|
||||
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
|
||||
|
||||
Other parameters:
|
||||
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
|
@ -72,7 +72,7 @@ Additionally, number of keys will have a soft limit of 4 for the number of keys.
|
||||
|
||||
If multiple tables are created on the same ZooKeeper path, the values are persisted as long as there exists at least 1 table using it.
|
||||
As a result, it is possible to use `ON CLUSTER` clause when creating the table and sharing the data from multiple ClickHouse instances.
|
||||
Of course, it's possible to manually run `CREATE TABLE` with same path on nonrelated ClickHouse instances to have same data sharing effect.
|
||||
Of course, it's possible to manually run `CREATE TABLE` with same path on unrelated ClickHouse instances to have same data sharing effect.
|
||||
|
||||
## Supported operations {#table_engine-KeeperMap-supported-operations}
|
||||
|
||||
|
@ -87,7 +87,7 @@ ORDER BY (marketplace, review_date, product_category);
|
||||
|
||||
3. We are now ready to insert the data into ClickHouse. Before we do, check out the [list of files in the dataset](https://s3.amazonaws.com/amazon-reviews-pds/tsv/index.txt) and decide which ones you want to include.
|
||||
|
||||
4. We will insert all of the US reviews - which is about 151M rows. The following `INSERT` command uses the `s3Cluster` table function, which allows the processing of mulitple S3 files in parallel using all the nodes of your cluster. We also use a wildcard to insert any file that starts with the name `https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_`:
|
||||
4. We will insert all of the US reviews - which is about 151M rows. The following `INSERT` command uses the `s3Cluster` table function, which allows the processing of multiple S3 files in parallel using all the nodes of your cluster. We also use a wildcard to insert any file that starts with the name `https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_`:
|
||||
|
||||
```sql
|
||||
INSERT INTO amazon_reviews
|
||||
|
@ -806,7 +806,7 @@ FROM
|
||||
31 rows in set. Elapsed: 0.043 sec. Processed 7.54 million rows, 40.53 MB (176.71 million rows/s., 950.40 MB/s.)
|
||||
```
|
||||
|
||||
Maybe a little more near the end of the month, but overall we keep a good even distribution. Again this is unrealiable due to the filtering of the docs filter during data insertion.
|
||||
Maybe a little more near the end of the month, but overall we keep a good even distribution. Again this is unreliable due to the filtering of the docs filter during data insertion.
|
||||
|
||||
## Authors with the most diverse impact
|
||||
|
||||
|
@ -9,7 +9,7 @@ The data in this dataset is derived and cleaned from the full OpenSky dataset to
|
||||
|
||||
Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd
|
||||
|
||||
Martin Strohmeier, Xavier Olive, Jannis Lübbe, Matthias Schäfer, and Vincent Lenders
|
||||
Martin Strohmeier, Xavier Olive, Jannis Luebbe, Matthias Schaefer, and Vincent Lenders
|
||||
"Crowdsourced air traffic data from the OpenSky Network 2019–2020"
|
||||
Earth System Science Data 13(2), 2021
|
||||
https://doi.org/10.5194/essd-13-357-2021
|
||||
|
@ -542,7 +542,7 @@ LIMIT 10;
|
||||
10 rows in set. Elapsed: 5.956 sec. Processed 14.69 billion rows, 126.19 GB (2.47 billion rows/s., 21.19 GB/s.)
|
||||
```
|
||||
|
||||
11. Let's see which subreddits had the biggest increase in commnents from 2018 to 2019:
|
||||
11. Let's see which subreddits had the biggest increase in comments from 2018 to 2019:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
@ -718,4 +718,3 @@ ORDER BY quarter ASC;
|
||||
└────────────┴────────────┴───────────┴──────────┘
|
||||
|
||||
70 rows in set. Elapsed: 325.835 sec. Processed 14.69 billion rows, 2.57 TB (45.08 million rows/s., 7.87 GB/s.)
|
||||
```
|
@ -22,7 +22,7 @@ The steps below will easily work on a local install of ClickHouse too. The only
|
||||
|
||||
## Step-by-step instructions
|
||||
|
||||
1. Let's see what the data looks like. The `s3cluster` table function returns a table, so we can `DESCRIBE` the reult:
|
||||
1. Let's see what the data looks like. The `s3Cluster` table function returns a table, so we can `DESCRIBE` the result:
|
||||
|
||||
```sql
|
||||
DESCRIBE s3Cluster(
|
||||
@ -322,7 +322,7 @@ ORDER BY month ASC;
|
||||
A spike of uploaders [around covid is noticeable](https://www.theverge.com/2020/3/27/21197642/youtube-with-me-style-videos-views-coronavirus-cook-workout-study-home-beauty).
|
||||
|
||||
|
||||
### More subtitiles over time and when
|
||||
### More subtitles over time and when
|
||||
|
||||
With advances in speech recognition, it’s easier than ever to create subtitles for video with youtube adding auto-captioning in late 2009 - was the jump then?
|
||||
|
||||
|
@ -275,9 +275,9 @@ Type: UInt64
|
||||
|
||||
Default: 1000
|
||||
|
||||
## max_concurrent_insert_queries
|
||||
## max_concurrent_queries
|
||||
|
||||
Limit on total number of concurrent insert queries. Zero means Unlimited.
|
||||
Limit on total number of concurrently executed queries. Zero means unlimited. Note that limits on insert and select queries, and on the maximum number of queries for users must also be considered. See also max_concurrent_insert_queries, max_concurrent_select_queries, max_concurrent_queries_for_all_users.
|
||||
|
||||
:::note
|
||||
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
|
||||
@ -287,9 +287,9 @@ Type: UInt64
|
||||
|
||||
Default: 0
|
||||
|
||||
## max_concurrent_queries
|
||||
## max_concurrent_insert_queries
|
||||
|
||||
Limit on total number of concurrently executed queries. Zero means Unlimited. Note that limits on insert and select queries, and on the maximum number of queries for users must also be considered. See also max_concurrent_insert_queries, max_concurrent_select_queries, max_concurrent_queries_for_all_users. Zero means unlimited.
|
||||
Limit on total number of concurrent insert queries. Zero means Unlimited.
|
||||
|
||||
:::note
|
||||
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
|
||||
@ -1277,49 +1277,6 @@ For more information, see the section [Creating replicated tables](../../engines
|
||||
<macros incl="macros" optional="true" />
|
||||
```
|
||||
|
||||
|
||||
## max_concurrent_queries_for_user {#max-concurrent-queries-for-user}
|
||||
|
||||
The maximum number of simultaneously processed queries related to MergeTree table per user.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — No limit.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<max_concurrent_queries_for_user>5</max_concurrent_queries_for_user>
|
||||
```
|
||||
|
||||
## max_concurrent_queries_for_all_users {#max-concurrent-queries-for-all-users}
|
||||
|
||||
Throw exception if the value of this setting is less or equal than the current number of simultaneously processed queries.
|
||||
|
||||
Example: `max_concurrent_queries_for_all_users` can be set to 99 for all users and database administrator can set it to 100 for itself to run queries for investigation even when the server is overloaded.
|
||||
|
||||
Modifying the setting for one query or user does not affect other queries.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — No limit.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<max_concurrent_queries_for_all_users>99</max_concurrent_queries_for_all_users>
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [max_concurrent_queries](#max-concurrent-queries)
|
||||
|
||||
## max_open_files {#max-open-files}
|
||||
|
||||
The maximum number of open files.
|
||||
@ -1947,7 +1904,7 @@ Config fields:
|
||||
- `regexp` - RE2 compatible regular expression (mandatory)
|
||||
- `replace` - substitution string for sensitive data (optional, by default - six asterisks)
|
||||
|
||||
The masking rules are applied to the whole query (to prevent leaks of sensitive data from malformed / non-parsable queries).
|
||||
The masking rules are applied to the whole query (to prevent leaks of sensitive data from malformed / non-parseable queries).
|
||||
|
||||
`system.events` table has counter `QueryMaskingRulesMatch` which has an overall number of query masking rules matches.
|
||||
|
||||
|
@ -1182,7 +1182,7 @@ Possible values:
|
||||
|
||||
- `bin` - as 16-bytes binary.
|
||||
- `str` - as a string of 36 bytes.
|
||||
- `ext` - as extention with ExtType = 2.
|
||||
- `ext` - as extension with ExtType = 2.
|
||||
|
||||
Default value: `ext`.
|
||||
|
||||
|
@ -646,6 +646,48 @@ Used for the same purpose as `max_block_size`, but it sets the recommended block
|
||||
However, the block size cannot be more than `max_block_size` rows.
|
||||
By default: 1,000,000. It only works when reading from MergeTree engines.
|
||||
|
||||
## max_concurrent_queries_for_user {#max-concurrent-queries-for-user}
|
||||
|
||||
The maximum number of simultaneously processed queries related to MergeTree table per user.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — No limit.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<max_concurrent_queries_for_user>5</max_concurrent_queries_for_user>
|
||||
```
|
||||
|
||||
## max_concurrent_queries_for_all_users {#max-concurrent-queries-for-all-users}
|
||||
|
||||
Throw exception if the value of this setting is less than or equal to the current number of simultaneously processed queries.
|
||||
|
||||
Example: `max_concurrent_queries_for_all_users` can be set to 99 for all users and database administrator can set it to 100 for itself to run queries for investigation even when the server is overloaded.
|
||||
|
||||
Modifying the setting for one query or user does not affect other queries.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — No limit.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<max_concurrent_queries_for_all_users>99</max_concurrent_queries_for_all_users>
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [max_concurrent_queries](/docs/en/operations/server-configuration-parameters/settings.md/#max_concurrent_queries)
|
||||
|
||||
## merge_tree_min_rows_for_concurrent_read {#setting-merge-tree-min-rows-for-concurrent-read}
|
||||
|
||||
If the number of rows to be read from a file of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `merge_tree_min_rows_for_concurrent_read` then ClickHouse tries to perform a concurrent reading from this file on several threads.
|
||||
@ -1050,6 +1092,12 @@ Timeouts in seconds on the socket used for communicating with the client.
|
||||
|
||||
Default value: 10, 300, 300.
|
||||
|
||||
## handshake_timeout_ms {#handshake-timeout-ms}
|
||||
|
||||
Timeout in milliseconds for receiving Hello packet from replicas during handshake.
|
||||
|
||||
Default value: 10000.
|
||||
|
||||
## cancel_http_readonly_queries_on_client_close {#cancel-http-readonly-queries-on-client-close}
|
||||
|
||||
Cancels HTTP read-only queries (e.g. SELECT) when a client closes the connection without waiting for the response.
|
||||
@ -1107,7 +1155,7 @@ Default value: `0`.
|
||||
Could be used for throttling speed when replicating the data to add or replace new nodes.
|
||||
|
||||
:::note
|
||||
60000000 bytes/s approximatly corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
|
||||
60000000 bytes/s approximately corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
|
||||
:::
|
||||
|
||||
## max_replicated_sends_network_bandwidth_for_server {#max_replicated_sends_network_bandwidth_for_server}
|
||||
@ -1128,7 +1176,7 @@ Default value: `0`.
|
||||
Could be used for throttling speed when replicating the data to add or replace new nodes.
|
||||
|
||||
:::note
|
||||
60000000 bytes/s approximatly corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
|
||||
60000000 bytes/s approximately corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
|
||||
:::
|
||||
|
||||
## connect_timeout_with_failover_ms {#connect-timeout-with-failover-ms}
|
||||
@ -2030,7 +2078,7 @@ FORMAT PrettyCompactMonoBlock
|
||||
|
||||
## distributed_push_down_limit {#distributed-push-down-limit}
|
||||
|
||||
Enables or disables [LIMIT](#limit) applying on each shard separatelly.
|
||||
Enables or disables [LIMIT](#limit) applying on each shard separately.
|
||||
|
||||
This will allow to avoid:
|
||||
- Sending extra rows over network;
|
||||
@ -2431,7 +2479,7 @@ Default value: 0.
|
||||
|
||||
## allow_introspection_functions {#settings-allow_introspection_functions}
|
||||
|
||||
Enables or disables [introspections functions](../../sql-reference/functions/introspection.md) for query profiling.
|
||||
Enables or disables [introspection functions](../../sql-reference/functions/introspection.md) for query profiling.
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -3492,7 +3540,7 @@ Default value: `0`.
|
||||
|
||||
## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}
|
||||
|
||||
Sets how long initial DDL query should wait for Replicated database to precess previous DDL queue entries in seconds.
|
||||
Sets how long initial DDL query should wait for Replicated database to process previous DDL queue entries in seconds.
|
||||
|
||||
Possible values:
|
||||
|
||||
|
@ -28,7 +28,7 @@ The `system.columns` table contains the following columns (the column type is sh
|
||||
- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sampling key expression.
|
||||
- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — Compression codec name.
|
||||
- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned.
|
||||
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
|
||||
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bit width for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
|
||||
- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system is the accuracy of approximate numeric data, exact numeric data, integer data or monetary data. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned.
|
||||
- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned.
|
||||
- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned.
|
||||
|
@ -12,7 +12,7 @@ Columns:
|
||||
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
|
||||
- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid.
|
||||
- `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name.
|
||||
- `metadata_dropped_path` ([String](../../sql-reference/data-types/string.md)) — Path of table's metadata file in metadate_dropped directory.
|
||||
- `metadata_dropped_path` ([String](../../sql-reference/data-types/string.md)) — Path of table's metadata file in metadata_dropped directory.
|
||||
- `table_dropped_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time when the next attempt to remove table's data is scheduled on. Usually it's the time when the table was dropped plus `database_atomic_delay_before_drop_table_sec`
|
||||
|
||||
**Example**
|
||||
|
@ -43,7 +43,7 @@ Columns:
|
||||
- `data_type` ([String](../../sql-reference/data-types/string.md)) — Column type.
|
||||
- `character_maximum_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned.
|
||||
- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned.
|
||||
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
|
||||
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bit width for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
|
||||
- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system is the accuracy of approximate numeric data, exact numeric data, integer data or monetary data. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned.
|
||||
- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned.
|
||||
- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned.
|
||||
|
@ -3,7 +3,7 @@ slug: /en/operations/system-tables/licenses
|
||||
---
|
||||
# licenses
|
||||
|
||||
Сontains licenses of third-party libraries that are located in the [contrib](https://github.com/ClickHouse/ClickHouse/tree/master/contrib) directory of ClickHouse sources.
|
||||
Contains licenses of third-party libraries that are located in the [contrib](https://github.com/ClickHouse/ClickHouse/tree/master/contrib) directory of ClickHouse sources.
|
||||
|
||||
Columns:
|
||||
|
||||
|
@ -100,7 +100,7 @@ Columns:
|
||||
- `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
|
||||
|
||||
:::note
|
||||
The `move_ttl_info.expression` array is kept mostly for backward compatibility, now the simpliest way to check `TTL MOVE` rule is to use the `move_ttl_info.min` and `move_ttl_info.max` fields.
|
||||
The `move_ttl_info.expression` array is kept mostly for backward compatibility, now the simplest way to check `TTL MOVE` rule is to use the `move_ttl_info.min` and `move_ttl_info.max` fields.
|
||||
:::
|
||||
|
||||
- `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the minimum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
|
||||
|
@ -14,8 +14,8 @@ Columns:
|
||||
- `['user_name']` — Connections with the same user name share the same quota.
|
||||
- `['ip_address']` — Connections from the same IP share the same quota.
|
||||
- `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota_key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header.
|
||||
- `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `user_name`.
|
||||
- `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `ip_address`.
|
||||
- `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the quota is tracked for `user_name`.
|
||||
- `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the quota is tracked for `ip_address`.
|
||||
- `durations` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Time interval lengths in seconds.
|
||||
- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows which users the quota is applied to. Values:
|
||||
- `0` — The quota applies to users specified in the `apply_to_list`.
|
||||
|
@ -50,7 +50,7 @@ Columns:
|
||||
- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
|
||||
- [Distributed](../../engines/table-engines/special/distributed.md#distributed)
|
||||
|
||||
- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `NULL` (including underying `Buffer` table).
|
||||
- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `NULL` (including underlying `Buffer` table).
|
||||
|
||||
- `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes, if it is possible to quickly determine exact number of bytes for the table on storage, otherwise `NULL` (does not include any underlying storage).
|
||||
|
||||
|
@ -43,7 +43,7 @@ Columns:
|
||||
|
||||
- `event` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) - For trace type `ProfileEvent` is the name of updated profile event, for other trace types is an empty string.
|
||||
|
||||
- `increment` ([UInt64](../../sql-reference/data-types/int-uint.md)) - For trace type `ProfileEvent` is the amount of incremnt of profile event, for other trace types is 0.
|
||||
- `increment` ([UInt64](../../sql-reference/data-types/int-uint.md)) - For trace type `ProfileEvent` is the amount of increment of profile event, for other trace types is 0.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -33,7 +33,7 @@ Columns with request response parameters:
|
||||
|
||||
- `zxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — ZooKeeper transaction ID. The serial number issued by the ZooKeeper server in response to a successfully executed request (`0` if the request was not executed/returned an error/the client does not know whether the request was executed).
|
||||
- `error` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — Error code. Can have many values, here are just some of them:
|
||||
- `ZOK` — The request was executed seccessfully.
|
||||
- `ZOK` — The request was executed successfully.
|
||||
- `ZCONNECTIONLOSS` — The connection was lost.
|
||||
- `ZOPERATIONTIMEOUT` — The request execution timeout has expired.
|
||||
- `ZSESSIONEXPIRED` — The session has expired.
|
||||
@ -43,7 +43,7 @@ Columns with request response parameters:
|
||||
- `path_created` ([String](../../sql-reference/data-types/string.md)) — The path to the created ZooKeeper node (for responses to the `CREATE` request), may differ from the `path` if the node is created as a `sequential`.
|
||||
- `stat_czxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that caused this ZooKeeper node to be created.
|
||||
- `stat_mzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that last modified this ZooKeeper node.
|
||||
- `stat_pzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The transaction ID of the change that last modified childern of this ZooKeeper node.
|
||||
- `stat_pzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The transaction ID of the change that last modified children of this ZooKeeper node.
|
||||
- `stat_version` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the data of this ZooKeeper node.
|
||||
- `stat_cversion` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the children of this ZooKeeper node.
|
||||
- `stat_dataLength` ([Int32](../../sql-reference/data-types/int-uint.md)) — The length of the data field of this ZooKeeper node.
|
||||
|
@ -24,7 +24,7 @@ It is designed to retain the following properties of data:
|
||||
|
||||
Most of the properties above are viable for performance testing:
|
||||
|
||||
reading data, filtering, aggregatio, and sorting will work at almost the same speed
|
||||
reading data, filtering, aggregation, and sorting will work at almost the same speed
|
||||
as on original data due to saved cardinalities, magnitudes, compression ratios, etc.
|
||||
|
||||
It works in a deterministic fashion: you define a seed value and the transformation is determined by input data and by seed.
|
||||
|
@ -356,7 +356,7 @@ Type: `UInt8`.
|
||||
|
||||
Let’s consider an example of calculating the `retention` function to determine site traffic.
|
||||
|
||||
**1.** Сreate a table to illustrate an example.
|
||||
**1.** Create a table to illustrate an example.
|
||||
|
||||
``` sql
|
||||
CREATE TABLE retention_test(date Date, uid Int32) ENGINE = Memory;
|
||||
|
@ -5,7 +5,7 @@ sidebar_position: 351
|
||||
|
||||
# cramersV
|
||||
|
||||
[Cramér's V](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V) (sometimes referred to as Cramér's phi) is a measure of association between two columns in a table. The result of the `cramersV` function ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. It may be viewed as the association between two variables as a percentage of their maximum possible variation.
|
||||
[Cramer's V](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V) (sometimes referred to as Cramer's phi) is a measure of association between two columns in a table. The result of the `cramersV` function ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. It may be viewed as the association between two variables as a percentage of their maximum possible variation.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -6,7 +6,7 @@ sidebar_position: 352
|
||||
# cramersVBiasCorrected
|
||||
|
||||
|
||||
Cramér's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv.md) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramér's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction).
|
||||
Cramer's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv.md) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramer's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction).
|
||||
|
||||
|
||||
|
||||
|
@ -6,7 +6,7 @@ sidebar_title: exponentialMovingAverage
|
||||
|
||||
## exponentialMovingAverage
|
||||
|
||||
Сalculates the exponential moving average of values for the determined time.
|
||||
Calculates the exponential moving average of values for the determined time.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -27,7 +27,7 @@ Each `value` corresponds to the determinate `timeunit`. The half-life `x` is the
|
||||
|
||||
**Returned values**
|
||||
|
||||
- Returnes an [exponentially smoothed moving average](https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average) of the values for the past `x` time at the latest point of time.
|
||||
- Returns an [exponentially smoothed moving average](https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average) of the values for the past `x` time at the latest point of time.
|
||||
|
||||
Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64).
|
||||
|
||||
|
@ -5,7 +5,7 @@ sidebar_position: 125
|
||||
|
||||
# groupBitAnd
|
||||
|
||||
Applies bitwise `AND` for series of numbers.
|
||||
Applies bit-wise `AND` for series of numbers.
|
||||
|
||||
``` sql
|
||||
groupBitAnd(expr)
|
||||
|
@ -5,7 +5,7 @@ sidebar_position: 126
|
||||
|
||||
# groupBitOr
|
||||
|
||||
Applies bitwise `OR` for series of numbers.
|
||||
Applies bit-wise `OR` for series of numbers.
|
||||
|
||||
``` sql
|
||||
groupBitOr(expr)
|
||||
|
@ -5,7 +5,7 @@ sidebar_position: 127
|
||||
|
||||
# groupBitXor
|
||||
|
||||
Applies bitwise `XOR` for series of numbers.
|
||||
Applies bit-wise `XOR` for series of numbers.
|
||||
|
||||
``` sql
|
||||
groupBitXor(expr)
|
||||
|
@ -30,11 +30,11 @@ Samples must belong to continuous, one-dimensional probability distributions.
|
||||
The null hypothesis is that samples come from the same distribution, e.g. F(x) = G(x) for all x.
|
||||
And the alternative is that the distributions are not identical.
|
||||
- `'greater'`
|
||||
The null hypothesis is that values in the first sample are *stohastically smaller* than those in the second one,
|
||||
The null hypothesis is that values in the first sample are *stochastically smaller* than those in the second one,
|
||||
e.g. the CDF of first distribution lies above and hence to the left of that for the second one.
|
||||
Which in fact means that F(x) >= G(x) for all x. And the alternative in this case is that F(x) < G(x) for at least one x.
|
||||
- `'less'`.
|
||||
The null hypothesis is that values in the first sample are *stohastically greater* than those in the second one,
|
||||
The null hypothesis is that values in the first sample are *stochastically greater* than those in the second one,
|
||||
e.g. the CDF of first distribution lies below and hence to the right of that for the second one.
|
||||
Which in fact means that F(x) <= G(x) for all x. And the alternative in this case is that F(x) > G(x) for at least one x.
|
||||
- `computation_method` — the method used to compute p-value. (Optional, default: `'auto'`.) [String](../../../sql-reference/data-types/string.md).
|
||||
|
@ -14,7 +14,7 @@ The result depends on the order of running the query, and is nondeterministic.
|
||||
When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function.
|
||||
|
||||
:::note
|
||||
Using `quantileTDigestWeighted` [is not recommended for tiny data sets](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) and can lead to significat error. In this case, consider possibility of using [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) instead.
|
||||
Using `quantileTDigestWeighted` [is not recommended for tiny data sets](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) and can lead to significant error. In this case, consider possibility of using [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) instead.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
@ -18,7 +18,7 @@ stochasticLinearRegression(1.0, 1.0, 10, 'SGD')
|
||||
1. `learning rate` is the coefficient on step length, when gradient descent step is performed. Too big learning rate may cause infinite weights of the model. Default is `0.00001`.
|
||||
2. `l2 regularization coefficient` which may help to prevent overfitting. Default is `0.1`.
|
||||
3. `mini-batch size` sets the number of elements, which gradients will be computed and summed to perform one step of gradient descent. Pure stochastic descent uses one element, however having small batches(about 10 elements) make gradient steps more stable. Default is `15`.
|
||||
4. `method for updating weights`, they are: `Adam` (by default), `SGD`, `Momentum`, `Nesterov`. `Momentum` and `Nesterov` require little bit more computations and memory, however they happen to be useful in terms of speed of convergance and stability of stochastic gradient methods.
|
||||
4. `method for updating weights`, they are: `Adam` (by default), `SGD`, `Momentum`, `Nesterov`. `Momentum` and `Nesterov` require a little more computation and memory, however they happen to be useful in terms of speed of convergence and stability of stochastic gradient methods.
|
||||
|
||||
### Usage
|
||||
|
||||
|
@ -22,7 +22,7 @@ Resolution: 1 second.
|
||||
|
||||
The point in time is saved as a [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time), regardless of the time zone or daylight saving time. The time zone affects how the values of the `DateTime` type values are displayed in text format and how the values specified as strings are parsed (‘2020-01-01 05:00:01’).
|
||||
|
||||
Timezone agnostic unix timestamp is stored in tables, and the timezone is used to transform it to text format or back during data import/export or to make calendar calculations on the values (example: `toDate`, `toHour` functions et cetera). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata.
|
||||
Timezone agnostic Unix timestamp is stored in tables, and the timezone is used to transform it to text format or back during data import/export or to make calendar calculations on the values (example: `toDate`, `toHour` functions etc.). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata.
|
||||
|
||||
A list of supported time zones can be found in the [IANA Time Zone Database](https://www.iana.org/time-zones) and also can be queried by `SELECT * FROM system.time_zones`. [The list](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) is also available at Wikipedia.
|
||||
|
||||
@ -30,7 +30,7 @@ You can explicitly set a time zone for `DateTime`-type columns when creating a t
|
||||
|
||||
The [clickhouse-client](../../interfaces/cli.md) applies the server time zone by default if a time zone isn’t explicitly set when initializing the data type. To use the client time zone, run `clickhouse-client` with the `--use_client_time_zone` parameter.
|
||||
|
||||
ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionaly you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.
|
||||
ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.
|
||||
|
||||
When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format) setting.
|
||||
|
||||
|
@ -27,7 +27,7 @@ ClickHouse data types include:
|
||||
- **Aggregation function types**: use [`SimpleAggregateFunction`](./simpleaggregatefunction.md) and [`AggregateFunction`](./aggregatefunction.md) for storing the intermediate status of aggregate function results
|
||||
- **Nested data structures**: A [`Nested` data structure](./nested-data-structures/index.md) is like a table inside a cell
|
||||
- **Tuples**: A [`Tuple` of elements](./tuple.md), each having an individual type.
|
||||
- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column gettings its default value for the data type)
|
||||
- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column getting its default value for the data type)
|
||||
- **IP addresses**: use [`IPv4`](./domains/ipv4.md) and [`IPv6`](./domains/ipv6.md) to efficiently store IP addresses
|
||||
- **Geo types**: for [geographical data](./geo.md), including `Point`, `Ring`, `Polygon` and `MultiPolygon`
|
||||
- **Special data types**: including [`Expression`](./special-data-types/expression.md), [`Set`](./special-data-types/set.md), [`Nothing`](./special-data-types/nothing.md) and [`Interval`](./special-data-types/interval.md)
|
||||
|
@ -247,7 +247,7 @@ LAYOUT(FLAT(INITIAL_ARRAY_SIZE 50000 MAX_ARRAY_SIZE 5000000))
|
||||
|
||||
### hashed
|
||||
|
||||
The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers In practice, the number of keys can reach tens of millions of items.
|
||||
The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items.
|
||||
|
||||
The dictionary key has the [UInt64](../../sql-reference/data-types/int-uint.md) type.
|
||||
|
||||
@ -984,7 +984,7 @@ SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source wher
|
||||
...
|
||||
```
|
||||
|
||||
For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronious and asynchronous updates are supported.
|
||||
For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronous and asynchronous updates are supported.
|
||||
|
||||
It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to only request data that was changed after the previous update. If `update_field` is specified as part of the dictionary source configuration, the value of the previous update time in seconds will be added to the data request. Depending on the source type (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, or ODBC), different logic will be applied to `update_field` before requesting data from an external source.
|
||||
|
||||
@ -1243,8 +1243,8 @@ Setting fields:
|
||||
- `password` – Password required for the authentication.
|
||||
- `headers` – All custom HTTP headers entries used for the HTTP request. Optional parameter.
|
||||
- `header` – Single HTTP header entry.
|
||||
- `name` – Identifiant name used for the header send on the request.
|
||||
- `value` – Value set for a specific identifiant name.
|
||||
- `name` – Identifier name used for the header sent on the request.
|
||||
- `value` – Value set for a specific identifier name.
|
||||
|
||||
When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`) remote hosts for HTTP dictionaries are checked against the contents of `remote_url_allow_hosts` section from config to prevent database users from accessing arbitrary HTTP servers.
|
||||
|
||||
|
@ -140,7 +140,7 @@ range([start, ] end [, step])
|
||||
|
||||
**Implementation details**
|
||||
|
||||
- All arguments `start`, `end`, `step` must be below data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, which's type is a super type of all arguments's.
|
||||
- All arguments `start`, `end`, `step` must be one of the following data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, whose type is a supertype of all arguments.
|
||||
- An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting.
|
||||
|
||||
**Examples**
|
||||
@ -1236,7 +1236,7 @@ arrayAUC(arr_scores, arr_labels)
|
||||
**Arguments**
|
||||
|
||||
- `arr_scores` — scores prediction model gives.
|
||||
- `arr_labels` — labels of samples, usually 1 for positive sample and 0 for negtive sample.
|
||||
- `arr_labels` — labels of samples, usually 1 for positive sample and 0 for negative sample.
|
||||
|
||||
**Returned value**
|
||||
|
||||
|
@ -226,7 +226,7 @@ Result:
|
||||
|
||||
Returns result of [logical conjunction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. The countdown starts from 0 from the right to the left.
|
||||
|
||||
The conjuction for bitwise operations:
|
||||
The conjunction for bit-wise operations:
|
||||
|
||||
0 AND 0 = 0
|
||||
|
||||
@ -291,7 +291,7 @@ Result:
|
||||
|
||||
Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. The countdown starts from 0 from the right to the left.
|
||||
|
||||
The disjunction for bitwise operations:
|
||||
The disjunction for bit-wise operations:
|
||||
|
||||
0 OR 0 = 0
|
||||
|
||||
|
@ -487,7 +487,7 @@ cosineDistance(vector1, vector2)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Cosine of the angle between two vectors substracted from one.
|
||||
- Cosine of the angle between two vectors subtracted from one.
|
||||
|
||||
Type: [Float](../../sql-reference/data-types/float.md).
|
||||
|
||||
|
@ -31,9 +31,9 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
||||
**Arguments**
|
||||
|
||||
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `plaintext` — Text thats need to be encrypted. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Required for `-gcm` modes, optinal for others. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value**
|
||||
@ -165,7 +165,7 @@ Received exception from server (version 22.6.1):
|
||||
Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-ofb', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123').
|
||||
```
|
||||
|
||||
While `aes_encrypt_mysql` produces MySQL-compatitalbe output:
|
||||
While `aes_encrypt_mysql` produces MySQL-compatible output:
|
||||
|
||||
Query:
|
||||
|
||||
@ -233,7 +233,7 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad])
|
||||
- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Required for `-gcm` modes, optinal for others. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value**
|
||||
@ -364,7 +364,7 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
|
||||
- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Optinal. [String](../../sql-reference/data-types/string.md#string).
|
||||
- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string).
|
||||
|
||||
**Returned value**
|
||||
|
||||
|
@ -6,7 +6,7 @@ sidebar_label: Files
|
||||
|
||||
## file
|
||||
|
||||
Reads file as string and loads the data into the specified column. The actual file content is not interpreted.
|
||||
Reads a file as string and loads the data into the specified column. The file content is not interpreted.
|
||||
|
||||
Also see table function [file](../table-functions/file.md).
|
||||
|
||||
@ -18,15 +18,13 @@ file(path[, default])
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports the following wildcards: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings.
|
||||
- `default` — The value that will be returned in the case the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal).
|
||||
- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings.
|
||||
- `default` — The value returned if the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal).
|
||||
|
||||
**Example**
|
||||
|
||||
Inserting data from files a.txt and b.txt into a table as strings:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
INSERT INTO table SELECT file('a.txt'), file('b.txt');
|
||||
```
|
||||
|
@ -8,7 +8,7 @@ sidebar_label: Nullable
|
||||
|
||||
## isNull
|
||||
|
||||
Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal).
|
||||
Returns whether the argument is [NULL](../../sql-reference/syntax.md#null-literal).
|
||||
|
||||
``` sql
|
||||
isNull(x)
|
||||
@ -18,7 +18,7 @@ Alias: `ISNULL`.
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — A value with a non-compound data type.
|
||||
- `x` — A value of non-compound data type.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -27,7 +27,7 @@ Alias: `ISNULL`.
|
||||
|
||||
**Example**
|
||||
|
||||
Input table
|
||||
Table:
|
||||
|
||||
``` text
|
||||
┌─x─┬────y─┐
|
||||
@ -36,12 +36,14 @@ Input table
|
||||
└───┴──────┘
|
||||
```
|
||||
|
||||
Query
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT x FROM t_null WHERE isNull(y);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─x─┐
|
||||
│ 1 │
|
||||
@ -50,7 +52,7 @@ SELECT x FROM t_null WHERE isNull(y);
|
||||
|
||||
## isNotNull
|
||||
|
||||
Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal).
|
||||
Returns whether the argument is not [NULL](../../sql-reference/syntax.md#null-literal).
|
||||
|
||||
``` sql
|
||||
isNotNull(x)
|
||||
@ -58,16 +60,16 @@ isNotNull(x)
|
||||
|
||||
**Arguments:**
|
||||
|
||||
- `x` — A value with a non-compound data type.
|
||||
- `x` — A value of non-compound data type.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `0` if `x` is `NULL`.
|
||||
- `1` if `x` is not `NULL`.
|
||||
- `0` if `x` is `NULL`.
|
||||
|
||||
**Example**
|
||||
|
||||
Input table
|
||||
Table:
|
||||
|
||||
``` text
|
||||
┌─x─┬────y─┐
|
||||
@ -76,12 +78,14 @@ Input table
|
||||
└───┴──────┘
|
||||
```
|
||||
|
||||
Query
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT x FROM t_null WHERE isNotNull(y);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─x─┐
|
||||
│ 2 │
|
||||
@ -90,7 +94,7 @@ SELECT x FROM t_null WHERE isNotNull(y);
|
||||
|
||||
## coalesce
|
||||
|
||||
Checks from left to right whether `NULL` arguments were passed and returns the first non-`NULL` argument.
|
||||
Returns the leftmost non-`NULL` argument.
|
||||
|
||||
``` sql
|
||||
coalesce(x,...)
|
||||
@ -98,11 +102,11 @@ coalesce(x,...)
|
||||
|
||||
**Arguments:**
|
||||
|
||||
- Any number of parameters of a non-compound type. All parameters must be compatible by data type.
|
||||
- Any number of parameters of non-compound type. All parameters must be of mutually compatible data types.
|
||||
|
||||
**Returned values**
|
||||
|
||||
- The first non-`NULL` argument.
|
||||
- The first non-`NULL` argument.
|
||||
- `NULL`, if all arguments are `NULL`.
|
||||
|
||||
**Example**
|
||||
@ -110,10 +114,10 @@ coalesce(x,...)
|
||||
Consider a list of contacts that may specify multiple ways to contact a customer.
|
||||
|
||||
``` text
|
||||
┌─name─────┬─mail─┬─phone─────┬──icq─┐
|
||||
┌─name─────┬─mail─┬─phone─────┬──telegram─┐
|
||||
│ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │
|
||||
│ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
|
||||
└──────────┴──────┴───────────┴──────┘
|
||||
└──────────┴──────┴───────────┴───────────┘
|
||||
```
|
||||
|
||||
The `mail` and `phone` fields are of type String, but the `telegram` field is `UInt32`, so it needs to be converted to `String`.
|
||||
@ -121,19 +125,19 @@ The `mail` and `phone` fields are of type String, but the `icq` field is `UInt32
|
||||
Get the first available contact method for the customer from the contact list:
|
||||
|
||||
``` sql
|
||||
SELECT name, coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook;
|
||||
SELECT name, coalesce(mail, phone, CAST(telegram,'Nullable(String)')) FROM aBook;
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐
|
||||
┌─name─────┬─coalesce(mail, phone, CAST(telegram, 'Nullable(String)'))─┐
|
||||
│ client 1 │ 123-45-67 │
|
||||
│ client 2 │ ᴺᵁᴸᴸ │
|
||||
└──────────┴──────────────────────────────────────────────────────┘
|
||||
└──────────┴───────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## ifNull
|
||||
|
||||
Returns an alternative value if the main argument is `NULL`.
|
||||
Returns an alternative value if the argument is `NULL`.
|
||||
|
||||
``` sql
|
||||
ifNull(x, alt)
|
||||
@ -146,25 +150,33 @@ ifNull(x,alt)
|
||||
|
||||
**Returned values**
|
||||
|
||||
- The value `x`, if `x` is not `NULL`.
|
||||
- The value `alt`, if `x` is `NULL`.
|
||||
- `x` if `x` is not `NULL`.
|
||||
- `alt` if `x` is `NULL`.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT ifNull('a', 'b');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─ifNull('a', 'b')─┐
|
||||
│ a │
|
||||
└──────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT ifNull(NULL, 'b');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─ifNull(NULL, 'b')─┐
|
||||
│ b │
|
||||
@ -173,7 +185,7 @@ SELECT ifNull(NULL, 'b');
|
||||
|
||||
## nullIf
|
||||
|
||||
Returns `NULL` if the arguments are equal.
|
||||
Returns `NULL` if both arguments are equal.
|
||||
|
||||
``` sql
|
||||
nullIf(x, y)
|
||||
@ -181,29 +193,37 @@ nullIf(x, y)
|
||||
|
||||
**Arguments:**
|
||||
|
||||
`x`, `y` — Values for comparison. They must be compatible types, or ClickHouse will generate an exception.
|
||||
`x`, `y` — Values to compare. Must be of compatible types.
|
||||
|
||||
**Returned values**
|
||||
|
||||
- `NULL`, if the arguments are equal.
|
||||
- The `x` value, if the arguments are not equal.
|
||||
- `NULL` if the arguments are equal.
|
||||
- `x` if the arguments are not equal.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT nullIf(1, 1);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─nullIf(1, 1)─┐
|
||||
│ ᴺᵁᴸᴸ │
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT nullIf(1, 2);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─nullIf(1, 2)─┐
|
||||
│ 1 │
|
||||
@ -212,7 +232,7 @@ SELECT nullIf(1, 2);
|
||||
|
||||
## assumeNotNull
|
||||
|
||||
Results in an equivalent non-`Nullable` value for a [Nullable](../../sql-reference/data-types/nullable.md) type. In case the original value is `NULL` the result is undetermined. See also `ifNull` and `coalesce` functions.
|
||||
Returns the corresponding non-`Nullable` value for a value of [Nullable](../../sql-reference/data-types/nullable.md) type. If the original value is `NULL`, an arbitrary result can be returned. See also functions `ifNull` and `coalesce`.
|
||||
|
||||
``` sql
|
||||
assumeNotNull(x)
|
||||
@ -224,36 +244,29 @@ assumeNotNull(x)
|
||||
|
||||
**Returned values**
|
||||
|
||||
- The original value from the non-`Nullable` type, if it is not `NULL`.
|
||||
- Implementation specific result if the original value was `NULL`.
|
||||
- The input value as non-`Nullable` type, if it is not `NULL`.
|
||||
- An arbitrary value, if the input value is `NULL`.
|
||||
|
||||
**Example**
|
||||
|
||||
Consider the `t_null` table.
|
||||
|
||||
``` sql
|
||||
SHOW CREATE TABLE t_null;
|
||||
```
|
||||
Table:
|
||||
|
||||
``` text
|
||||
┌─statement─────────────────────────────────────────────────────────────────┐
|
||||
│ CREATE TABLE default.t_null ( x Int8, y Nullable(Int8)) ENGINE = TinyLog │
|
||||
└───────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─x─┬────y─┐
|
||||
│ 1 │ ᴺᵁᴸᴸ │
|
||||
│ 2 │ 3 │
|
||||
└───┴──────┘
|
||||
```
|
||||
|
||||
Apply the `assumeNotNull` function to the `y` column.
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT assumeNotNull(y) FROM t_null;
|
||||
SELECT assumeNotNull(y) FROM t_null;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─assumeNotNull(y)─┐
|
||||
│ 0 │
|
||||
@ -261,10 +274,14 @@ SELECT assumeNotNull(y) FROM t_null;
|
||||
└──────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toTypeName(assumeNotNull(y)) FROM t_null;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─toTypeName(assumeNotNull(y))─┐
|
||||
│ Int8 │
|
||||
@ -282,28 +299,36 @@ toNullable(x)
|
||||
|
||||
**Arguments:**
|
||||
|
||||
- `x` — The value of any non-compound type.
|
||||
- `x` — A value of non-compound type.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- The input value with a `Nullable` type.
|
||||
- The input value but of `Nullable` type.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toTypeName(10);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─toTypeName(10)─┐
|
||||
│ UInt8 │
|
||||
└────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toTypeName(toNullable(10));
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─toTypeName(toNullable(10))─┐
|
||||
│ Nullable(UInt8) │
|
||||
|
@ -12,7 +12,7 @@ A latitude and longitude pair can be transformed to a 64-bit H3 index, identifyi
|
||||
|
||||
The H3 index is used primarily for bucketing locations and other geospatial manipulations.
|
||||
|
||||
The full description of the H3 system is available at [the Uber Engeneering site](https://eng.uber.com/h3/).
|
||||
The full description of the H3 system is available at [the Uber Engineering site](https://eng.uber.com/h3/).
|
||||
|
||||
## h3IsValid
|
||||
|
||||
|
@ -249,7 +249,7 @@ s2RectAdd(s2pointLow, s2pointHigh, s2Point)
|
||||
**Returned values**
|
||||
|
||||
- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
- `s2PointHigh` — Hight S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/float.md).
|
||||
- `s2PointHigh` — High S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -697,7 +697,7 @@ SELECT murmurHash2_64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:
|
||||
|
||||
## gccMurmurHash
|
||||
|
||||
Calculates a 64-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash value using the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191). It is portable between CLang and GCC builds.
|
||||
Calculates a 64-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash value using the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191). It is portable between Clang and GCC builds.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1161,7 +1161,7 @@ wordShingleSimHashUTF8(string[, shinglesize])
|
||||
**Arguments**
|
||||
|
||||
- `string` — String. [String](/docs/en/sql-reference/data-types/string.md).
|
||||
- `shinglesize` — The size of a word shingle. Optinal. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
|
||||
- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
|
@ -12,7 +12,9 @@ Zero as an argument is considered `false`, non-zero values are considered `true`
|
||||
|
||||
## and
|
||||
|
||||
Calculates the logical conjunction between two or more values.
|
||||
Calculates the logical conjunction of two or more values.
|
||||
|
||||
Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `(val_1 AND val_2 AND ... AND val_{i-1})` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT and(number = 2, intDiv(1, number)) FROM numbers(5)`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -20,9 +22,7 @@ Calculates the logical conjunction between two or more values.
|
||||
and(val1, val2...)
|
||||
```
|
||||
|
||||
Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `(val_1 AND val_2 AND ... AND val_{i-1})` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT and(number = 2, intDiv(1, number)) FROM numbers(5)`.
|
||||
|
||||
Alias: The [AND Operator](../../sql-reference/operators/index.md#logical-and-operator).
|
||||
Alias: The [AND operator](../../sql-reference/operators/index.md#logical-and-operator).
|
||||
|
||||
**Arguments**
|
||||
|
||||
@ -30,8 +30,8 @@ Alias: The [AND Operator](../../sql-reference/operators/index.md#logical-and-ope
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `0`, if there at least one argument evaluates to `false`,
|
||||
- `NULL`, if no argumetn evaluates to `false` and at least one argument is `NULL`,
|
||||
- `0`, if at least one argument evaluates to `false`,
|
||||
- `NULL`, if no argument evaluates to `false` and at least one argument is `NULL`,
|
||||
- `1`, otherwise.
|
||||
|
||||
Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
|
||||
@ -66,7 +66,9 @@ Result:
|
||||
|
||||
## or
|
||||
|
||||
Calculates the logical disjunction between two or more values.
|
||||
Calculates the logical disjunction of two or more values.
|
||||
|
||||
Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `((NOT val_1) AND (NOT val_2) AND ... AND (NOT val_{i-1}))` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT or(number = 0, intDiv(1, number) != 0) FROM numbers(5)`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -74,9 +76,7 @@ Calculates the logical disjunction between two or more values.
|
||||
or(val1, val2...)
|
||||
```
|
||||
|
||||
Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `((NOT val_1) AND (NOT val_2) AND ... AND (NOT val_{i-1}))` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT or(number = 0, intDiv(1, number) != 0) FROM numbers(5)`.
|
||||
|
||||
Alias: The [OR Operator](../../sql-reference/operators/index.md#logical-or-operator).
|
||||
Alias: The [OR operator](../../sql-reference/operators/index.md#logical-or-operator).
|
||||
|
||||
**Arguments**
|
||||
|
||||
@ -120,7 +120,7 @@ Result:
|
||||
|
||||
## not
|
||||
|
||||
Calculates logical negation of a value.
|
||||
Calculates the logical negation of a value.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -128,7 +128,7 @@ Calculates logical negation of a value.
|
||||
not(val);
|
||||
```
|
||||
|
||||
Alias: The [Negation Operator](../../sql-reference/operators/index.md#logical-negation-operator).
|
||||
Alias: The [Negation operator](../../sql-reference/operators/index.md#logical-negation-operator).
|
||||
|
||||
**Arguments**
|
||||
|
||||
@ -158,7 +158,7 @@ Result:
|
||||
|
||||
## xor
|
||||
|
||||
Calculates the logical exclusive disjunction between two or more values. For more than two values the function first xor-s the first two values, then xor-s the result with the third value etc.
|
||||
Calculates the logical exclusive disjunction of two or more values. For more than two input values, the function first xor-s the first two values, then xor-s the result with the third value etc.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -52,7 +52,7 @@ Alias: `ln(x)`
|
||||
|
||||
## exp2
|
||||
|
||||
Returns 2 to the power of the given argumetn
|
||||
Returns 2 to the power of the given argument.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -82,7 +82,7 @@ log2(x)
|
||||
|
||||
## exp10
|
||||
|
||||
Returns 10 to the power of the given argumetn
|
||||
Returns 10 to the power of the given argument.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -31,7 +31,7 @@ Uses a linear congruential generator.
|
||||
|
||||
## randCanonical
|
||||
|
||||
Returns a Float64 value, evenly distributed in [0, 1).
|
||||
Returns a random Float64 value, evenly distributed in interval [0, 1).
|
||||
|
||||
## randConstant
|
||||
|
||||
@ -54,11 +54,9 @@ Result:
|
||||
└────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘
|
||||
```
|
||||
|
||||
# Functions for Generating Random Numbers based on a Distribution
|
||||
|
||||
## randUniform
|
||||
|
||||
Returns a Float64 drawn uniformly from the interval between `min` and `max` ([continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution)).
|
||||
Returns a random Float64 drawn uniformly from interval [`min`, `max`) ([continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution)).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -68,8 +66,8 @@ randUniform(min, max)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `min` - `Float64` - min value of the range,
|
||||
- `max` - `Float64` - max value of the range.
|
||||
- `min` - `Float64` - left boundary of the range,
|
||||
- `max` - `Float64` - right boundary of the range.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -97,7 +95,7 @@ Result:
|
||||
|
||||
## randNormal
|
||||
|
||||
Returns a Float64 drawn from a [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution).
|
||||
Returns a random Float64 drawn from a [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -108,7 +106,7 @@ randNormal(mean, variance)
|
||||
**Arguments**
|
||||
|
||||
- `mean` - `Float64` - mean value of distribution,
|
||||
- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance).
|
||||
- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance) of the distribution.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -136,7 +134,7 @@ Result:
|
||||
|
||||
## randLogNormal
|
||||
|
||||
Returns a Float64 drawn from a [log-normal distribution](https://en.wikipedia.org/wiki/Log-normal_distribution).
|
||||
Returns a random Float64 drawn from a [log-normal distribution](https://en.wikipedia.org/wiki/Log-normal_distribution).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -147,7 +145,7 @@ randLogNormal(mean, variance)
|
||||
**Arguments**
|
||||
|
||||
- `mean` - `Float64` - mean value of distribution,
|
||||
- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance).
|
||||
- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance) of the distribution.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -175,7 +173,7 @@ Result:
|
||||
|
||||
## randBinomial
|
||||
|
||||
Returns a UInt64 drawn from a [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution).
|
||||
Returns a random UInt64 drawn from a [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -186,7 +184,7 @@ randBinomial(experiments, probability)
|
||||
**Arguments**
|
||||
|
||||
- `experiments` - `UInt64` - number of experiments,
|
||||
- `probability` - `Float64` - probability of success in each experiment (values in `0...1` range only).
|
||||
- `probability` - `Float64` - probability of success in each experiment, a value between 0 and 1.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -214,7 +212,7 @@ Result:
|
||||
|
||||
## randNegativeBinomial
|
||||
|
||||
Returns a UInt64 drawn from a [negative binomial distribution](https://en.wikipedia.org/wiki/Negative_binomial_distribution).
|
||||
Returns a random UInt64 drawn from a [negative binomial distribution](https://en.wikipedia.org/wiki/Negative_binomial_distribution).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -225,7 +223,7 @@ randNegativeBinomial(experiments, probability)
|
||||
**Arguments**
|
||||
|
||||
- `experiments` - `UInt64` - number of experiments,
|
||||
- `probability` - `Float64` - probability of failure in each experiment (values in `0...1` range only).
|
||||
- `probability` - `Float64` - probability of failure in each experiment, a value between 0 and 1.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -253,7 +251,7 @@ Result:
|
||||
|
||||
## randPoisson
|
||||
|
||||
Returns a UInt64 drawn from a [Poisson distribution](https://en.wikipedia.org/wiki/Poisson_distribution).
|
||||
Returns a random UInt64 drawn from a [Poisson distribution](https://en.wikipedia.org/wiki/Poisson_distribution).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -291,7 +289,7 @@ Result:
|
||||
|
||||
## randBernoulli
|
||||
|
||||
Returns a UInt64 drawn from a [Bernoulli distribution](https://en.wikipedia.org/wiki/Bernoulli_distribution).
|
||||
Returns a random UInt64 drawn from a [Bernoulli distribution](https://en.wikipedia.org/wiki/Bernoulli_distribution).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -301,7 +299,7 @@ randBernoulli(probability)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `probability` - `Float64` - probability of success (values in `0...1` range only).
|
||||
- `probability` - `Float64` - probability of success, a value between 0 and 1.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -329,7 +327,7 @@ Result:
|
||||
|
||||
## randExponential
|
||||
|
||||
Returns a Float64 drawn from a [exponential distribution](https://en.wikipedia.org/wiki/Exponential_distribution).
|
||||
Returns a random Float64 drawn from an [exponential distribution](https://en.wikipedia.org/wiki/Exponential_distribution).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -367,7 +365,7 @@ Result:
|
||||
|
||||
## randChiSquared
|
||||
|
||||
Returns a Float64 drawn from a [Chi-square distribution](https://en.wikipedia.org/wiki/Chi-squared_distribution) - a distribution of a sum of the squares of k independent standard normal random variables.
|
||||
Returns a random Float64 drawn from a [Chi-square distribution](https://en.wikipedia.org/wiki/Chi-squared_distribution) - a distribution of a sum of the squares of k independent standard normal random variables.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -405,7 +403,7 @@ Result:
|
||||
|
||||
## randStudentT
|
||||
|
||||
Returns a Float64 drawn from a [Student's t-distribution](https://en.wikipedia.org/wiki/Student%27s_t-distribution).
|
||||
Returns a random Float64 drawn from a [Student's t-distribution](https://en.wikipedia.org/wiki/Student%27s_t-distribution).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -443,7 +441,7 @@ Result:
|
||||
|
||||
## randFisherF
|
||||
|
||||
Returns a Float64 drawn from a [F-distribution](https://en.wikipedia.org/wiki/F-distribution).
|
||||
Returns a random Float64 drawn from an [F-distribution](https://en.wikipedia.org/wiki/F-distribution).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -480,11 +478,9 @@ Result:
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
# Functions for Generating Random Strings
|
||||
|
||||
## randomString
|
||||
|
||||
Returns a random String of specified `length`. Not all characters may be printable.
|
||||
Generates a string of the specified length filled with random bytes (including zero bytes). Not all characters may be printable.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -492,13 +488,78 @@ Returns a random String of specified `length`. Not all characters may be printab
|
||||
randomString(length)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `length` — String length in bytes. Positive integer.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- String filled with random bytes.
|
||||
|
||||
Type: [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT randomString(30) AS str, length(str) AS len FROM numbers(2) FORMAT Vertical;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
Row 1:
|
||||
──────
|
||||
str: 3 G : pT ?w тi k aV f6
|
||||
len: 30
|
||||
|
||||
Row 2:
|
||||
──────
|
||||
str: 9 ,] ^ ) ]?? 8
|
||||
len: 30
|
||||
```
|
||||
|
||||
## randomFixedString
|
||||
|
||||
Like `randomString` but returns a FixedString.
|
||||
Generates a binary string of the specified length filled with random bytes (including zero bytes). Not all characters may be printable.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
randomFixedString(length);
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value(s)**
|
||||
|
||||
- String filled with random bytes.
|
||||
|
||||
Type: [FixedString](../../sql-reference/data-types/fixedstring.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT randomFixedString(13) as rnd, toTypeName(rnd)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─rnd──────┬─toTypeName(randomFixedString(13))─┐
|
||||
│ j▒h㋖HɨZ'▒ │ FixedString(13) │
|
||||
└──────────┴───────────────────────────────────┘
|
||||
```
|
||||
|
||||
## randomPrintableASCII
|
||||
|
||||
Returns a random String of specified `length`. All characters are printable.
|
||||
Generates a string with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) characters. All characters are printable.
|
||||
If you pass `length < 0`, the behavior of the function is undefined.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -506,21 +567,71 @@ Returns a random String of specified `length`. All characters are printable.
|
||||
randomPrintableASCII(length)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `length` — String length in bytes. Positive integer.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters.
|
||||
|
||||
Type: [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT number, randomPrintableASCII(30) as str, length(str) FROM system.numbers LIMIT 3
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─number─┬─str────────────────────────────┬─length(randomPrintableASCII(30))─┐
|
||||
│ 0 │ SuiCOSTvC0csfABSw=UcSzp2.`rv8x │ 30 │
|
||||
│ 1 │ 1Ag NlJ &RCN:*>HVPG;PE-nO"SUFD │ 30 │
|
||||
│ 2 │ /"+<"wUTh:=LjJ Vm!c&hI*m#XTfzz │ 30 │
|
||||
└────────┴────────────────────────────────┴──────────────────────────────────┘
|
||||
```
|
||||
|
||||
## randomStringUTF8
|
||||
|
||||
Returns a random String containing `length` many UTF8 codepoints. Not all characters may be printable
|
||||
Generates a random string of the specified length. The resulting string contains valid UTF-8 code points. The values of the code points may be outside the range of assigned Unicode.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
randomStringUTF8(length)
|
||||
randomStringUTF8(length);
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `length` — Length of the string in code points. [UInt64](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value(s)**
|
||||
|
||||
- UTF-8 random string.
|
||||
|
||||
Type: [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT randomStringUTF8(13)
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─randomStringUTF8(13)─┐
|
||||
│ 𘤗д兠庇 │
|
||||
└──────────────────────┘
|
||||
```
|
||||
|
||||
## fuzzBits
|
||||
|
||||
**Syntax**
|
||||
|
||||
Inverts the bits of String or FixedString `s`, each with probability `prob`.
|
||||
Flips the bits of String or FixedString `s`, each with probability `prob`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -529,8 +640,8 @@ fuzzBits(s, prob)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
- `s` - `String` or `FixedString`
|
||||
- `prob` - constant `Float32/64`
|
||||
- `s` - `String` or `FixedString`,
|
||||
- `prob` - constant `Float32/64` between 0.0 and 1.0.
|
||||
|
||||
**Returned value**
|
||||
|
||||
|
@ -393,7 +393,7 @@ Reverses a sequence of Unicode code points in a string. Assumes that the string
|
||||
|
||||
## format
|
||||
|
||||
Format the `pattern` string with the strings listed in the arguments, similar to formatting in Python. The pattern string can contain replacement fields surrounded by curly braces `{}`. Anything not contained in braces is considered literal text and copied verbatim into the output. Literal brace character can be escaped by two braces: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are implicitely given monotonically increasing numbers).
|
||||
Format the `pattern` string with the strings listed in the arguments, similar to formatting in Python. The pattern string can contain replacement fields surrounded by curly braces `{}`. Anything not contained in braces is considered literal text and copied verbatim into the output. Literal brace character can be escaped by two braces: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are implicitly given monotonically increasing numbers).
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -6,7 +6,7 @@ sidebar_label: Replacing in Strings
|
||||
|
||||
# Functions for Replacing in Strings
|
||||
|
||||
[General strings functions](string-functions.md) and [functions for searchin in strings](string-search-functions.md) are described separately.
|
||||
[General string functions](string-functions.md) and [functions for searching in strings](string-search-functions.md) are described separately.
|
||||
|
||||
## replaceOne
|
||||
|
||||
|
@ -793,7 +793,7 @@ toDecimalString(number, scale)
|
||||
**Returned value**
|
||||
|
||||
- Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale).
|
||||
The number is rounded up or down according to common arithmetics in case requested scale is smaller than original number's scale.
|
||||
The number is rounded up or down according to common arithmetic in case requested scale is smaller than original number's scale.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -19,7 +19,7 @@ A function configuration contains the following settings:
|
||||
- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number.
|
||||
- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command.
|
||||
- `return_type` - the type of a returned value.
|
||||
- `return_name` - name of retuned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
|
||||
- `return_name` - name of returned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
|
||||
- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created.
|
||||
- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
|
||||
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`.
|
||||
|
@ -222,7 +222,7 @@ It also makes sense to specify a local table in the `GLOBAL IN` clause, in case
|
||||
|
||||
### Distributed Subqueries and max_rows_in_set
|
||||
|
||||
You can use [`max_rows_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) and [`max_bytes_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) to control how much data is tranferred during distributed queries.
|
||||
You can use [`max_rows_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) and [`max_bytes_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) to control how much data is transferred during distributed queries.
|
||||
|
||||
This is especially important if the `GLOBAL IN` query returns a large amount of data. Consider the following SQL:
|
||||
```sql
|
||||
|
@ -24,7 +24,7 @@ For tuple negation: [tupleNegate](../../sql-reference/functions/tuple-functions.
|
||||
|
||||
`a * b` – The `multiply (a, b)` function.
|
||||
|
||||
For multiplying tuple by number: [tupleMultiplyByNumber](../../sql-reference/functions/tuple-functions.md#tuplemultiplybynumber), for scalar profuct: [dotProduct](../../sql-reference/functions/tuple-functions.md#dotproduct).
|
||||
For multiplying tuple by number: [tupleMultiplyByNumber](../../sql-reference/functions/tuple-functions.md#tuplemultiplybynumber), for scalar product: [dotProduct](../../sql-reference/functions/tuple-functions.md#dotproduct).
|
||||
|
||||
`a / b` – The `divide(a, b)` function.
|
||||
|
||||
|
@ -32,7 +32,7 @@ Limit the maximum number of queries for the current user with 123 queries in 15
|
||||
ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
|
||||
```
|
||||
|
||||
For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quaters:
|
||||
For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quarters:
|
||||
|
||||
``` sql
|
||||
ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
|
||||
|
@ -32,7 +32,7 @@ Limit the maximum number of queries for the current user with 123 queries in 15
|
||||
CREATE QUOTA qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
|
||||
```
|
||||
|
||||
For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quaters:
|
||||
For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quarters:
|
||||
|
||||
``` sql
|
||||
CREATE QUOTA qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;
|
||||
|
@ -127,7 +127,7 @@ CROSS JOIN system.numbers AS c
|
||||
|
||||
Settings:
|
||||
|
||||
- `run_passes` — Run all query tree passes before dumping the query tree. Defaul: `1`.
|
||||
- `run_passes` — Run all query tree passes before dumping the query tree. Default: `1`.
|
||||
- `dump_passes` — Dump information about used passes before dumping the query tree. Default: `0`.
|
||||
- `passes` — Specifies how many passes to run. If set to `-1`, runs all the passes. Default: `-1`.
|
||||
|
||||
@ -475,5 +475,5 @@ Result:
|
||||
```
|
||||
|
||||
:::note
|
||||
The validation is not complete, so a successfull query does not guarantee that the override would not cause issues.
|
||||
The validation is not complete, so a successful query does not guarantee that the override would not cause issues.
|
||||
:::
|
||||
|
@ -34,7 +34,7 @@ Queries that use `FINAL` are executed slightly slower than similar queries that
|
||||
- Data is merged during query execution.
|
||||
- Queries with `FINAL` read primary key columns in addition to the columns specified in the query.
|
||||
|
||||
**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine have’t happened yet and deal with it by applying aggregation (for example, to discard duplicates).
|
||||
**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine haven’t happened yet and deal with it by applying aggregation (for example, to discard duplicates).
|
||||
|
||||
`FINAL` can be applied automatically using [FINAL](../../../operations/settings/settings.md#final) setting to all tables in a query using a session or a user profile.
|
||||
|
||||
|
@ -289,7 +289,7 @@ When `FROM const_expr` not defined sequence of filling use minimal `expr` field
|
||||
When `TO const_expr` is not defined, the filling sequence uses the maximum `expr` field value from `ORDER BY`.
|
||||
When `STEP const_numeric_expr` is defined, `const_numeric_expr` is interpreted `as is` for numeric types, as `days` for the Date type, and as `seconds` for the DateTime type. It also supports the [INTERVAL](https://clickhouse.com/docs/en/sql-reference/data-types/special-data-types/interval/) data type representing time and date intervals.
|
||||
When `STEP const_numeric_expr` is omitted, the filling sequence uses `1.0` for numeric types, `1 day` for the Date type, and `1 second` for the DateTime type.
|
||||
`INTERPOLATE` can be applied to columns not participating in `ORDER BY WITH FILL`. Such columns are filled based on previous fields values by applying `expr`. If `expr` is not present will repeate previous value. Omitted list will result in including all allowed columns.
|
||||
`INTERPOLATE` can be applied to columns not participating in `ORDER BY WITH FILL`. Such columns are filled based on the previous field values by applying `expr`. If `expr` is not present, the previous value is repeated. An omitted list results in including all allowed columns.
|
||||
|
||||
Example of a query without `WITH FILL`:
|
||||
|
||||
|
@ -6,7 +6,7 @@ sidebar_label: urlCluster
|
||||
|
||||
# urlCluster Table Function
|
||||
|
||||
Allows processing files from URL in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterics in URL file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
|
||||
Allows processing files from URL in parallel from many nodes in a specified cluster. On the initiator it creates a connection to all nodes in the cluster, expands asterisks in the URL file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -21,7 +21,7 @@ ClickHouse supports the standard grammar for defining windows and window functio
|
||||
| `lag/lead(value, offset)` | Not supported. Workarounds: |
|
||||
| | 1) replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` |
|
||||
| | 2) use `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
|
||||
| ntile(buckets) | Supported. Specify window like, (partition by x order by y rows between unbounded preceding and unounded following). |
|
||||
| ntile(buckets) | Supported. Specify window like, (partition by x order by y rows between unbounded preceding and unbounded following). |
|
||||
|
||||
## ClickHouse-specific Window Functions
|
||||
|
||||
@ -39,7 +39,7 @@ The computed value is the following for each row:
|
||||
|
||||
The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097).
|
||||
|
||||
All GitHub issues related to window funtions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag.
|
||||
All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag.
|
||||
|
||||
### Tests
|
||||
|
||||
|
@ -19,7 +19,7 @@ $ echo '{"foo":"bar"}' | curl 'http://localhost:8123/?query=INSERT%20INTO%20test
|
||||
При помощи [интерфейса CLI](../../interfaces/cli.md):
|
||||
|
||||
``` bash
|
||||
$ echo '{"foo":"bar"}' | clickhouse-client ---query="INSERT INTO test FORMAT JSONEachRow"
|
||||
$ echo '{"foo":"bar"}' | clickhouse-client --query="INSERT INTO test FORMAT JSONEachRow"
|
||||
```
|
||||
|
||||
Чтобы не вставлять данные вручную, используйте одну из [готовых библиотек](../../interfaces/index.md).
|
||||
|
@ -138,7 +138,7 @@ void LocalServer::initialize(Poco::Util::Application & self)
|
||||
OutdatedPartsLoadingThreadPool::initialize(
|
||||
config().getUInt("max_outdated_parts_loading_thread_pool_size", 16),
|
||||
0, // We don't need any threads once all the parts are loaded
|
||||
config().getUInt("outdated_part_loading_thread_pool_queue_size", 10000));
|
||||
config().getUInt("max_outdated_parts_loading_thread_pool_size", 16));
|
||||
}
|
||||
|
||||
|
||||
|
@ -696,7 +696,7 @@ try
|
||||
OutdatedPartsLoadingThreadPool::initialize(
|
||||
server_settings.max_outdated_parts_loading_thread_pool_size,
|
||||
0, // We don't need any threads once all the parts are loaded
|
||||
server_settings.outdated_part_loading_thread_pool_queue_size);
|
||||
server_settings.max_outdated_parts_loading_thread_pool_size);
|
||||
|
||||
/// Initialize global local cache for remote filesystem.
|
||||
if (config().has("local_cache_for_remote_fs"))
|
||||
|
@ -190,7 +190,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
|
||||
connected = true;
|
||||
|
||||
sendHello();
|
||||
receiveHello();
|
||||
receiveHello(timeouts.handshake_timeout);
|
||||
if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM)
|
||||
sendAddendum();
|
||||
|
||||
@ -321,8 +321,10 @@ void Connection::sendAddendum()
|
||||
}
|
||||
|
||||
|
||||
void Connection::receiveHello()
|
||||
void Connection::receiveHello(const Poco::Timespan & handshake_timeout)
|
||||
{
|
||||
TimeoutSetter timeout_setter(*socket, socket->getSendTimeout(), handshake_timeout);
|
||||
|
||||
/// Receive hello packet.
|
||||
UInt64 packet_type = 0;
|
||||
|
||||
@ -375,6 +377,10 @@ void Connection::receiveHello()
|
||||
receiveException()->rethrow();
|
||||
else
|
||||
{
|
||||
/// Reset timeout_setter before disconnect,
|
||||
/// because after disconnect socket will be invalid.
|
||||
timeout_setter.reset();
|
||||
|
||||
/// Close connection, to not stay in unsynchronised state.
|
||||
disconnect();
|
||||
throwUnexpectedPacket(packet_type, "Hello or Exception");
|
||||
|
@ -256,7 +256,7 @@ private:
|
||||
void connect(const ConnectionTimeouts & timeouts);
|
||||
void sendHello();
|
||||
void sendAddendum();
|
||||
void receiveHello();
|
||||
void receiveHello(const Poco::Timespan & handshake_timeout);
|
||||
|
||||
#if USE_SSL
|
||||
void sendClusterNameAndSalt();
|
||||
|
@ -67,7 +67,8 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
|
||||
Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0),
|
||||
Poco::Timespan(config.getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0),
|
||||
Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0),
|
||||
Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0));
|
||||
Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0),
|
||||
Poco::Timespan(config.getInt("handshake_timeout_ms", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC * 1000), 0));
|
||||
|
||||
timeouts.sync_request_timeout = Poco::Timespan(config.getInt("sync_request_timeout", DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC), 0);
|
||||
}
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include "hasLinuxCapability.h"
|
||||
#include <base/unaligned.h>
|
||||
#include <base/getThreadId.h>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
#include <cerrno>
|
||||
@ -202,10 +203,12 @@ bool checkPermissionsImpl()
|
||||
/// Check that we can successfully initialize TaskStatsInfoGetter.
|
||||
/// It will ask about family id through Netlink.
|
||||
/// On some LXC containers we have capability but we still cannot use Netlink.
|
||||
/// There is evidence that Linux fedora-riscv 6.1.22 gives something strange instead of the expected result.
|
||||
|
||||
try
|
||||
{
|
||||
TaskStatsInfoGetter();
|
||||
::taskstats stats{};
|
||||
TaskStatsInfoGetter().getStat(stats, static_cast<pid_t>(getThreadId()));
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
|
@ -37,7 +37,6 @@ class QueryThreadLog;
|
||||
class TasksStatsCounters;
|
||||
struct RUsageCounters;
|
||||
struct PerfEventsCounters;
|
||||
class TaskStatsInfoGetter;
|
||||
class InternalTextLogsQueue;
|
||||
struct ViewRuntimeData;
|
||||
class QueryViewsLog;
|
||||
|
@ -272,7 +272,8 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
rollbackRequest(request_for_session, true);
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
rollbackRequestNoLock(request_for_session, true);
|
||||
throw;
|
||||
}
|
||||
|
||||
@ -411,6 +412,14 @@ void KeeperStateMachine::rollbackRequest(const KeeperStorage::RequestForSession
|
||||
storage->rollbackRequest(request_for_session.zxid, allow_missing);
|
||||
}
|
||||
|
||||
void KeeperStateMachine::rollbackRequestNoLock(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing)
|
||||
{
|
||||
if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
|
||||
return;
|
||||
|
||||
storage->rollbackRequest(request_for_session.zxid, allow_missing);
|
||||
}
|
||||
|
||||
nuraft::ptr<nuraft::snapshot> KeeperStateMachine::last_snapshot()
|
||||
{
|
||||
/// Just return the latest snapshot.
|
||||
|
@ -68,6 +68,8 @@ public:
|
||||
// (can happen in case of exception during preprocessing)
|
||||
void rollbackRequest(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing);
|
||||
|
||||
void rollbackRequestNoLock(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing);
|
||||
|
||||
uint64_t last_commit_index() override { return last_committed_idx; }
|
||||
|
||||
/// Apply preliminarily saved (save_logical_snp_obj) snapshot to our state.
|
||||
|
@ -22,7 +22,6 @@ namespace DB
|
||||
M(UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0) \
|
||||
M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \
|
||||
M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The maximum number of threads that would be used for loading outdated data parts on startup", 0) \
|
||||
M(UInt64, outdated_part_loading_thread_pool_queue_size, 10000, "Queue size for parts loading thread pool.", 0) \
|
||||
M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0) \
|
||||
|
@ -55,6 +55,7 @@ class IColumn;
|
||||
M(UInt64, max_query_size, DBMS_DEFAULT_MAX_QUERY_SIZE, "The maximum number of bytes of a query string parsed by the SQL parser. Data in the VALUES clause of INSERT queries is processed by a separate stream parser (that consumes O(1) RAM) and not affected by this restriction.", 0) \
|
||||
M(UInt64, interactive_delay, 100000, "The interval in microseconds to check if the request is cancelled, and to send progress info.", 0) \
|
||||
M(Seconds, connect_timeout, DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, "Connection timeout if there are no replicas.", 0) \
|
||||
M(Milliseconds, handshake_timeout_ms, 10000, "Timeout for receiving HELLO packet from replicas.", 0) \
|
||||
M(Milliseconds, connect_timeout_with_failover_ms, 1000, "Connection timeout for selecting first healthy replica.", 0) \
|
||||
M(Milliseconds, connect_timeout_with_failover_secure_ms, 1000, "Connection timeout for selecting first healthy replica (for secure connections).", 0) \
|
||||
M(Seconds, receive_timeout, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, "Timeout for receiving data from network, in seconds. If no bytes were received in this interval, exception is thrown. If you set this setting on client, the 'send_timeout' for the socket will be also set on the corresponding connection end on the server.", 0) \
|
||||
|
@ -1073,56 +1073,73 @@ private:
|
||||
size_t size = vec_from.size();
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
ToType h;
|
||||
ToType hash;
|
||||
|
||||
if constexpr (Impl::use_int_hash_for_pods)
|
||||
{
|
||||
if constexpr (std::is_same_v<ToType, UInt64>)
|
||||
h = IntHash64Impl::apply(bit_cast<UInt64>(vec_from[i]));
|
||||
hash = IntHash64Impl::apply(bit_cast<UInt64>(vec_from[i]));
|
||||
else
|
||||
h = IntHash32Impl::apply(bit_cast<UInt32>(vec_from[i]));
|
||||
hash = IntHash32Impl::apply(bit_cast<UInt32>(vec_from[i]));
|
||||
}
|
||||
else
|
||||
{
|
||||
if constexpr (std::is_same_v<Impl, JavaHashImpl>)
|
||||
h = JavaHashImpl::apply(vec_from[i]);
|
||||
hash = JavaHashImpl::apply(vec_from[i]);
|
||||
else
|
||||
{
|
||||
FromType v = vec_from[i];
|
||||
FromType value = vec_from[i];
|
||||
if constexpr (std::endian::native == std::endian::big)
|
||||
{
|
||||
FromType tmp_v;
|
||||
reverseMemcpy(&tmp_v, &v, sizeof(v));
|
||||
v = tmp_v;
|
||||
FromType value_reversed;
|
||||
reverseMemcpy(&value_reversed, &value, sizeof(value));
|
||||
value = value_reversed;
|
||||
}
|
||||
h = apply(key, reinterpret_cast<const char *>(&v), sizeof(v));
|
||||
hash = apply(key, reinterpret_cast<const char *>(&value), sizeof(value));
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr (first)
|
||||
vec_to[i] = h;
|
||||
vec_to[i] = hash;
|
||||
else
|
||||
vec_to[i] = combineHashes(key, vec_to[i], h);
|
||||
vec_to[i] = combineHashes(key, vec_to[i], hash);
|
||||
}
|
||||
}
|
||||
else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
|
||||
{
|
||||
auto value = col_from_const->template getValue<FromType>();
|
||||
ToType hash;
|
||||
|
||||
if constexpr (Impl::use_int_hash_for_pods)
|
||||
{
|
||||
if constexpr (std::is_same_v<ToType, UInt64>)
|
||||
hash = IntHash64Impl::apply(bit_cast<UInt64>(value));
|
||||
else
|
||||
hash = IntHash32Impl::apply(bit_cast<UInt32>(value));
|
||||
}
|
||||
else
|
||||
{
|
||||
if constexpr (std::is_same_v<Impl, JavaHashImpl>)
|
||||
hash = JavaHashImpl::apply(value);
|
||||
else
|
||||
{
|
||||
if constexpr (std::endian::native == std::endian::big)
|
||||
{
|
||||
FromType value_reversed;
|
||||
reverseMemcpy(&value_reversed, &value, sizeof(value));
|
||||
value = value_reversed;
|
||||
}
|
||||
hash = apply(key, reinterpret_cast<const char *>(&value), sizeof(value));
|
||||
}
|
||||
}
|
||||
|
||||
size_t size = vec_to.size();
|
||||
if constexpr (first)
|
||||
vec_to.assign(size, hash);
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
vec_to[i] = combineHashes(key, vec_to[i], hash);
|
||||
}
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
|
||||
column->getName(), getName());
|
||||
@ -1139,46 +1156,40 @@ private:
|
||||
size_t size = vec_from.size();
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
ToType h;
|
||||
ToType hash;
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
{
|
||||
h = apply(key, reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
|
||||
}
|
||||
hash = apply(key, reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
|
||||
else
|
||||
{
|
||||
char tmp_buffer[sizeof(vec_from[i])];
|
||||
reverseMemcpy(tmp_buffer, &vec_from[i], sizeof(vec_from[i]));
|
||||
h = apply(key, reinterpret_cast<const char *>(tmp_buffer), sizeof(vec_from[i]));
|
||||
hash = apply(key, reinterpret_cast<const char *>(tmp_buffer), sizeof(vec_from[i]));
|
||||
}
|
||||
if constexpr (first)
|
||||
vec_to[i] = h;
|
||||
vec_to[i] = hash;
|
||||
else
|
||||
vec_to[i] = combineHashes(key, vec_to[i], h);
|
||||
vec_to[i] = combineHashes(key, vec_to[i], hash);
|
||||
}
|
||||
}
|
||||
else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
|
||||
{
|
||||
auto value = col_from_const->template getValue<FromType>();
|
||||
|
||||
ToType h;
|
||||
ToType hash;
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
{
|
||||
h = apply(key, reinterpret_cast<const char *>(&value), sizeof(value));
|
||||
}
|
||||
hash = apply(key, reinterpret_cast<const char *>(&value), sizeof(value));
|
||||
else
|
||||
{
|
||||
char tmp_buffer[sizeof(value)];
|
||||
reverseMemcpy(tmp_buffer, &value, sizeof(value));
|
||||
h = apply(key, reinterpret_cast<const char *>(tmp_buffer), sizeof(value));
|
||||
hash = apply(key, reinterpret_cast<const char *>(tmp_buffer), sizeof(value));
|
||||
}
|
||||
size_t size = vec_to.size();
|
||||
if constexpr (first)
|
||||
vec_to.assign(size, h);
|
||||
vec_to.assign(size, hash);
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
vec_to[i] = combineHashes(key, vec_to[i], h);
|
||||
}
|
||||
vec_to[i] = combineHashes(key, vec_to[i], hash);
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
|
||||
@ -1191,11 +1202,11 @@ private:
|
||||
for (size_t i = 0, size = column->size(); i < size; ++i)
|
||||
{
|
||||
StringRef bytes = column->getDataAt(i);
|
||||
const ToType h = apply(key, bytes.data, bytes.size);
|
||||
const ToType hash = apply(key, bytes.data, bytes.size);
|
||||
if constexpr (first)
|
||||
vec_to[i] = h;
|
||||
vec_to[i] = hash;
|
||||
else
|
||||
vec_to[i] = combineHashes(key, vec_to[i], h);
|
||||
vec_to[i] = combineHashes(key, vec_to[i], hash);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1211,14 +1222,14 @@ private:
|
||||
ColumnString::Offset current_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
const ToType h = apply(key,
|
||||
const ToType hash = apply(key,
|
||||
reinterpret_cast<const char *>(&data[current_offset]),
|
||||
offsets[i] - current_offset - 1);
|
||||
|
||||
if constexpr (first)
|
||||
vec_to[i] = h;
|
||||
vec_to[i] = hash;
|
||||
else
|
||||
vec_to[i] = combineHashes(key, vec_to[i], h);
|
||||
vec_to[i] = combineHashes(key, vec_to[i], hash);
|
||||
|
||||
current_offset = offsets[i];
|
||||
}
|
||||
@ -1231,11 +1242,11 @@ private:
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
const ToType h = apply(key, reinterpret_cast<const char *>(&data[i * n]), n);
|
||||
const ToType hash = apply(key, reinterpret_cast<const char *>(&data[i * n]), n);
|
||||
if constexpr (first)
|
||||
vec_to[i] = h;
|
||||
vec_to[i] = hash;
|
||||
else
|
||||
vec_to[i] = combineHashes(key, vec_to[i], h);
|
||||
vec_to[i] = combineHashes(key, vec_to[i], hash);
|
||||
}
|
||||
}
|
||||
else if (const ColumnConst * col_from_const = checkAndGetColumnConstStringOrFixedString(column))
|
||||
@ -1245,17 +1256,11 @@ private:
|
||||
const size_t size = vec_to.size();
|
||||
|
||||
if constexpr (first)
|
||||
{
|
||||
vec_to.assign(size, hash);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
vec_to[i] = combineHashes(key, vec_to[i], hash);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
|
||||
column->getName(), getName());
|
||||
@ -1283,16 +1288,16 @@ private:
|
||||
{
|
||||
ColumnArray::Offset next_offset = offsets[i];
|
||||
|
||||
ToType h;
|
||||
ToType hash;
|
||||
if constexpr (std::is_same_v<ToType, UInt64>)
|
||||
h = IntHash64Impl::apply(next_offset - current_offset);
|
||||
hash = IntHash64Impl::apply(next_offset - current_offset);
|
||||
else
|
||||
h = IntHash32Impl::apply(next_offset - current_offset);
|
||||
hash = IntHash32Impl::apply(next_offset - current_offset);
|
||||
|
||||
if constexpr (first)
|
||||
vec_to[i] = h;
|
||||
vec_to[i] = hash;
|
||||
else
|
||||
vec_to[i] = combineHashes(key, vec_to[i], h);
|
||||
vec_to[i] = combineHashes(key, vec_to[i], hash);
|
||||
|
||||
for (size_t j = current_offset; j < next_offset; ++j)
|
||||
vec_to[i] = combineHashes(key, vec_to[i], vec_temp[j]);
|
||||
|
@ -17,22 +17,7 @@ ConnectionTimeouts::ConnectionTimeouts(
|
||||
, secure_connection_timeout(connection_timeout)
|
||||
, hedged_connection_timeout(receive_timeout_)
|
||||
, receive_data_timeout(receive_timeout_)
|
||||
{
|
||||
}
|
||||
|
||||
ConnectionTimeouts::ConnectionTimeouts(
|
||||
Poco::Timespan connection_timeout_,
|
||||
Poco::Timespan send_timeout_,
|
||||
Poco::Timespan receive_timeout_,
|
||||
Poco::Timespan tcp_keep_alive_timeout_)
|
||||
: connection_timeout(connection_timeout_)
|
||||
, send_timeout(send_timeout_)
|
||||
, receive_timeout(receive_timeout_)
|
||||
, tcp_keep_alive_timeout(tcp_keep_alive_timeout_)
|
||||
, http_keep_alive_timeout(0)
|
||||
, secure_connection_timeout(connection_timeout)
|
||||
, hedged_connection_timeout(receive_timeout_)
|
||||
, receive_data_timeout(receive_timeout_)
|
||||
, handshake_timeout(receive_timeout_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -41,7 +26,26 @@ ConnectionTimeouts::ConnectionTimeouts(
|
||||
Poco::Timespan send_timeout_,
|
||||
Poco::Timespan receive_timeout_,
|
||||
Poco::Timespan tcp_keep_alive_timeout_,
|
||||
Poco::Timespan http_keep_alive_timeout_)
|
||||
Poco::Timespan handshake_timeout_)
|
||||
: connection_timeout(connection_timeout_)
|
||||
, send_timeout(send_timeout_)
|
||||
, receive_timeout(receive_timeout_)
|
||||
, tcp_keep_alive_timeout(tcp_keep_alive_timeout_)
|
||||
, http_keep_alive_timeout(0)
|
||||
, secure_connection_timeout(connection_timeout)
|
||||
, hedged_connection_timeout(receive_timeout_)
|
||||
, receive_data_timeout(receive_timeout_)
|
||||
, handshake_timeout(handshake_timeout_)
|
||||
{
|
||||
}
|
||||
|
||||
ConnectionTimeouts::ConnectionTimeouts(
|
||||
Poco::Timespan connection_timeout_,
|
||||
Poco::Timespan send_timeout_,
|
||||
Poco::Timespan receive_timeout_,
|
||||
Poco::Timespan tcp_keep_alive_timeout_,
|
||||
Poco::Timespan http_keep_alive_timeout_,
|
||||
Poco::Timespan handshake_timeout_)
|
||||
: connection_timeout(connection_timeout_)
|
||||
, send_timeout(send_timeout_)
|
||||
, receive_timeout(receive_timeout_)
|
||||
@ -50,6 +54,7 @@ ConnectionTimeouts::ConnectionTimeouts(
|
||||
, secure_connection_timeout(connection_timeout)
|
||||
, hedged_connection_timeout(receive_timeout_)
|
||||
, receive_data_timeout(receive_timeout_)
|
||||
, handshake_timeout(handshake_timeout_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -60,16 +65,18 @@ ConnectionTimeouts::ConnectionTimeouts(
|
||||
Poco::Timespan tcp_keep_alive_timeout_,
|
||||
Poco::Timespan http_keep_alive_timeout_,
|
||||
Poco::Timespan secure_connection_timeout_,
|
||||
Poco::Timespan receive_hello_timeout_,
|
||||
Poco::Timespan receive_data_timeout_)
|
||||
Poco::Timespan hedged_connection_timeout_,
|
||||
Poco::Timespan receive_data_timeout_,
|
||||
Poco::Timespan handshake_timeout_)
|
||||
: connection_timeout(connection_timeout_)
|
||||
, send_timeout(send_timeout_)
|
||||
, receive_timeout(receive_timeout_)
|
||||
, tcp_keep_alive_timeout(tcp_keep_alive_timeout_)
|
||||
, http_keep_alive_timeout(http_keep_alive_timeout_)
|
||||
, secure_connection_timeout(secure_connection_timeout_)
|
||||
, hedged_connection_timeout(receive_hello_timeout_)
|
||||
, hedged_connection_timeout(hedged_connection_timeout_)
|
||||
, receive_data_timeout(receive_data_timeout_)
|
||||
, handshake_timeout(handshake_timeout_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -90,13 +97,14 @@ ConnectionTimeouts ConnectionTimeouts::getSaturated(Poco::Timespan limit) const
|
||||
saturate(http_keep_alive_timeout, limit),
|
||||
saturate(secure_connection_timeout, limit),
|
||||
saturate(hedged_connection_timeout, limit),
|
||||
saturate(receive_data_timeout, limit));
|
||||
saturate(receive_data_timeout, limit),
|
||||
saturate(handshake_timeout, limit));
|
||||
}
|
||||
|
||||
/// Timeouts for the case when we have just single attempt to connect.
|
||||
ConnectionTimeouts ConnectionTimeouts::getTCPTimeoutsWithoutFailover(const Settings & settings)
|
||||
{
|
||||
return ConnectionTimeouts(settings.connect_timeout, settings.send_timeout, settings.receive_timeout, settings.tcp_keep_alive_timeout);
|
||||
return ConnectionTimeouts(settings.connect_timeout, settings.send_timeout, settings.receive_timeout, settings.tcp_keep_alive_timeout, settings.handshake_timeout_ms);
|
||||
}
|
||||
|
||||
/// Timeouts for the case when we will try many addresses in a loop.
|
||||
@ -110,7 +118,8 @@ ConnectionTimeouts ConnectionTimeouts::getTCPTimeoutsWithFailover(const Settings
|
||||
0,
|
||||
settings.connect_timeout_with_failover_secure_ms,
|
||||
settings.hedged_connection_timeout_ms,
|
||||
settings.receive_data_timeout_ms);
|
||||
settings.receive_data_timeout_ms,
|
||||
settings.handshake_timeout_ms);
|
||||
}
|
||||
|
||||
ConnectionTimeouts ConnectionTimeouts::getHTTPTimeouts(const Settings & settings, Poco::Timespan http_keep_alive_timeout)
|
||||
@ -120,7 +129,8 @@ ConnectionTimeouts ConnectionTimeouts::getHTTPTimeouts(const Settings & settings
|
||||
settings.http_send_timeout,
|
||||
settings.http_receive_timeout,
|
||||
settings.tcp_keep_alive_timeout,
|
||||
http_keep_alive_timeout);
|
||||
http_keep_alive_timeout,
|
||||
settings.http_receive_timeout);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -23,6 +23,9 @@ struct ConnectionTimeouts
|
||||
Poco::Timespan hedged_connection_timeout;
|
||||
Poco::Timespan receive_data_timeout;
|
||||
|
||||
/// Timeout for receiving HELLO packet
|
||||
Poco::Timespan handshake_timeout;
|
||||
|
||||
/// Timeout for synchronous request-result protocol call (like Ping or TablesStatus)
|
||||
Poco::Timespan sync_request_timeout = Poco::Timespan(DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC, 0);
|
||||
|
||||
@ -35,13 +38,15 @@ struct ConnectionTimeouts
|
||||
ConnectionTimeouts(Poco::Timespan connection_timeout_,
|
||||
Poco::Timespan send_timeout_,
|
||||
Poco::Timespan receive_timeout_,
|
||||
Poco::Timespan tcp_keep_alive_timeout_);
|
||||
Poco::Timespan tcp_keep_alive_timeout_,
|
||||
Poco::Timespan handshake_timeout_);
|
||||
|
||||
ConnectionTimeouts(Poco::Timespan connection_timeout_,
|
||||
Poco::Timespan send_timeout_,
|
||||
Poco::Timespan receive_timeout_,
|
||||
Poco::Timespan tcp_keep_alive_timeout_,
|
||||
Poco::Timespan http_keep_alive_timeout_);
|
||||
Poco::Timespan http_keep_alive_timeout_,
|
||||
Poco::Timespan handshake_timeout_);
|
||||
|
||||
ConnectionTimeouts(Poco::Timespan connection_timeout_,
|
||||
Poco::Timespan send_timeout_,
|
||||
@ -49,8 +54,9 @@ struct ConnectionTimeouts
|
||||
Poco::Timespan tcp_keep_alive_timeout_,
|
||||
Poco::Timespan http_keep_alive_timeout_,
|
||||
Poco::Timespan secure_connection_timeout_,
|
||||
Poco::Timespan receive_hello_timeout_,
|
||||
Poco::Timespan receive_data_timeout_);
|
||||
Poco::Timespan hedged_connection_timeout_,
|
||||
Poco::Timespan receive_data_timeout_,
|
||||
Poco::Timespan handshake_timeout_);
|
||||
|
||||
static Poco::Timespan saturate(Poco::Timespan timespan, Poco::Timespan limit);
|
||||
ConnectionTimeouts getSaturated(Poco::Timespan limit) const;
|
||||
|
@ -29,14 +29,12 @@ TimeoutSetter::TimeoutSetter(Poco::Net::StreamSocket & socket_, Poco::Timespan t
|
||||
|
||||
TimeoutSetter::~TimeoutSetter()
|
||||
{
|
||||
try
|
||||
{
|
||||
bool connected = socket.impl()->initialized();
|
||||
if (!connected)
|
||||
if (was_reset)
|
||||
return;
|
||||
|
||||
socket.setSendTimeout(old_send_timeout);
|
||||
socket.setReceiveTimeout(old_receive_timeout);
|
||||
try
|
||||
{
|
||||
reset();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -44,4 +42,15 @@ TimeoutSetter::~TimeoutSetter()
|
||||
}
|
||||
}
|
||||
|
||||
void TimeoutSetter::reset()
|
||||
{
|
||||
bool connected = socket.impl()->initialized();
|
||||
if (!connected)
|
||||
return;
|
||||
|
||||
socket.setSendTimeout(old_send_timeout);
|
||||
socket.setReceiveTimeout(old_receive_timeout);
|
||||
was_reset = true;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/// Temporarily overrides socket send/receive timeouts and reset them back into destructor
|
||||
/// Temporarily overrides socket send/receive timeouts and reset them back into destructor (or manually by calling reset method)
|
||||
/// If "limit_max_timeout" is true, timeouts could be only decreased (maxed by previous value).
|
||||
struct TimeoutSetter
|
||||
{
|
||||
@ -19,6 +19,9 @@ struct TimeoutSetter
|
||||
|
||||
~TimeoutSetter();
|
||||
|
||||
/// Reset timeouts back.
|
||||
void reset();
|
||||
|
||||
Poco::Net::StreamSocket & socket;
|
||||
|
||||
Poco::Timespan send_timeout;
|
||||
@ -26,5 +29,6 @@ struct TimeoutSetter
|
||||
|
||||
Poco::Timespan old_send_timeout;
|
||||
Poco::Timespan old_receive_timeout;
|
||||
bool was_reset = false;
|
||||
};
|
||||
}
|
||||
|
@ -543,13 +543,17 @@ namespace
|
||||
template <typename Map, typename KeyGetter>
|
||||
struct Inserter
|
||||
{
|
||||
static ALWAYS_INLINE void insertOne(const HashJoin & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i,
|
||||
static ALWAYS_INLINE bool insertOne(const HashJoin & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i,
|
||||
Arena & pool)
|
||||
{
|
||||
auto emplace_result = key_getter.emplaceKey(map, i, pool);
|
||||
|
||||
if (emplace_result.isInserted() || join.anyTakeLastRow())
|
||||
{
|
||||
new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, i);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static ALWAYS_INLINE void insertAll(const HashJoin &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool)
|
||||
@ -582,7 +586,7 @@ namespace
|
||||
template <JoinStrictness STRICTNESS, typename KeyGetter, typename Map, bool has_null_map>
|
||||
size_t NO_INLINE insertFromBlockImplTypeCase(
|
||||
HashJoin & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns,
|
||||
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool)
|
||||
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted)
|
||||
{
|
||||
[[maybe_unused]] constexpr bool mapped_one = std::is_same_v<typename Map::mapped_type, RowRef>;
|
||||
constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof;
|
||||
@ -593,10 +597,18 @@ namespace
|
||||
|
||||
auto key_getter = createKeyGetter<KeyGetter, is_asof_join>(key_columns, key_sizes);
|
||||
|
||||
/// For ALL and ASOF join always insert values
|
||||
is_inserted = !mapped_one || is_asof_join;
|
||||
|
||||
for (size_t i = 0; i < rows; ++i)
|
||||
{
|
||||
if (has_null_map && (*null_map)[i])
|
||||
{
|
||||
/// nulls are not inserted into hash table,
|
||||
/// keep them for RIGHT and FULL joins
|
||||
is_inserted = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
/// Check condition for right table from ON section
|
||||
if (join_mask && !(*join_mask)[i])
|
||||
@ -605,7 +617,7 @@ namespace
|
||||
if constexpr (is_asof_join)
|
||||
Inserter<Map, KeyGetter>::insertAsof(join, map, key_getter, stored_block, i, pool, *asof_column);
|
||||
else if constexpr (mapped_one)
|
||||
Inserter<Map, KeyGetter>::insertOne(join, map, key_getter, stored_block, i, pool);
|
||||
is_inserted |= Inserter<Map, KeyGetter>::insertOne(join, map, key_getter, stored_block, i, pool);
|
||||
else
|
||||
Inserter<Map, KeyGetter>::insertAll(join, map, key_getter, stored_block, i, pool);
|
||||
}
|
||||
@ -616,32 +628,37 @@ namespace
|
||||
template <JoinStrictness STRICTNESS, typename KeyGetter, typename Map>
|
||||
size_t insertFromBlockImplType(
|
||||
HashJoin & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns,
|
||||
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool)
|
||||
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted)
|
||||
{
|
||||
if (null_map)
|
||||
return insertFromBlockImplTypeCase<STRICTNESS, KeyGetter, Map, true>(
|
||||
join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool);
|
||||
join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted);
|
||||
else
|
||||
return insertFromBlockImplTypeCase<STRICTNESS, KeyGetter, Map, false>(
|
||||
join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool);
|
||||
join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted);
|
||||
}
|
||||
|
||||
|
||||
template <JoinStrictness STRICTNESS, typename Maps>
|
||||
size_t insertFromBlockImpl(
|
||||
HashJoin & join, HashJoin::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & key_columns,
|
||||
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool)
|
||||
const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case HashJoin::Type::EMPTY: return 0;
|
||||
case HashJoin::Type::CROSS: return 0; /// Do nothing. We have already saved block, and it is enough.
|
||||
case HashJoin::Type::EMPTY:
|
||||
[[fallthrough]];
|
||||
case HashJoin::Type::CROSS:
|
||||
/// Do nothing. We will only save block, and it is enough
|
||||
is_inserted = true;
|
||||
return 0;
|
||||
|
||||
#define M(TYPE) \
|
||||
case HashJoin::Type::TYPE: \
|
||||
return insertFromBlockImplType<STRICTNESS, typename KeyGetterForType<HashJoin::Type::TYPE, std::remove_reference_t<decltype(*maps.TYPE)>>::Type>(\
|
||||
join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool); \
|
||||
join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); \
|
||||
break;
|
||||
|
||||
APPLY_FOR_JOIN_VARIANTS(M)
|
||||
#undef M
|
||||
}
|
||||
@ -816,6 +833,7 @@ bool HashJoin::addJoinedBlock(const Block & source_block_, bool check_limits)
|
||||
}
|
||||
}
|
||||
|
||||
bool is_inserted = false;
|
||||
if (kind != JoinKind::Cross)
|
||||
{
|
||||
joinDispatch(kind, strictness, data->maps[onexpr_idx], [&](auto kind_, auto strictness_, auto & map)
|
||||
@ -824,28 +842,35 @@ bool HashJoin::addJoinedBlock(const Block & source_block_, bool check_limits)
|
||||
*this, data->type, map, rows, key_columns, key_sizes[onexpr_idx], stored_block, null_map,
|
||||
/// If mask is false constant, rows are added to hashmap anyway. It's not a happy-flow, so this case is not optimized
|
||||
join_mask_col.getData(),
|
||||
data->pool);
|
||||
data->pool, is_inserted);
|
||||
|
||||
if (multiple_disjuncts)
|
||||
used_flags.reinit<kind_, strictness_>(stored_block);
|
||||
else
|
||||
else if (is_inserted)
|
||||
/// Number of buckets + 1 value from zero storage
|
||||
used_flags.reinit<kind_, strictness_>(size + 1);
|
||||
});
|
||||
}
|
||||
|
||||
if (!multiple_disjuncts && save_nullmap)
|
||||
if (!multiple_disjuncts && save_nullmap && is_inserted)
|
||||
{
|
||||
data->blocks_nullmaps_allocated_size += null_map_holder->allocatedBytes();
|
||||
data->blocks_nullmaps.emplace_back(stored_block, null_map_holder);
|
||||
}
|
||||
|
||||
if (!multiple_disjuncts && not_joined_map)
|
||||
if (!multiple_disjuncts && not_joined_map && is_inserted)
|
||||
{
|
||||
data->blocks_nullmaps_allocated_size += not_joined_map->allocatedBytes();
|
||||
data->blocks_nullmaps.emplace_back(stored_block, std::move(not_joined_map));
|
||||
}
|
||||
|
||||
if (!multiple_disjuncts && !is_inserted)
|
||||
{
|
||||
LOG_TRACE(log, "Skipping inserting block with {} rows", rows);
|
||||
data->blocks_allocated_size -= stored_block->allocatedBytes();
|
||||
data->blocks.pop_back();
|
||||
}
|
||||
|
||||
if (!check_limits)
|
||||
return true;
|
||||
|
||||
|
@ -13,7 +13,7 @@ namespace DB
|
||||
template <typename Result, typename Callback = std::function<Result()>>
|
||||
using ThreadPoolCallbackRunner = std::function<std::future<Result>(Callback &&, Priority)>;
|
||||
|
||||
/// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrow()'.
|
||||
/// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrowOnError()'.
|
||||
template <typename Result, typename Callback = std::function<Result()>>
|
||||
ThreadPoolCallbackRunner<Result, Callback> threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name)
|
||||
{
|
||||
@ -44,7 +44,7 @@ ThreadPoolCallbackRunner<Result, Callback> threadPoolCallbackRunner(ThreadPool &
|
||||
|
||||
auto future = task->get_future();
|
||||
|
||||
my_pool->scheduleOrThrow([my_task = std::move(task)]{ (*my_task)(); }, priority);
|
||||
my_pool->scheduleOrThrowOnError([my_task = std::move(task)]{ (*my_task)(); }, priority);
|
||||
|
||||
return future;
|
||||
};
|
||||
|
@ -272,7 +272,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
|
||||
{
|
||||
/// If totals step has HAVING expression, skip it for now.
|
||||
/// TODO:
|
||||
/// We can merge HAVING expression with current filter.
|
||||
/// We can merge HAVING expression with current filer.
|
||||
/// Also, we can push down part of HAVING which depend only on aggregation keys.
|
||||
if (totals_having->getActions())
|
||||
return 0;
|
||||
@ -323,9 +323,9 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
|
||||
{
|
||||
const auto & table_join = join ? join->getJoin()->getTableJoin() : filled_join->getJoin()->getTableJoin();
|
||||
|
||||
/// Only inner, cross and left(/right) join are supported. Other types may generate default values for left table keys.
|
||||
/// Only inner and left(/right) join are supported. Other types may generate default values for left table keys.
|
||||
/// So, if we push down a condition like `key != 0`, not all rows may be filtered.
|
||||
if (table_join.kind() != JoinKind::Inner && table_join.kind() != JoinKind::Cross && table_join.kind() != kind)
|
||||
if (table_join.kind() != JoinKind::Inner && table_join.kind() != kind)
|
||||
return 0;
|
||||
|
||||
bool is_left = kind == JoinKind::Left;
|
||||
|
@ -4854,6 +4854,9 @@ void MergeTreeData::checkAlterPartitionIsPossible(
|
||||
|
||||
void MergeTreeData::checkPartitionCanBeDropped(const ASTPtr & partition, ContextPtr local_context)
|
||||
{
|
||||
if (!supportsReplication() && isStaticStorage())
|
||||
return;
|
||||
|
||||
DataPartsVector parts_to_remove;
|
||||
const auto * partition_ast = partition->as<ASTPartition>();
|
||||
if (partition_ast && partition_ast->all)
|
||||
@ -4874,6 +4877,9 @@ void MergeTreeData::checkPartitionCanBeDropped(const ASTPtr & partition, Context
|
||||
|
||||
void MergeTreeData::checkPartCanBeDropped(const String & part_name)
|
||||
{
|
||||
if (!supportsReplication() && isStaticStorage())
|
||||
return;
|
||||
|
||||
auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active});
|
||||
if (!part)
|
||||
throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "No part {} in committed state", part_name);
|
||||
|
@ -865,7 +865,7 @@ public:
|
||||
DiskPtr tryGetDiskForDetachedPart(const String & part_name) const;
|
||||
DiskPtr getDiskForDetachedPart(const String & part_name) const;
|
||||
|
||||
bool storesDataOnDisk() const override { return true; }
|
||||
bool storesDataOnDisk() const override { return !isStaticStorage(); }
|
||||
Strings getDataPaths() const override;
|
||||
|
||||
/// Reserves space at least 1MB.
|
||||
|
@ -114,7 +114,7 @@ StorageMergeTree::StorageMergeTree(
|
||||
|
||||
loadDataParts(has_force_restore_data_flag);
|
||||
|
||||
if (!attach && !getDataPartsForInternalUsage().empty())
|
||||
if (!attach && !getDataPartsForInternalUsage().empty() && !isStaticStorage())
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA,
|
||||
"Data directory for table already containing data parts - probably "
|
||||
"it was unclean DROP table or manual intervention. "
|
||||
@ -283,6 +283,9 @@ StorageMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & met
|
||||
|
||||
void StorageMergeTree::checkTableCanBeDropped() const
|
||||
{
|
||||
if (!supportsReplication() && isStaticStorage())
|
||||
return;
|
||||
|
||||
auto table_id = getStorageID();
|
||||
getContext()->checkTableCanBeDropped(table_id.database_name, table_id.table_name, getTotalActiveSizeInBytes());
|
||||
}
|
||||
|
@ -10,16 +10,22 @@ def cluster():
|
||||
try:
|
||||
cluster = ClickHouseCluster(__file__)
|
||||
cluster.add_instance(
|
||||
"node1", main_configs=["configs/storage_conf.xml"], with_nginx=True
|
||||
"node1",
|
||||
main_configs=["configs/storage_conf.xml"],
|
||||
with_nginx=True,
|
||||
)
|
||||
cluster.add_instance(
|
||||
"node2",
|
||||
main_configs=["configs/storage_conf_web.xml"],
|
||||
with_nginx=True,
|
||||
stay_alive=True,
|
||||
with_zookeeper=True,
|
||||
)
|
||||
cluster.add_instance(
|
||||
"node3", main_configs=["configs/storage_conf_web.xml"], with_nginx=True
|
||||
"node3",
|
||||
main_configs=["configs/storage_conf_web.xml"],
|
||||
with_nginx=True,
|
||||
with_zookeeper=True,
|
||||
)
|
||||
|
||||
cluster.add_instance(
|
||||
@ -95,7 +101,7 @@ def test_usage(cluster, node_name):
|
||||
for i in range(3):
|
||||
node2.query(
|
||||
"""
|
||||
ATTACH TABLE test{} UUID '{}'
|
||||
CREATE TABLE test{} UUID '{}'
|
||||
(id Int32) ENGINE = MergeTree() ORDER BY id
|
||||
SETTINGS storage_policy = 'web';
|
||||
""".format(
|
||||
@ -140,7 +146,7 @@ def test_incorrect_usage(cluster):
|
||||
global uuids
|
||||
node2.query(
|
||||
"""
|
||||
ATTACH TABLE test0 UUID '{}'
|
||||
CREATE TABLE test0 UUID '{}'
|
||||
(id Int32) ENGINE = MergeTree() ORDER BY id
|
||||
SETTINGS storage_policy = 'web';
|
||||
""".format(
|
||||
@ -173,7 +179,7 @@ def test_cache(cluster, node_name):
|
||||
for i in range(3):
|
||||
node2.query(
|
||||
"""
|
||||
ATTACH TABLE test{} UUID '{}'
|
||||
CREATE TABLE test{} UUID '{}'
|
||||
(id Int32) ENGINE = MergeTree() ORDER BY id
|
||||
SETTINGS storage_policy = 'cached_web';
|
||||
""".format(
|
||||
@ -238,7 +244,7 @@ def test_unavailable_server(cluster):
|
||||
global uuids
|
||||
node2.query(
|
||||
"""
|
||||
ATTACH TABLE test0 UUID '{}'
|
||||
CREATE TABLE test0 UUID '{}'
|
||||
(id Int32) ENGINE = MergeTree() ORDER BY id
|
||||
SETTINGS storage_policy = 'web';
|
||||
""".format(
|
||||
@ -276,3 +282,35 @@ def test_unavailable_server(cluster):
|
||||
)
|
||||
node2.start_clickhouse()
|
||||
node2.query("DROP TABLE test0 SYNC")
|
||||
|
||||
|
||||
def test_replicated_database(cluster):
|
||||
node1 = cluster.instances["node3"]
|
||||
node1.query(
|
||||
"CREATE DATABASE rdb ENGINE=Replicated('/test/rdb', 's1', 'r1')",
|
||||
settings={"allow_experimental_database_replicated": 1},
|
||||
)
|
||||
|
||||
global uuids
|
||||
node1.query(
|
||||
"""
|
||||
CREATE TABLE rdb.table0 UUID '{}'
|
||||
(id Int32) ENGINE = MergeTree() ORDER BY id
|
||||
SETTINGS storage_policy = 'web';
|
||||
""".format(
|
||||
uuids[0]
|
||||
)
|
||||
)
|
||||
|
||||
node2 = cluster.instances["node2"]
|
||||
node2.query(
|
||||
"CREATE DATABASE rdb ENGINE=Replicated('/test/rdb', 's1', 'r2')",
|
||||
settings={"allow_experimental_database_replicated": 1},
|
||||
)
|
||||
node2.query("SYSTEM SYNC DATABASE REPLICA rdb")
|
||||
|
||||
assert node1.query("SELECT count() FROM rdb.table0") == "5000000\n"
|
||||
assert node2.query("SELECT count() FROM rdb.table0") == "5000000\n"
|
||||
|
||||
node1.query("DROP DATABASE rdb SYNC")
|
||||
node2.query("DROP DATABASE rdb SYNC")
|
||||
|
@ -169,7 +169,9 @@ test_config3 = """
|
||||
def send_repeated_query(table, count=5):
|
||||
for i in range(count):
|
||||
node.query_and_get_error(
|
||||
"SELECT count() FROM {} SETTINGS receive_timeout=1".format(table)
|
||||
"SELECT count() FROM {} SETTINGS receive_timeout=1, handshake_timeout_ms=1".format(
|
||||
table
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
|
@ -40,8 +40,8 @@ def test(started_cluster):
|
||||
cluster.pause_container("node_1")
|
||||
|
||||
node.query("SYSTEM RELOAD CONFIG")
|
||||
node.query_and_get_error(
|
||||
"SELECT count() FROM distributed SETTINGS receive_timeout=1"
|
||||
error = node.query_and_get_error(
|
||||
"SELECT count() FROM distributed SETTINGS receive_timeout=1, handshake_timeout_ms=1"
|
||||
)
|
||||
|
||||
result = node.query(
|
||||
|
@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-tsan, no-debug
|
||||
# Tags: no-tsan, no-debug, no-msan
|
||||
# Tag no-tsan: Too long for TSan
|
||||
|
||||
# shellcheck disable=SC2016
|
||||
|
@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-tsan, no-debug, no-fasttest
|
||||
# Tags: no-tsan, no-debug, no-fasttest, no-msan
|
||||
# Tag no-tsan: Too long for TSan
|
||||
|
||||
# shellcheck disable=SC2016
|
||||
|
@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-tsan, no-debug
|
||||
# Tags: no-tsan, no-debug, no-msan
|
||||
# Tag no-tsan: Too long for TSan
|
||||
|
||||
# shellcheck disable=SC2016
|
||||
|
@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-tsan, no-debug
|
||||
# Tags: no-tsan, no-debug, no-msan
|
||||
# Tag no-tsan: Too long for TSan
|
||||
|
||||
# shellcheck disable=SC2016
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user