Merge branch 'master' into nulllogger

This commit is contained in:
Alexey Milovidov 2023-06-03 06:47:42 +03:00 committed by GitHub
commit dbfb3b810b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
211 changed files with 5809 additions and 1799 deletions

7
.gitmodules vendored
View File

@ -35,10 +35,9 @@
[submodule "contrib/unixodbc"]
path = contrib/unixodbc
url = https://github.com/ClickHouse/UnixODBC
[submodule "contrib/protobuf"]
path = contrib/protobuf
url = https://github.com/ClickHouse/protobuf
branch = v3.13.0.1
[submodule "contrib/google-protobuf"]
path = contrib/google-protobuf
url = https://github.com/ClickHouse/google-protobuf.git
[submodule "contrib/boost"]
path = contrib/boost
url = https://github.com/ClickHouse/boost

View File

@ -88,7 +88,7 @@ add_contrib (thrift-cmake thrift)
# parquet/arrow/orc
add_contrib (arrow-cmake arrow) # requires: snappy, thrift, double-conversion
add_contrib (avro-cmake avro) # requires: snappy
add_contrib (protobuf-cmake protobuf)
add_contrib (google-protobuf-cmake google-protobuf)
add_contrib (openldap-cmake openldap)
add_contrib (grpc-cmake grpc)
add_contrib (msgpack-c-cmake msgpack-c)
@ -156,7 +156,7 @@ add_contrib (libgsasl-cmake libgsasl) # requires krb5
add_contrib (librdkafka-cmake librdkafka) # requires: libgsasl
add_contrib (nats-io-cmake nats-io)
add_contrib (isa-l-cmake isa-l)
add_contrib (libhdfs3-cmake libhdfs3) # requires: protobuf, krb5, isa-l
add_contrib (libhdfs3-cmake libhdfs3) # requires: google-protobuf, krb5, isa-l
add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift/avro/arrow/libhdfs3
add_contrib (cppkafka-cmake cppkafka)
add_contrib (libpqxx-cmake libpqxx)

1
contrib/google-protobuf vendored Submodule

@ -0,0 +1 @@
Subproject commit c47efe2d8f6a60022b49ecd6cc23660687c8598f

View File

@ -5,7 +5,7 @@ if(NOT ENABLE_PROTOBUF)
return()
endif()
set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src")
set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf/src")
if(OS_FREEBSD AND SANITIZE STREQUAL "address")
# ../contrib/protobuf/src/google/protobuf/arena_impl.h:45:10: fatal error: 'sanitizer/asan_interface.h' file not found
# #include <sanitizer/asan_interface.h>
@ -17,8 +17,8 @@ if(OS_FREEBSD AND SANITIZE STREQUAL "address")
endif()
endif()
set(protobuf_source_dir "${ClickHouse_SOURCE_DIR}/contrib/protobuf")
set(protobuf_binary_dir "${ClickHouse_BINARY_DIR}/contrib/protobuf")
set(protobuf_source_dir "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf")
set(protobuf_binary_dir "${ClickHouse_BINARY_DIR}/contrib/google-protobuf")
add_definitions(-DGOOGLE_PROTOBUF_CMAKE_BUILD)
@ -35,7 +35,6 @@ set(libprotobuf_lite_files
${protobuf_source_dir}/src/google/protobuf/arena.cc
${protobuf_source_dir}/src/google/protobuf/arenastring.cc
${protobuf_source_dir}/src/google/protobuf/extension_set.cc
${protobuf_source_dir}/src/google/protobuf/field_access_listener.cc
${protobuf_source_dir}/src/google/protobuf/generated_enum_util.cc
${protobuf_source_dir}/src/google/protobuf/generated_message_table_driven_lite.cc
${protobuf_source_dir}/src/google/protobuf/generated_message_util.cc
@ -86,6 +85,7 @@ set(libprotobuf_files
${protobuf_source_dir}/src/google/protobuf/empty.pb.cc
${protobuf_source_dir}/src/google/protobuf/extension_set_heavy.cc
${protobuf_source_dir}/src/google/protobuf/field_mask.pb.cc
${protobuf_source_dir}/src/google/protobuf/generated_message_bases.cc
${protobuf_source_dir}/src/google/protobuf/generated_message_reflection.cc
${protobuf_source_dir}/src/google/protobuf/generated_message_table_driven.cc
${protobuf_source_dir}/src/google/protobuf/io/gzip_stream.cc
@ -316,7 +316,7 @@ else ()
add_dependencies(protoc "${PROTOC_BUILD_DIR}/protoc")
endif ()
include("${ClickHouse_SOURCE_DIR}/contrib/protobuf-cmake/protobuf_generate.cmake")
include("${ClickHouse_SOURCE_DIR}/contrib/google-protobuf-cmake/protobuf_generate.cmake")
add_library(_protobuf INTERFACE)
target_link_libraries(_protobuf INTERFACE _libprotobuf)

1
contrib/protobuf vendored

@ -1 +0,0 @@
Subproject commit 6bb70196c5360268d9f021bb7936fb0b551724c2

View File

@ -3,5 +3,5 @@
set -x
service zookeeper start && sleep 7 && /usr/share/zookeeper/bin/zkCli.sh -server localhost:2181 -create create /clickhouse_test '';
gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt
timeout 40m gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt
./process_unit_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv

View File

@ -119,7 +119,7 @@ When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree](
The data of TIME type in MySQL is converted to microseconds in ClickHouse.
Other types are not supported. If MySQL table contains a column of such type, ClickHouse throws exception "Unhandled data type" and stops replication.
Other types are not supported. If MySQL table contains a column of such type, ClickHouse throws an exception and stops replication.
## Specifics and Recommendations {#specifics-and-recommendations}

View File

@ -55,7 +55,7 @@ ATTACH TABLE postgres_database.new_table;
```
:::warning
Before version 22.1, adding a table to replication left an unremoved temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. This issue is fixed in 22.1.
Before version 22.1, adding a table to replication left a non-removed temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. This issue is fixed in 22.1.
:::
## Dynamically removing tables from replication {#dynamically-removing-table-from-replication}
@ -257,7 +257,7 @@ Please note that this should be used only if it is actually needed. If there is
1. [CREATE PUBLICATION](https://postgrespro.ru/docs/postgresql/14/sql-createpublication) -- create query privilege.
2. [CREATE_REPLICATION_SLOT](https://postgrespro.ru/docs/postgrespro/10/protocol-replication#PROTOCOL-REPLICATION-CREATE-SLOT) -- replication privelege.
2. [CREATE_REPLICATION_SLOT](https://postgrespro.ru/docs/postgrespro/10/protocol-replication#PROTOCOL-REPLICATION-CREATE-SLOT) -- replication privilege.
3. [pg_drop_replication_slot](https://postgrespro.ru/docs/postgrespro/9.5/functions-admin#functions-replication) -- replication privilege or superuser.

View File

@ -30,7 +30,7 @@ Allows to connect to [SQLite](https://www.sqlite.org/index.html) database and pe
## Specifics and Recommendations {#specifics-and-recommendations}
SQLite stores the entire database (definitions, tables, indices, and the data itself) as a single cross-platform file on a host machine. During writing SQLite locks the entire database file, therefore write operations are performed sequentially. Read operations can be multitasked.
SQLite stores the entire database (definitions, tables, indices, and the data itself) as a single cross-platform file on a host machine. During writing SQLite locks the entire database file, therefore write operations are performed sequentially. Read operations can be multi-tasked.
SQLite does not require service management (such as startup scripts) or access control based on `GRANT` and passwords. Access control is handled by means of file-system permissions given to the database file itself.
## Usage Example {#usage-example}

View File

@ -156,7 +156,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us
| rpc\_client\_connect\_timeout | 600 * 1000 |
| rpc\_client\_read\_timeout | 3600 * 1000 |
| rpc\_client\_write\_timeout | 3600 * 1000 |
| rpc\_client\_socekt\_linger\_timeout | -1 |
| rpc\_client\_socket\_linger\_timeout | -1 |
| rpc\_client\_connect\_retry | 10 |
| rpc\_client\_timeout | 3600 * 1000 |
| dfs\_default\_replica | 3 |
@ -176,7 +176,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us
| output\_write\_timeout | 3600 * 1000 |
| output\_close\_timeout | 3600 * 1000 |
| output\_packetpool\_size | 1024 |
| output\_heeartbeat\_interval | 10 * 1000 |
| output\_heartbeat\_interval | 10 * 1000 |
| dfs\_client\_failover\_max\_attempts | 15 |
| dfs\_client\_read\_shortcircuit\_streams\_cache\_size | 256 |
| dfs\_client\_socketcache\_expiryMsec | 3000 |

View File

@ -6,7 +6,7 @@ sidebar_label: Hive
# Hive
The Hive engine allows you to perform `SELECT` quries on HDFS Hive table. Currently it supports input formats as below:
The Hive engine allows you to perform `SELECT` queries on HDFS Hive table. Currently it supports input formats as below:
- Text: only supports simple scalar column types except `binary`

View File

@ -10,7 +10,7 @@ This engine allows integrating ClickHouse with [NATS](https://nats.io/).
`NATS` lets you:
- Publish or subcribe to message subjects.
- Publish or subscribe to message subjects.
- Process new messages as they become available.
## Creating a Table {#table_engine-redisstreams-creating-a-table}
@ -46,7 +46,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Required parameters:
- `nats_url` host:port (for example, `localhost:5672`).
- `nats_subjects` List of subject for NATS table to subscribe/publsh to. Supports wildcard subjects like `foo.*.bar` or `baz.>`
- `nats_subjects` List of subjects for the NATS table to subscribe/publish to. Supports wildcard subjects like `foo.*.bar` or `baz.>`
- `nats_format` Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section.
Optional parameters:

View File

@ -57,7 +57,7 @@ or via config (since version 21.11):
</named_collections>
```
Some parameters can be overriden by key value arguments:
Some parameters can be overridden by key value arguments:
``` sql
SELECT * FROM postgresql(postgres1, schema='schema1', table='table1');
```

View File

@ -23,7 +23,7 @@ CREATE TABLE s3_engine_table (name String, value UInt32)
- `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed.
- `format` — The [format](../../../interfaces/formats.md#formats) of the file.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension.
- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will auto-detect compression by file extension.
### PARTITION BY
@ -140,8 +140,8 @@ The following settings can be set before query execution or placed into configur
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
- `s3_upload_part_size_multiply_factor` - Multiply `s3_min_upload_part_size` by this factor each time `s3_multiply_parts_count_threshold` parts were uploaded from a single write to S3. Default value is `2`.
- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3 `s3_min_upload_part_size multiplied` by `s3_upload_part_size_multiply_factor`. DEfault value us `500`.
- `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurenly for one object. Its number should be limited. The value `0` means unlimited. Default value is `20`. Each inflight part has a buffer with size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts and more when file is big enought, see `upload_part_size_multiply_factor`. With default settings one uploaded file consumes not more than `320Mb` for a file which is less than `8G`. The consumption is greater for a larger file.
- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts is uploaded to S3, `s3_min_upload_part_size` is multiplied by `s3_upload_part_size_multiply_factor`. Default value is `500`.
- `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. Its number should be limited. The value `0` means unlimited. Default value is `20`. Each in-flight part has a buffer with size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts and more when file is big enough, see `upload_part_size_multiply_factor`. With default settings one uploaded file consumes not more than `320Mb` for a file which is less than `8G`. The consumption is greater for a larger file.
Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.

View File

@ -78,7 +78,7 @@ ENGINE = MergeTree
ORDER BY id;
```
With greater `GRANULARITY` indexes remember the data structure better. The `GRANULARITY` indicates how many granules will be used to construct the index. The more data is provided for the index, the more of it can be handled by one index and the more chances that with the right hyperparameters the index will remember the data structure better. But some indexes can't be built if they don't have enough data, so this granule will always participate in the query. For more information, see the description of indexes.
With greater `GRANULARITY` indexes remember the data structure better. The `GRANULARITY` indicates how many granules will be used to construct the index. The more data is provided for the index, the more of it can be handled by one index and the more chances that with the right hyper parameters the index will remember the data structure better. But some indexes can't be built if they don't have enough data, so this granule will always participate in the query. For more information, see the description of indexes.
As the indexes are built only during insertions into table, `INSERT` and `OPTIMIZE` queries are slower than for ordinary table. At this stage indexes remember all the information about the given data. ANNIndexes should be used if you have immutable or rarely changed data and many read requests.
@ -135,7 +135,7 @@ ORDER BY id;
Annoy supports `L2Distance` and `cosineDistance`.
In the `SELECT` in the settings (`ann_index_select_query_params`) you can specify the size of the internal buffer (more details in the description above or in the [original repository](https://github.com/spotify/annoy)). During the query it will inspect up to `search_k` nodes which defaults to `n_trees * n` if not provided. `search_k` gives you a run-time tradeoff between better accuracy and speed.
In the `SELECT` in the settings (`ann_index_select_query_params`) you can specify the size of the internal buffer (more details in the description above or in the [original repository](https://github.com/spotify/annoy)). During the query it will inspect up to `search_k` nodes which defaults to `n_trees * n` if not provided. `search_k` gives you a run-time trade-off between better accuracy and speed.
__Example__:
``` sql

View File

@ -165,7 +165,7 @@ Performance of such a query heavily depends on the table layout. Because of that
The key factors for a good performance:
- number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise query will underutilize the machine
- number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise query will under-utilize the machine
- partitions shouldn't be too small, so batch processing won't degenerate into row-by-row processing
- partitions should be comparable in size, so all threads will do roughly the same amount of work

View File

@ -779,7 +779,7 @@ Disks, volumes and storage policies should be declared inside the `<storage_conf
:::tip
Disks can also be declared in the `SETTINGS` section of a query. This is useful
for adhoc analysis to temporarily attach a disk that is, for example, hosted at a URL.
for ad-hoc analysis to temporarily attach a disk that is, for example, hosted at a URL.
See [dynamic storage](#dynamic-storage) for more details.
:::
@ -856,7 +856,7 @@ Tags:
- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default, if we insert a data part that has already expired according to the TTL move rule, it immediately goes to the volume/disk declared in the move rule. This can significantly slow down inserts if the destination volume/disk is slow (e.g. S3).
- `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`.
Cofiguration examples:
Configuration examples:
``` xml
<storage_configuration>
@ -1224,7 +1224,7 @@ Limit parameters (mainly for internal usage):
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurenly for one object.
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
Other parameters:
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.

View File

@ -65,7 +65,7 @@ if __name__ == "__main__":
main()
```
The following `my_executable_table` is built from the output of `my_script.py`, which will generate 10 random strings everytime you run a `SELECT` from `my_executable_table`:
The following `my_executable_table` is built from the output of `my_script.py`, which will generate 10 random strings every time you run a `SELECT` from `my_executable_table`:
```sql
CREATE TABLE my_executable_table (
@ -223,4 +223,4 @@ SETTINGS
pool_size = 4;
```
ClickHouse will maintain 4 processes on-demand when your client queries the `sentiment_pooled` table.
ClickHouse will maintain 4 processes on-demand when your client queries the `sentiment_pooled` table.

View File

@ -72,7 +72,7 @@ Additionally, number of keys will have a soft limit of 4 for the number of keys.
If multiple tables are created on the same ZooKeeper path, the values are persisted until there exists at least 1 table using it.
As a result, it is possible to use `ON CLUSTER` clause when creating the table and sharing the data from multiple ClickHouse instances.
Of course, it's possible to manually run `CREATE TABLE` with same path on nonrelated ClickHouse instances to have same data sharing effect.
Of course, it's possible to manually run `CREATE TABLE` with same path on unrelated ClickHouse instances to have same data sharing effect.
## Supported operations {#table_engine-KeeperMap-supported-operations}

View File

@ -87,7 +87,7 @@ ORDER BY (marketplace, review_date, product_category);
3. We are now ready to insert the data into ClickHouse. Before we do, check out the [list of files in the dataset](https://s3.amazonaws.com/amazon-reviews-pds/tsv/index.txt) and decide which ones you want to include.
4. We will insert all of the US reviews - which is about 151M rows. The following `INSERT` command uses the `s3Cluster` table function, which allows the processing of mulitple S3 files in parallel using all the nodes of your cluster. We also use a wildcard to insert any file that starts with the name `https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_`:
4. We will insert all of the US reviews - which is about 151M rows. The following `INSERT` command uses the `s3Cluster` table function, which allows the processing of multiple S3 files in parallel using all the nodes of your cluster. We also use a wildcard to insert any file that starts with the name `https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_`:
```sql
INSERT INTO amazon_reviews
@ -473,4 +473,4 @@ It runs quite a bit faster - which means the cache is helping us out here:
└────────────┴───────────────────────────────────────────────────────────────────────┴────────────────────┴───────┘
50 rows in set. Elapsed: 33.954 sec. Processed 150.96 million rows, 68.95 GB (4.45 million rows/s., 2.03 GB/s.)
```
```

View File

@ -317,7 +317,7 @@ To build a Superset dashboard using the OpenCelliD dataset you should:
Make sure that you set **SSL** on when connecting to ClickHouse Cloud or other ClickHouse systems that enforce the use of SSL.
:::
![Add ClickHouse as a Superset datasource](@site/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png)
![Add ClickHouse as a Superset data source](@site/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png)
### Add the table **cell_towers** as a Superset **dataset**
@ -364,5 +364,5 @@ The data is also available for interactive queries in the [Playground](https://p
This [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=) will populate the username and even the query for you.
Although you cannot create tables in the Playground, you can run all of the queries and even use Superset (adjust the hostname and port number).
Although you cannot create tables in the Playground, you can run all of the queries and even use Superset (adjust the host name and port number).
:::

View File

@ -806,7 +806,7 @@ FROM
31 rows in set. Elapsed: 0.043 sec. Processed 7.54 million rows, 40.53 MB (176.71 million rows/s., 950.40 MB/s.)
```
Maybe a little more near the end of the month, but overall we keep a good even distribution. Again this is unrealiable due to the filtering of the docs filter during data insertion.
Maybe a little more near the end of the month, but overall we keep a good even distribution. Again this is unreliable due to the filtering of the docs filter during data insertion.
## Authors with the most diverse impact
@ -940,7 +940,7 @@ LIMIT 10
10 rows in set. Elapsed: 0.106 sec. Processed 798.15 thousand rows, 13.97 MB (7.51 million rows/s., 131.41 MB/s.)
```
This makes sense because Alexey has been responsible for maintaining the Change log. But what if we use the basename of the file to identify his popular files - this allows for renames and should focus on code contributions.
This makes sense because Alexey has been responsible for maintaining the Change log. But what if we use the base name of the file to identify his popular files - this allows for renames and should focus on code contributions.
[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBiYXNlLAogICAgY291bnQoKSBBUyBjCkZST00gZ2l0X2NsaWNraG91c2UuZmlsZV9jaGFuZ2VzCldIRVJFIChhdXRob3IgPSAnQWxleGV5IE1pbG92aWRvdicpIEFORCAoZmlsZV9leHRlbnNpb24gSU4gKCdoJywgJ2NwcCcsICdzcWwnKSkKR1JPVVAgQlkgYmFzZW5hbWUocGF0aCkgQVMgYmFzZQpPUkRFUiBCWSBjIERFU0MKTElNSVQgMTA=)

View File

@ -9,7 +9,7 @@ The data in this dataset is derived and cleaned from the full OpenSky dataset to
Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd
Martin Strohmeier, Xavier Olive, Jannis Lübbe, Matthias Schäfer, and Vincent Lenders
Martin Strohmeier, Xavier Olive, Jannis Luebbe, Matthias Schaefer, and Vincent Lenders
"Crowdsourced air traffic data from the OpenSky Network 2019–2020"
Earth System Science Data 13(2), 2021
https://doi.org/10.5194/essd-13-357-2021

View File

@ -542,7 +542,7 @@ LIMIT 10;
10 rows in set. Elapsed: 5.956 sec. Processed 14.69 billion rows, 126.19 GB (2.47 billion rows/s., 21.19 GB/s.)
```
11. Let's see which subreddits had the biggest increase in commnents from 2018 to 2019:
11. Let's see which subreddits had the biggest increase in comments from 2018 to 2019:
```sql
SELECT
@ -718,4 +718,3 @@ ORDER BY quarter ASC;
└────────────┴────────────┴───────────┴──────────┘
70 rows in set. Elapsed: 325.835 sec. Processed 14.69 billion rows, 2.57 TB (45.08 million rows/s., 7.87 GB/s.)
```

View File

@ -22,7 +22,7 @@ The steps below will easily work on a local install of ClickHouse too. The only
## Step-by-step instructions
1. Let's see what the data looks like. The `s3cluster` table function returns a table, so we can `DESCRIBE` the reult:
1. Let's see what the data looks like. The `s3cluster` table function returns a table, so we can `DESCRIBE` the result:
```sql
DESCRIBE s3Cluster(
@ -322,7 +322,7 @@ ORDER BY month ASC;
A spike of uploaders [around covid is noticeable](https://www.theverge.com/2020/3/27/21197642/youtube-with-me-style-videos-views-coronavirus-cook-workout-study-home-beauty).
### More subtitiles over time and when
### More subtitles over time and when
With advances in speech recognition, it's easier than ever to create subtitles for video, with YouTube adding auto-captioning in late 2009 - was the jump then?
@ -484,4 +484,4 @@ ARRAY JOIN
│ 20th │ 16 │
│ 10th │ 6 │
└────────────┴─────────┘
```
```

View File

@ -275,9 +275,9 @@ Type: UInt64
Default: 1000
## max_concurrent_insert_queries
## max_concurrent_queries
Limit on total number of concurrent insert queries. Zero means Unlimited.
Limit on total number of concurrently executed queries. Zero means unlimited. Note that limits on insert and select queries, and on the maximum number of queries for users must also be considered. See also `max_concurrent_insert_queries`, `max_concurrent_select_queries`, `max_concurrent_queries_for_all_users`.
:::note
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
@ -287,9 +287,9 @@ Type: UInt64
Default: 0
## max_concurrent_queries
## max_concurrent_insert_queries
Limit on total number of concurrently executed queries. Zero means Unlimited. Note that limits on insert and select queries, and on the maximum number of queries for users must also be considered. See also max_concurrent_insert_queries, max_concurrent_select_queries, max_concurrent_queries_for_all_users. Zero means unlimited.
Limit on total number of concurrent insert queries. Zero means Unlimited.
:::note
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
@ -1277,49 +1277,6 @@ For more information, see the section [Creating replicated tables](../../engines
<macros incl="macros" optional="true" />
```
## max_concurrent_queries_for_user {#max-concurrent-queries-for-user}
The maximum number of simultaneously processed queries related to MergeTree table per user.
Possible values:
- Positive integer.
- 0 — No limit.
Default value: `0`.
**Example**
``` xml
<max_concurrent_queries_for_user>5</max_concurrent_queries_for_user>
```
## max_concurrent_queries_for_all_users {#max-concurrent-queries-for-all-users}
Throw exception if the value of this setting is less or equal than the current number of simultaneously processed queries.
Example: `max_concurrent_queries_for_all_users` can be set to 99 for all users and database administrator can set it to 100 for itself to run queries for investigation even when the server is overloaded.
Modifying the setting for one query or user does not affect other queries.
Possible values:
- Positive integer.
- 0 — No limit.
Default value: `0`.
**Example**
``` xml
<max_concurrent_queries_for_all_users>99</max_concurrent_queries_for_all_users>
```
**See Also**
- [max_concurrent_queries](#max-concurrent-queries)
## max_open_files {#max-open-files}
The maximum number of open files.
@ -1947,7 +1904,7 @@ Config fields:
- `regexp` - RE2 compatible regular expression (mandatory)
- `replace` - substitution string for sensitive data (optional, by default - six asterisks)
The masking rules are applied to the whole query (to prevent leaks of sensitive data from malformed / non-parsable queries).
The masking rules are applied to the whole query (to prevent leaks of sensitive data from malformed / non-parseable queries).
The `system.events` table has a counter `QueryMaskingRulesMatch` which holds the overall number of query masking rule matches.

View File

@ -1182,7 +1182,7 @@ Possible values:
- `bin` - as 16-bytes binary.
- `str` - as a string of 36 bytes.
- `ext` - as extention with ExtType = 2.
- `ext` - as extension with ExtType = 2.
Default value: `ext`.

View File

@ -646,6 +646,48 @@ Used for the same purpose as `max_block_size`, but it sets the recommended block
However, the block size cannot be more than `max_block_size` rows.
By default: 1,000,000. It only works when reading from MergeTree engines.
## max_concurrent_queries_for_user {#max-concurrent-queries-for-user}
The maximum number of simultaneously processed queries related to MergeTree table per user.
Possible values:
- Positive integer.
- 0 — No limit.
Default value: `0`.
**Example**
``` xml
<max_concurrent_queries_for_user>5</max_concurrent_queries_for_user>
```
## max_concurrent_queries_for_all_users {#max-concurrent-queries-for-all-users}
Throws an exception if the value of this setting is less than or equal to the current number of simultaneously processed queries.
Example: `max_concurrent_queries_for_all_users` can be set to 99 for all users and database administrator can set it to 100 for itself to run queries for investigation even when the server is overloaded.
Modifying the setting for one query or user does not affect other queries.
Possible values:
- Positive integer.
- 0 — No limit.
Default value: `0`.
**Example**
``` xml
<max_concurrent_queries_for_all_users>99</max_concurrent_queries_for_all_users>
```
**See Also**
- [max_concurrent_queries](/docs/en/operations/server-configuration-parameters/settings.md/#max_concurrent_queries)
## merge_tree_min_rows_for_concurrent_read {#setting-merge-tree-min-rows-for-concurrent-read}
If the number of rows to be read from a file of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `merge_tree_min_rows_for_concurrent_read` then ClickHouse tries to perform a concurrent reading from this file on several threads.
@ -1050,6 +1092,12 @@ Timeouts in seconds on the socket used for communicating with the client.
Default value: 10, 300, 300.
## handshake_timeout_ms {#handshake-timeout-ms}
Timeout in milliseconds for receiving Hello packet from replicas during handshake.
Default value: 10000.
## cancel_http_readonly_queries_on_client_close {#cancel-http-readonly-queries-on-client-close}
Cancels HTTP read-only queries (e.g. SELECT) when a client closes the connection without waiting for the response.
@ -1107,7 +1155,7 @@ Default value: `0`.
Could be used for throttling speed when replicating the data to add or replace new nodes.
:::note
60000000 bytes/s approximatly corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
60000000 bytes/s approximately corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
:::
## max_replicated_sends_network_bandwidth_for_server {#max_replicated_sends_network_bandwidth_for_server}
@ -1128,7 +1176,7 @@ Default value: `0`.
Could be used for throttling speed when replicating the data to add or replace new nodes.
:::note
60000000 bytes/s approximatly corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
60000000 bytes/s approximately corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8).
:::
## connect_timeout_with_failover_ms {#connect-timeout-with-failover-ms}
@ -2030,7 +2078,7 @@ FORMAT PrettyCompactMonoBlock
## distributed_push_down_limit {#distributed-push-down-limit}
Enables or disables [LIMIT](#limit) applying on each shard separatelly.
Enables or disables [LIMIT](#limit) applying on each shard separately.
This allows avoiding:
- Sending extra rows over network;
@ -2431,7 +2479,7 @@ Default value: 0.
## allow_introspection_functions {#settings-allow_introspection_functions}
Enables or disables [introspections functions](../../sql-reference/functions/introspection.md) for query profiling.
Enables or disables [introspection functions](../../sql-reference/functions/introspection.md) for query profiling.
Possible values:
@ -3492,7 +3540,7 @@ Default value: `0`.
## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}
Sets how long initial DDL query should wait for Replicated database to precess previous DDL queue entries in seconds.
Sets how long initial DDL query should wait for Replicated database to process previous DDL queue entries in seconds.
Possible values:

View File

@ -28,7 +28,7 @@ The `system.columns` table contains the following columns (the column type is sh
- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sampling key expression.
- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — Compression codec name.
- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bit width for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system is the accuracy of approximate numeric data, exact numeric data, integer data or monetary data. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned.
- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned.

View File

@ -12,7 +12,7 @@ Columns:
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid.
- `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name.
- `metadata_dropped_path` ([String](../../sql-reference/data-types/string.md)) — Path of table's metadata file in metadate_dropped directory.
- `metadata_dropped_path` ([String](../../sql-reference/data-types/string.md)) — Path of table's metadata file in metadata_dropped directory.
- `table_dropped_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time when the next attempt to remove table's data is scheduled. Usually it's the time when the table was dropped plus `database_atomic_delay_before_drop_table_sec`.
**Example**

View File

@ -43,7 +43,7 @@ Columns:
- `data_type` ([String](../../sql-reference/data-types/string.md)) — Column type.
- `character_maximum_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned.
- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bit width for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system is the accuracy of approximate numeric data, exact numeric data, integer data or monetary data. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned.
- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned.

View File

@ -3,7 +3,7 @@ slug: /en/operations/system-tables/licenses
---
# licenses
Сontains licenses of third-party libraries that are located in the [contrib](https://github.com/ClickHouse/ClickHouse/tree/master/contrib) directory of ClickHouse sources.
Contains licenses of third-party libraries that are located in the [contrib](https://github.com/ClickHouse/ClickHouse/tree/master/contrib) directory of ClickHouse sources.
Columns:

View File

@ -100,7 +100,7 @@ Columns:
- `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
:::note
The `move_ttl_info.expression` array is kept mostly for backward compatibility, now the simpliest way to check `TTL MOVE` rule is to use the `move_ttl_info.min` and `move_ttl_info.max` fields.
The `move_ttl_info.expression` array is kept mostly for backward compatibility, now the simplest way to check `TTL MOVE` rule is to use the `move_ttl_info.min` and `move_ttl_info.max` fields.
:::
- `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the minimum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).

View File

@ -14,8 +14,8 @@ Columns:
- `['user_name']` — Connections with the same user name share the same quota.
- `['ip_address']` — Connections from the same IP share the same quota.
- `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota_key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header.
- `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isnt provided by a client, the qouta is tracked for `user_name`.
- `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isnt provided by a client, the qouta is tracked for `ip_address`.
- `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn't provided by a client, the quota is tracked for `user_name`.
- `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn't provided by a client, the quota is tracked for `ip_address`.
- `durations` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Time interval lengths in seconds.
- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows which users the quota is applied to. Values:
- `0` — The quota applies to users specified in the `apply_to_list`.

View File

@ -50,7 +50,7 @@ Columns:
- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
- [Distributed](../../engines/table-engines/special/distributed.md#distributed)
- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `NULL` (including underying `Buffer` table).
- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `NULL` (including underlying `Buffer` table).
- `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes, if it is possible to quickly determine exact number of bytes for the table on storage, otherwise `NULL` (does not include any underlying storage).

View File

@ -43,7 +43,7 @@ Columns:
- `event` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) - For trace type `ProfileEvent` is the name of updated profile event, for other trace types is an empty string.
- `increment` ([UInt64](../../sql-reference/data-types/int-uint.md)) - For trace type `ProfileEvent` is the amount of incremnt of profile event, for other trace types is 0.
- `increment` ([UInt64](../../sql-reference/data-types/int-uint.md)) - For trace type `ProfileEvent` is the amount of increment of profile event, for other trace types is 0.
**Example**

View File

@ -33,7 +33,7 @@ Columns with request response parameters:
- `zxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — ZooKeeper transaction ID. The serial number issued by the ZooKeeper server in response to a successfully executed request (`0` if the request was not executed/returned an error/the client does not know whether the request was executed).
- `error` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — Error code. Can have many values, here are just some of them:
- `ZOK` — The request was executed seccessfully.
- `ZOK` — The request was executed successfully.
- `ZCONNECTIONLOSS` — The connection was lost.
- `ZOPERATIONTIMEOUT` — The request execution timeout has expired.
- `ZSESSIONEXPIRED` — The session has expired.
@ -43,7 +43,7 @@ Columns with request response parameters:
- `path_created` ([String](../../sql-reference/data-types/string.md)) — The path to the created ZooKeeper node (for responses to the `CREATE` request), may differ from the `path` if the node is created as a `sequential`.
- `stat_czxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that caused this ZooKeeper node to be created.
- `stat_mzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that last modified this ZooKeeper node.
- `stat_pzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The transaction ID of the change that last modified childern of this ZooKeeper node.
- `stat_pzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The transaction ID of the change that last modified children of this ZooKeeper node.
- `stat_version` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the data of this ZooKeeper node.
- `stat_cversion` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the children of this ZooKeeper node.
- `stat_dataLength` ([Int32](../../sql-reference/data-types/int-uint.md)) — The length of the data field of this ZooKeeper node.

View File

@ -0,0 +1,53 @@
---
slug: /en/operations/utilities/clickhouse-keeper-client
sidebar_label: clickhouse-keeper-client
---
# clickhouse-keeper-client
A client application to interact with clickhouse-keeper by its native protocol.
## Keys {#clickhouse-keeper-client}
- `-q QUERY`, `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-keeper-client` will start in interactive mode.
- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`.
- `-p N`, `--port=N` — Server port. Default value: 2181
- `--connection-timeout=TIMEOUT` — Set connection timeout in seconds. Default value: 10s.
- `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s.
- `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s.
- `--history-file=FILE_PATH` — Set path of history file. Default value: `~/.keeper-client-history`.
- `--help` — Shows the help message.
## Example {#clickhouse-keeper-client-example}
```bash
./clickhouse-keeper-client -h localhost:2181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30
Connected to ZooKeeper at [::1]:2181 with session_id 137
/ :) ls
keeper foo bar
/ :) cd keeper
/keeper :) ls
api_version
/keeper :) cd api_version
/keeper/api_version :) ls
/keeper/api_version :) cd xyz
Path /keeper/api_version/xyz does not exists
/keeper/api_version :) cd ../../
/ :) ls
keeper foo bar
/ :) get keeper/api_version
2
```
## Commands {#clickhouse-keeper-client-commands}
- `ls [path]` -- Lists the nodes for the given path (default: cwd)
- `cd [path]` -- Changes the working path (default `.`)
- `set <path> <value> [version]` -- Updates the node's value. Only update if version matches (default: -1)
- `create <path> <value>` -- Creates new node
- `get <path>` -- Returns the node's value
- `remove <path>` -- Removes the node
- `rmr <path>` -- Recursively deletes path. Confirmation required
- `flwc <command>` -- Executes four-letter-word command
- `help` -- Prints this message

View File

@ -24,7 +24,7 @@ It is designed to retain the following properties of data:
Most of the properties above are viable for performance testing:
reading data, filtering, aggregatio, and sorting will work at almost the same speed
reading data, filtering, aggregation, and sorting will work at almost the same speed
as on original data due to saved cardinalities, magnitudes, compression ratios, etc.
It works in a deterministic fashion: you define a seed value and the transformation is determined by input data and by seed.

View File

@ -356,7 +356,7 @@ Type: `UInt8`.
Let's consider an example of calculating the `retention` function to determine site traffic.
**1.** Сreate a table to illustrate an example.
**1.** Create a table to illustrate an example.
``` sql
CREATE TABLE retention_test(date Date, uid Int32) ENGINE = Memory;

View File

@ -0,0 +1,44 @@
---
slug: /en/sql-reference/aggregate-functions/reference/boundingRatio
sidebar_position: 2
title: boundingRatio
---
Aggregate function that calculates the slope between the leftmost and rightmost points across a group of values.
Example:
Sample data:
```sql
SELECT
number,
number * 1.5
FROM numbers(10)
```
```response
┌─number─┬─multiply(number, 1.5)─┐
│ 0 │ 0 │
│ 1 │ 1.5 │
│ 2 │ 3 │
│ 3 │ 4.5 │
│ 4 │ 6 │
│ 5 │ 7.5 │
│ 6 │ 9 │
│ 7 │ 10.5 │
│ 8 │ 12 │
│ 9 │ 13.5 │
└────────┴───────────────────────┘
```
The `boundingRatio()` function returns the slope of the line between the leftmost and rightmost points. In the above data, these points are `(0,0)` and `(9,13.5)`.
```sql
SELECT boundingRatio(number, number * 1.5)
FROM numbers(10)
```
```response
┌─boundingRatio(number, multiply(number, 1.5))─┐
│ 1.5 │
└──────────────────────────────────────────────┘
```

View File

@ -5,7 +5,7 @@ sidebar_position: 351
# cramersV
[Cramér's V](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V) (sometimes referred to as Cramér's phi) is a measure of association between two columns in a table. The result of the `cramersV` function ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. It may be viewed as the association between two variables as a percentage of their maximum possible variation.
[Cramer's V](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V) (sometimes referred to as Cramer's phi) is a measure of association between two columns in a table. The result of the `cramersV` function ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. It may be viewed as the association between two variables as a percentage of their maximum possible variation.
**Syntax**
@ -69,4 +69,4 @@ Result:
┌─────cramersV(a, b)─┐
│ 0.8944271909999159 │
└────────────────────┘
```
```

View File

@ -6,7 +6,7 @@ sidebar_position: 352
# cramersVBiasCorrected
Cramér's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv.md) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramér's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction).
Cramer's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv.md) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramer's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction).

View File

@ -6,7 +6,7 @@ sidebar_title: exponentialMovingAverage
## exponentialMovingAverage
Сalculates the exponential moving average of values for the determined time.
Calculates the exponential moving average of values for the determined time.
**Syntax**
@ -27,7 +27,7 @@ Each `value` corresponds to the determinate `timeunit`. The half-life `x` is the
**Returned values**
- Returnes an [exponentially smoothed moving average](https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average) of the values for the past `x` time at the latest point of time.
- Returns an [exponentially smoothed moving average](https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average) of the values for the past `x` time at the latest point of time.
Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64).

View File

@ -5,7 +5,7 @@ sidebar_position: 125
# groupBitAnd
Applies bitwise `AND` for series of numbers.
Applies bit-wise `AND` for series of numbers.
``` sql
groupBitAnd(expr)

View File

@ -5,7 +5,7 @@ sidebar_position: 126
# groupBitOr
Applies bitwise `OR` for series of numbers.
Applies bit-wise `OR` for series of numbers.
``` sql
groupBitOr(expr)

View File

@ -5,7 +5,7 @@ sidebar_position: 127
# groupBitXor
Applies bitwise `XOR` for series of numbers.
Applies bit-wise `XOR` for series of numbers.
``` sql
groupBitXor(expr)

View File

@ -9,74 +9,75 @@ toc_hidden: true
Standard aggregate functions:
- [count](../../../sql-reference/aggregate-functions/reference/count.md)
- [min](../../../sql-reference/aggregate-functions/reference/min.md)
- [max](../../../sql-reference/aggregate-functions/reference/max.md)
- [sum](../../../sql-reference/aggregate-functions/reference/sum.md)
- [avg](../../../sql-reference/aggregate-functions/reference/avg.md)
- [any](../../../sql-reference/aggregate-functions/reference/any.md)
- [stddevPop](../../../sql-reference/aggregate-functions/reference/stddevpop.md)
- [stddevSamp](../../../sql-reference/aggregate-functions/reference/stddevsamp.md)
- [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md)
- [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md)
- [covarPop](../../../sql-reference/aggregate-functions/reference/covarpop.md)
- [covarSamp](../../../sql-reference/aggregate-functions/reference/covarsamp.md)
- [count](/docs/en/sql-reference/aggregate-functions/reference/count.md)
- [min](/docs/en/sql-reference/aggregate-functions/reference/min.md)
- [max](/docs/en/sql-reference/aggregate-functions/reference/max.md)
- [sum](/docs/en/sql-reference/aggregate-functions/reference/sum.md)
- [avg](/docs/en/sql-reference/aggregate-functions/reference/avg.md)
- [any](/docs/en/sql-reference/aggregate-functions/reference/any.md)
- [stddevPop](/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md)
- [stddevSamp](/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md)
- [varPop](/docs/en/sql-reference/aggregate-functions/reference/varpop.md)
- [varSamp](/docs/en/sql-reference/aggregate-functions/reference/varsamp.md)
- [covarPop](/docs/en/sql-reference/aggregate-functions/reference/covarpop.md)
- [covarSamp](/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md)
ClickHouse-specific aggregate functions:
- [anyHeavy](../../../sql-reference/aggregate-functions/reference/anyheavy.md)
- [anyLast](../../../sql-reference/aggregate-functions/reference/anylast.md)
- [first_value](../../../sql-reference/aggregate-functions/reference/first_value.md)
- [last_value](../../../sql-reference/aggregate-functions/reference/last_value.md)
- [argMin](../../../sql-reference/aggregate-functions/reference/argmin.md)
- [argMax](../../../sql-reference/aggregate-functions/reference/argmax.md)
- [avgWeighted](../../../sql-reference/aggregate-functions/reference/avgweighted.md)
- [topK](../../../sql-reference/aggregate-functions/reference/topk.md)
- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md)
- [groupArray](../../../sql-reference/aggregate-functions/reference/grouparray.md)
- [groupArrayLast](../../../sql-reference/aggregate-functions/reference/grouparraylast.md)
- [groupUniqArray](../../../sql-reference/aggregate-functions/reference/groupuniqarray.md)
- [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md)
- [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
- [groupArrayMovingSum](../../../sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
- [groupBitAnd](../../../sql-reference/aggregate-functions/reference/groupbitand.md)
- [groupBitOr](../../../sql-reference/aggregate-functions/reference/groupbitor.md)
- [groupBitXor](../../../sql-reference/aggregate-functions/reference/groupbitxor.md)
- [groupBitmap](../../../sql-reference/aggregate-functions/reference/groupbitmap.md)
- [groupBitmapAnd](../../../sql-reference/aggregate-functions/reference/groupbitmapand.md)
- [groupBitmapOr](../../../sql-reference/aggregate-functions/reference/groupbitmapor.md)
- [groupBitmapXor](../../../sql-reference/aggregate-functions/reference/groupbitmapxor.md)
- [sumWithOverflow](../../../sql-reference/aggregate-functions/reference/sumwithoverflow.md)
- [sumMap](../../../sql-reference/aggregate-functions/reference/summap.md)
- [minMap](../../../sql-reference/aggregate-functions/reference/minmap.md)
- [maxMap](../../../sql-reference/aggregate-functions/reference/maxmap.md)
- [skewSamp](../../../sql-reference/aggregate-functions/reference/skewsamp.md)
- [skewPop](../../../sql-reference/aggregate-functions/reference/skewpop.md)
- [kurtSamp](../../../sql-reference/aggregate-functions/reference/kurtsamp.md)
- [kurtPop](../../../sql-reference/aggregate-functions/reference/kurtpop.md)
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md)
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md)
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md)
- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md)
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md)
- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md)
- [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md)
- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md)
- [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md)
- [quantileExactLow](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactlow)
- [quantileExactHigh](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexacthigh)
- [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md)
- [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md)
- [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md)
- [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md)
- [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md)
- [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md)
- [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16)
- [quantileBFloat16Weighted](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted)
- [simpleLinearRegression](../../../sql-reference/aggregate-functions/reference/simplelinearregression.md)
- [stochasticLinearRegression](../../../sql-reference/aggregate-functions/reference/stochasticlinearregression.md)
- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md)
- [categoricalInformationValue](../../../sql-reference/aggregate-functions/reference/categoricalinformationvalue.md)
- [anyHeavy](/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md)
- [anyLast](/docs/en/sql-reference/aggregate-functions/reference/anylast.md)
- [boundingRatio](/docs/en/sql-reference/aggregate-functions/reference/boundrat.md)
- [first_value](/docs/en/sql-reference/aggregate-functions/reference/first_value.md)
- [last_value](/docs/en/sql-reference/aggregate-functions/reference/last_value.md)
- [argMin](/docs/en/sql-reference/aggregate-functions/reference/argmin.md)
- [argMax](/docs/en/sql-reference/aggregate-functions/reference/argmax.md)
- [avgWeighted](/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md)
- [topK](/docs/en/sql-reference/aggregate-functions/reference/topk.md)
- [topKWeighted](/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md)
- [groupArray](/docs/en/sql-reference/aggregate-functions/reference/grouparray.md)
- [groupArrayLast](/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md)
- [groupUniqArray](/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md)
- [groupArrayInsertAt](/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md)
- [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
- [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
- [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md)
- [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md)
- [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md)
- [groupBitmap](/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md)
- [groupBitmapAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md)
- [groupBitmapOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md)
- [groupBitmapXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md)
- [sumWithOverflow](/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md)
- [sumMap](/docs/en/sql-reference/aggregate-functions/reference/summap.md)
- [minMap](/docs/en/sql-reference/aggregate-functions/reference/minmap.md)
- [maxMap](/docs/en/sql-reference/aggregate-functions/reference/maxmap.md)
- [skewSamp](/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md)
- [skewPop](/docs/en/sql-reference/aggregate-functions/reference/skewpop.md)
- [kurtSamp](/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md)
- [kurtPop](/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md)
- [uniq](/docs/en/sql-reference/aggregate-functions/reference/uniq.md)
- [uniqExact](/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md)
- [uniqCombined](/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md)
- [uniqCombined64](/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md)
- [uniqHLL12](/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md)
- [uniqTheta](/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md)
- [quantile](/docs/en/sql-reference/aggregate-functions/reference/quantile.md)
- [quantiles](/docs/en/sql-reference/aggregate-functions/reference/quantiles.md)
- [quantileExact](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md)
- [quantileExactLow](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactlow)
- [quantileExactHigh](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md#quantileexacthigh)
- [quantileExactWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md)
- [quantileTiming](/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md)
- [quantileTimingWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md)
- [quantileDeterministic](/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md)
- [quantileTDigest](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md)
- [quantileTDigestWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md)
- [quantileBFloat16](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16)
- [quantileBFloat16Weighted](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted)
- [simpleLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md)
- [stochasticLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md)
- [stochasticLogisticRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md)
- [categoricalInformationValue](/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md)
- [contingency](./contingency.md)
- [cramersV](./cramersv.md)
- [cramersVBiasCorrected](./cramersvbiascorrected.md)

View File

@ -30,11 +30,11 @@ Samples must belong to continuous, one-dimensional probability distributions.
The null hypothesis is that samples come from the same distribution, e.g. F(x) = G(x) for all x.
And the alternative is that the distributions are not identical.
- `'greater'`
The null hypothesis is that values in the first sample are *stohastically smaller* than those in the second one,
The null hypothesis is that values in the first sample are *stochastically smaller* than those in the second one,
e.g. the CDF of first distribution lies above and hence to the left of that for the second one.
Which in fact means that F(x) >= G(x) for all x. And the alternative in this case is that F(x) < G(x) for at least one x.
- `'less'`.
The null hypothesis is that values in the first sample are *stohastically greater* than those in the second one,
The null hypothesis is that values in the first sample are *stochastically greater* than those in the second one,
e.g. the CDF of first distribution lies below and hence to the right of that for the second one.
Which in fact means that F(x) <= G(x) for all x. And the alternative in this case is that F(x) > G(x) for at least one x.
- `computation_method` — the method used to compute p-value. (Optional, default: `'auto'`.) [String](../../../sql-reference/data-types/string.md).

View File

@ -14,7 +14,7 @@ The result depends on the order of running the query, and is nondeterministic.
When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function.
:::note
Using `quantileTDigestWeighted` [is not recommended for tiny data sets](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) and can lead to significat error. In this case, consider possibility of using [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) instead.
Using `quantileTDigestWeighted` [is not recommended for tiny data sets](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) and can lead to significant error. In this case, consider using [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) instead.
:::
**Syntax**

View File

@ -18,7 +18,7 @@ stochasticLinearRegression(1.0, 1.0, 10, 'SGD')
1. `learning rate` is the coefficient on step length, when gradient descent step is performed. Too big learning rate may cause infinite weights of the model. Default is `0.00001`.
2. `l2 regularization coefficient` which may help to prevent overfitting. Default is `0.1`.
3. `mini-batch size` sets the number of elements, which gradients will be computed and summed to perform one step of gradient descent. Pure stochastic descent uses one element, however having small batches(about 10 elements) make gradient steps more stable. Default is `15`.
4. `method for updating weights`, they are: `Adam` (by default), `SGD`, `Momentum`, `Nesterov`. `Momentum` and `Nesterov` require little bit more computations and memory, however they happen to be useful in terms of speed of convergance and stability of stochastic gradient methods.
4. `method for updating weights`, they are: `Adam` (by default), `SGD`, `Momentum`, `Nesterov`. `Momentum` and `Nesterov` require a little bit more computation and memory, however they happen to be useful in terms of speed of convergence and stability of stochastic gradient methods.
### Usage

View File

@ -22,7 +22,7 @@ Resolution: 1 second.
The point in time is saved as a [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time), regardless of the time zone or daylight saving time. The time zone affects how the values of the `DateTime` type values are displayed in text format and how the values specified as strings are parsed (2020-01-01 05:00:01).
Timezone agnostic unix timestamp is stored in tables, and the timezone is used to transform it to text format or back during data import/export or to make calendar calculations on the values (example: `toDate`, `toHour` functions et cetera). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata.
Timezone agnostic Unix timestamp is stored in tables, and the timezone is used to transform it to text format or back during data import/export or to make calendar calculations on the values (example: `toDate`, `toHour` functions etc.). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata.
A list of supported time zones can be found in the [IANA Time Zone Database](https://www.iana.org/time-zones) and also can be queried by `SELECT * FROM system.time_zones`. [The list](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) is also available at Wikipedia.
@ -30,7 +30,7 @@ You can explicitly set a time zone for `DateTime`-type columns when creating a t
The [clickhouse-client](../../interfaces/cli.md) applies the server time zone by default if a time zone isn't explicitly set when initializing the data type. To use the client time zone, run `clickhouse-client` with the `--use_client_time_zone` parameter.
ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionaly you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.
ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.
When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format) setting.
@ -120,9 +120,9 @@ FROM dt
As timezone conversion only changes the metadata, the operation has no computation cost.
## Limitations on timezones support
## Limitations on time zones support
Some timezones may not be supported completely. There are a few cases:
Some time zones may not be supported completely. There are a few cases:
If the offset from UTC is not a multiple of 15 minutes, the calculation of hours and minutes can be incorrect. For example, the time zone in Monrovia, Liberia has offset UTC -0:44:30 before 7 Jan 1972. If you are doing calculations on the historical time in Monrovia timezone, the time processing functions may give incorrect results. The results after 7 Jan 1972 will be correct nevertheless.

View File

@ -27,7 +27,7 @@ ClickHouse data types include:
- **Aggregation function types**: use [`SimpleAggregateFunction`](./simpleaggregatefunction.md) and [`AggregateFunction`](./aggregatefunction.md) for storing the intermediate status of aggregate function results
- **Nested data structures**: A [`Nested` data structure](./nested-data-structures/index.md) is like a table inside a cell
- **Tuples**: A [`Tuple` of elements](./tuple.md), each having an individual type.
- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column gettings its default value for the data type)
- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column getting its default value for the data type)
- **IP addresses**: use [`IPv4`](./domains/ipv4.md) and [`IPv6`](./domains/ipv6.md) to efficiently store IP addresses
- **Geo types**: for [geographical data](./geo.md), including `Point`, `Ring`, `Polygon` and `MultiPolygon`
- **Special data types**: including [`Expression`](./special-data-types/expression.md), [`Set`](./special-data-types/set.md), [`Nothing`](./special-data-types/nothing.md) and [`Interval`](./special-data-types/interval.md)

View File

@ -247,7 +247,7 @@ LAYOUT(FLAT(INITIAL_ARRAY_SIZE 50000 MAX_ARRAY_SIZE 5000000))
### hashed
The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers In practice, the number of keys can reach tens of millions of items.
The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items.
The dictionary key has the [UInt64](../../sql-reference/data-types/int-uint.md) type.
@ -984,7 +984,7 @@ SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source wher
...
```
For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronious and asynchronous updates are supported.
For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronous and asynchronous updates are supported.
It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to only request data that was changed after the previous update. If `update_field` is specified as part of the dictionary source configuration, the value of the previous update time in seconds will be added to the data request. Depending on the source type (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, or ODBC), different logic will be applied to `update_field` before requesting data from an external source.
@ -1243,8 +1243,8 @@ Setting fields:
- `password` Password required for the authentication.
- `headers` All custom HTTP headers entries used for the HTTP request. Optional parameter.
- `header` Single HTTP header entry.
- `name` Identifiant name used for the header send on the request.
- `value` Value set for a specific identifiant name.
- `name` Identifier name used for the header sent on the request.
- `value` Value set for a specific identifier name.
When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`) remote hosts for HTTP dictionaries are checked against the contents of `remote_url_allow_hosts` section from config to prevent database users from accessing arbitrary HTTP servers.

View File

@ -140,7 +140,7 @@ range([start, ] end [, step])
**Implementation details**
- All arguments `start`, `end`, `step` must be below data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, which's type is a super type of all arguments's.
- All arguments `start`, `end`, `step` must be one of the following data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, whose type is a super type of the types of all arguments.
- An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting.
**Examples**
@ -1236,7 +1236,7 @@ arrayAUC(arr_scores, arr_labels)
**Arguments**
- `arr_scores` — scores prediction model gives.
- `arr_labels` — labels of samples, usually 1 for positive sample and 0 for negtive sample.
- `arr_labels` — labels of samples, usually 1 for positive sample and 0 for negative sample.
**Returned value**

View File

@ -226,7 +226,7 @@ Result:
Returns result of [logical conjunction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. The countdown starts from 0 from the right to the left.
The conjuction for bitwise operations:
The conjunction for bit-wise operations:
0 AND 0 = 0
@ -291,7 +291,7 @@ Result:
Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. The countdown starts from 0 from the right to the left.
The disjunction for bitwise operations:
The disjunction for bit-wise operations:
0 OR 0 = 0

View File

@ -487,7 +487,7 @@ cosineDistance(vector1, vector2)
**Returned value**
- Cosine of the angle between two vectors substracted from one.
- Cosine of the angle between two vectors subtracted from one.
Type: [Float](../../sql-reference/data-types/float.md).

View File

@ -31,9 +31,9 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad])
**Arguments**
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
- `plaintext` — Text thats need to be encrypted. [String](../../sql-reference/data-types/string.md#string).
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Required for `-gcm` modes, optinal for others. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
@ -165,7 +165,7 @@ Received exception from server (version 22.6.1):
Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-ofb', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123').
```
While `aes_encrypt_mysql` produces MySQL-compatitalbe output:
While `aes_encrypt_mysql` produces MySQL-compatible output:
Query:
@ -233,7 +233,7 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad])
- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Required for `-gcm` modes, optinal for others. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
- `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
@ -364,7 +364,7 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Optinal. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string).
**Returned value**

View File

@ -6,7 +6,7 @@ sidebar_label: Files
## file
Reads file as string and loads the data into the specified column. The actual file content is not interpreted.
Reads a file as string and loads the data into the specified column. The file content is not interpreted.
Also see table function [file](../table-functions/file.md).
@ -18,15 +18,13 @@ file(path[, default])
**Arguments**
- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports the following wildcards: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings.
- `default` — The value that will be returned in the case the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal).
- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings.
- `default` — The value returned if the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal).
**Example**
Inserting data from files a.txt and b.txt into a table as strings:
Query:
``` sql
INSERT INTO table SELECT file('a.txt'), file('b.txt');
```

View File

@ -8,7 +8,7 @@ sidebar_label: Nullable
## isNull
Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal).
Returns whether the argument is [NULL](../../sql-reference/syntax.md#null-literal).
``` sql
isNull(x)
@ -18,7 +18,7 @@ Alias: `ISNULL`.
**Arguments**
- `x` — A value with a non-compound data type.
- `x` — A value of non-compound data type.
**Returned value**
@ -27,7 +27,7 @@ Alias: `ISNULL`.
**Example**
Input table
Table:
``` text
┌─x─┬────y─┐
@ -36,12 +36,14 @@ Input table
└───┴──────┘
```
Query
Query:
``` sql
SELECT x FROM t_null WHERE isNull(y);
```
Result:
``` text
┌─x─┐
│ 1 │
@ -50,7 +52,7 @@ SELECT x FROM t_null WHERE isNull(y);
## isNotNull
Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal).
Returns whether the argument is not [NULL](../../sql-reference/syntax.md#null-literal).
``` sql
isNotNull(x)
@ -58,16 +60,16 @@ isNotNull(x)
**Arguments:**
- `x` — A value with a non-compound data type.
- `x` — A value of non-compound data type.
**Returned value**
- `0` if `x` is `NULL`.
- `1` if `x` is not `NULL`.
- `0` if `x` is `NULL`.
**Example**
Input table
Table:
``` text
┌─x─┬────y─┐
@ -76,12 +78,14 @@ Input table
└───┴──────┘
```
Query
Query:
``` sql
SELECT x FROM t_null WHERE isNotNull(y);
```
Result:
``` text
┌─x─┐
│ 2 │
@ -90,7 +94,7 @@ SELECT x FROM t_null WHERE isNotNull(y);
## coalesce
Checks from left to right whether `NULL` arguments were passed and returns the first non-`NULL` argument.
Returns the leftmost non-`NULL` argument.
``` sql
coalesce(x,...)
@ -98,11 +102,11 @@ coalesce(x,...)
**Arguments:**
- Any number of parameters of a non-compound type. All parameters must be compatible by data type.
- Any number of parameters of non-compound type. All parameters must be of mutually compatible data types.
**Returned values**
- The first non-`NULL` argument.
- The first non-`NULL` argument.
- `NULL`, if all arguments are `NULL`.
**Example**
@ -110,10 +114,10 @@ coalesce(x,...)
Consider a list of contacts that may specify multiple ways to contact a customer.
``` text
┌─name─────┬─mail─┬─phone─────┬──icq─┐
│ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │
│ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
└──────────┴──────┴───────────┴──────┘
┌─name─────┬─mail─┬─phone─────┬──telegram─┐
│ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │
│ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
└──────────┴──────┴───────────┴───────────┘
```
The `mail` and `phone` fields are of type String, but the `telegram` field is `UInt32`, so it needs to be converted to `String`.
@ -121,22 +125,22 @@ The `mail` and `phone` fields are of type String, but the `icq` field is `UInt32
Get the first available contact method for the customer from the contact list:
``` sql
SELECT name, coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook;
SELECT name, coalesce(mail, phone, CAST(telegram,'Nullable(String)')) FROM aBook;
```
``` text
┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐
│ client 1 │ 123-45-67 │
│ client 2 │ ᴺᵁᴸᴸ │
└──────────┴──────────────────────────────────────────────────────┘
┌─name─────┬─coalesce(mail, phone, CAST(telegram, 'Nullable(String)'))─┐
│ client 1 │ 123-45-67
│ client 2 │ ᴺᵁᴸᴸ
└──────────┴───────────────────────────────────────────────────────────
```
## ifNull
Returns an alternative value if the main argument is `NULL`.
Returns an alternative value if the argument is `NULL`.
``` sql
ifNull(x,alt)
ifNull(x, alt)
```
**Arguments:**
@ -146,25 +150,33 @@ ifNull(x,alt)
**Returned values**
- The value `x`, if `x` is not `NULL`.
- The value `alt`, if `x` is `NULL`.
- `x` if `x` is not `NULL`.
- `alt` if `x` is `NULL`.
**Example**
Query:
``` sql
SELECT ifNull('a', 'b');
```
Result:
``` text
┌─ifNull('a', 'b')─┐
│ a │
└──────────────────┘
```
Query:
``` sql
SELECT ifNull(NULL, 'b');
```
Result:
``` text
┌─ifNull(NULL, 'b')─┐
│ b │
@ -173,7 +185,7 @@ SELECT ifNull(NULL, 'b');
## nullIf
Returns `NULL` if the arguments are equal.
Returns `NULL` if both arguments are equal.
``` sql
nullIf(x, y)
@ -181,29 +193,37 @@ nullIf(x, y)
**Arguments:**
`x`, `y` — Values for comparison. They must be compatible types, or ClickHouse will generate an exception.
`x`, `y` — Values to compare. Must be of compatible types.
**Returned values**
- `NULL`, if the arguments are equal.
- The `x` value, if the arguments are not equal.
- `NULL` if the arguments are equal.
- `x` if the arguments are not equal.
**Example**
Query:
``` sql
SELECT nullIf(1, 1);
```
Result:
``` text
┌─nullIf(1, 1)─┐
│ ᴺᵁᴸᴸ │
└──────────────┘
```
Query:
``` sql
SELECT nullIf(1, 2);
```
Result:
``` text
┌─nullIf(1, 2)─┐
│ 1 │
@ -212,7 +232,7 @@ SELECT nullIf(1, 2);
## assumeNotNull
Results in an equivalent non-`Nullable` value for a [Nullable](../../sql-reference/data-types/nullable.md) type. In case the original value is `NULL` the result is undetermined. See also `ifNull` and `coalesce` functions.
Returns the corresponding non-`Nullable` value for a value of [Nullable](../../sql-reference/data-types/nullable.md) type. If the original value is `NULL`, an arbitrary result can be returned. See also functions `ifNull` and `coalesce`.
``` sql
assumeNotNull(x)
@ -224,36 +244,29 @@ assumeNotNull(x)
**Returned values**
- The original value from the non-`Nullable` type, if it is not `NULL`.
- Implementation specific result if the original value was `NULL`.
- The input value as non-`Nullable` type, if it is not `NULL`.
- An arbitrary value, if the input value is `NULL`.
**Example**
Consider the `t_null` table.
``` sql
SHOW CREATE TABLE t_null;
```
Table:
``` text
┌─statement─────────────────────────────────────────────────────────────────┐
│ CREATE TABLE default.t_null ( x Int8, y Nullable(Int8)) ENGINE = TinyLog │
└───────────────────────────────────────────────────────────────────────────┘
```
``` text
┌─x─┬────y─┐
│ 1 │ ᴺᵁᴸᴸ │
│ 2 │ 3 │
└───┴──────┘
```
Apply the `assumeNotNull` function to the `y` column.
Query:
``` sql
SELECT assumeNotNull(y) FROM t_null;
SELECT assumeNotNull(y) FROM t_null;
```
Result:
``` text
┌─assumeNotNull(y)─┐
│ 0 │
@ -261,10 +274,14 @@ SELECT assumeNotNull(y) FROM t_null;
└──────────────────┘
```
Query:
``` sql
SELECT toTypeName(assumeNotNull(y)) FROM t_null;
```
Result:
``` text
┌─toTypeName(assumeNotNull(y))─┐
│ Int8 │
@ -282,28 +299,36 @@ toNullable(x)
**Arguments:**
- `x`The value of any non-compound type.
- `x` — A value of non-compound type.
**Returned value**
- The input value with a `Nullable` type.
- The input value but of `Nullable` type.
**Example**
Query:
``` sql
SELECT toTypeName(10);
```
Result:
``` text
┌─toTypeName(10)─┐
│ UInt8 │
└────────────────┘
```
Query:
``` sql
SELECT toTypeName(toNullable(10));
```
Result:
``` text
┌─toTypeName(toNullable(10))─┐
│ Nullable(UInt8) │

View File

@ -12,7 +12,7 @@ A latitude and longitude pair can be transformed to a 64-bit H3 index, identifyi
The H3 index is used primarily for bucketing locations and other geospatial manipulations.
The full description of the H3 system is available at [the Uber Engeneering site](https://eng.uber.com/h3/).
The full description of the H3 system is available at [the Uber Engineering site](https://eng.uber.com/h3/).
## h3IsValid

View File

@ -249,7 +249,7 @@ s2RectAdd(s2pointLow, s2pointHigh, s2Point)
**Returned values**
- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` — Hight S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/float.md).
- `s2PointHigh` — High S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/float.md).
**Example**

View File

@ -0,0 +1,52 @@
---
slug: /en/sql-reference/functions/geo/svg
sidebar_label: SVG
title: "Functions for Generating SVG images from Geo data"
---
## Syntax
``` sql
SVG(geometry,[style])
```
### Parameters
- `geometry` — Geo data
- `style` — Optional style name
### Returned value
- The SVG representation of the geometry:
- SVG circle
- SVG polygon
- SVG path
Type: String
## Examples
### Circle
```sql
SELECT SVG((0., 0.))
```
```response
<circle cx="0" cy="0" r="5" style=""/>
```
### Polygon
```sql
SELECT SVG([(0., 0.), (10, 0), (10, 10), (0, 10)])
```
```response
<polygon points="0,0 0,10 10,10 10,0 0,0" style=""/>
```
### Path
```sql
SELECT SVG([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]])
```
```response
<g fill-rule="evenodd"><path d="M 0,0 L 0,10 L 10,10 L 10,0 L 0,0M 4,4 L 5,4 L 5,5 L 4,5 L 4,4 z " style=""/></g>
```

View File

@ -697,7 +697,7 @@ SELECT murmurHash2_64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:
## gccMurmurHash
Calculates a 64-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash value using the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191). It is portable between CLang and GCC builds.
Calculates a 64-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash value using the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191). It is portable between Clang and GCC builds.
**Syntax**
@ -1161,7 +1161,7 @@ wordShingleSimHashUTF8(string[, shinglesize])
**Arguments**
- `string` — String. [String](/docs/en/sql-reference/data-types/string.md).
- `shinglesize` — The size of a word shingle. Optinal. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
**Returned value**

View File

@ -12,7 +12,9 @@ Zero as an argument is considered `false`, non-zero values are considered `true`
## and
Calculates the logical conjunction between two or more values.
Calculates the logical conjunction of two or more values.
Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `(val_1 AND val_2 AND ... AND val_{i-1})` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT and(number = 2, intDiv(1, number)) FROM numbers(5)`.
**Syntax**
@ -20,9 +22,7 @@ Calculates the logical conjunction between two or more values.
and(val1, val2...)
```
Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `(val_1 AND val_2 AND ... AND val_{i-1})` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT and(number = 2, intDiv(1, number)) FROM numbers(5)`.
Alias: The [AND Operator](../../sql-reference/operators/index.md#logical-and-operator).
Alias: The [AND operator](../../sql-reference/operators/index.md#logical-and-operator).
**Arguments**
@ -30,8 +30,8 @@ Alias: The [AND Operator](../../sql-reference/operators/index.md#logical-and-ope
**Returned value**
- `0`, if there at least one argument evaluates to `false`,
- `NULL`, if no argumetn evaluates to `false` and at least one argument is `NULL`,
- `0`, if at least one argument evaluates to `false`,
- `NULL`, if no argument evaluates to `false` and at least one argument is `NULL`,
- `1`, otherwise.
Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
@ -66,7 +66,9 @@ Result:
## or
Calculates the logical disjunction between two or more values.
Calculates the logical disjunction of two or more values.
Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `((NOT val_1) AND (NOT val_2) AND ... AND (NOT val_{i-1}))` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT or(number = 0, intDiv(1, number) != 0) FROM numbers(5)`.
**Syntax**
@ -74,9 +76,7 @@ Calculates the logical disjunction between two or more values.
or(val1, val2...)
```
Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `((NOT val_1) AND (NOT val_2) AND ... AND (NOT val_{i-1}))` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT or(number = 0, intDiv(1, number) != 0) FROM numbers(5)`.
Alias: The [OR Operator](../../sql-reference/operators/index.md#logical-or-operator).
Alias: The [OR operator](../../sql-reference/operators/index.md#logical-or-operator).
**Arguments**
@ -120,7 +120,7 @@ Result:
## not
Calculates logical negation of a value.
Calculates the logical negation of a value.
**Syntax**
@ -128,7 +128,7 @@ Calculates logical negation of a value.
not(val);
```
Alias: The [Negation Operator](../../sql-reference/operators/index.md#logical-negation-operator).
Alias: The [Negation operator](../../sql-reference/operators/index.md#logical-negation-operator).
**Arguments**
@ -158,7 +158,7 @@ Result:
## xor
Calculates the logical exclusive disjunction between two or more values. For more than two values the function first xor-s the first two values, then xor-s the result with the third value etc.
Calculates the logical exclusive disjunction of two or more values. For more than two input values, the function first xor-s the first two values, then xor-s the result with the third value etc.
**Syntax**

View File

@ -52,7 +52,7 @@ Alias: `ln(x)`
## exp2
Returns 2 to the power of the given argumetn
Returns 2 to the power of the given argument.
**Syntax**
@ -82,7 +82,7 @@ log2(x)
## exp10
Returns 10 to the power of the given argumetn
Returns 10 to the power of the given argument.
**Syntax**

File diff suppressed because it is too large Load Diff

View File

@ -31,7 +31,7 @@ Uses a linear congruential generator.
## randCanonical
Returns a Float64 value, evenly distributed in [0, 1).
Returns a random Float64 value, evenly distributed in interval [0, 1).
## randConstant
@ -54,11 +54,9 @@ Result:
└────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘
```
# Functions for Generating Random Numbers based on a Distribution
## randUniform
Returns a Float64 drawn uniformly from the interval between `min` and `max` ([continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution)).
Returns a random Float64 drawn uniformly from interval [`min`, `max`) ([continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution)).
**Syntax**
@ -68,8 +66,8 @@ randUniform(min, max)
**Arguments**
- `min` - `Float64` - min value of the range,
- `max` - `Float64` - max value of the range.
- `min` - `Float64` - left boundary of the range,
- `max` - `Float64` - right boundary of the range.
**Returned value**
@ -97,7 +95,7 @@ Result:
## randNormal
Returns a Float64 drawn from a [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution).
Returns a random Float64 drawn from a [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution).
**Syntax**
@ -108,7 +106,7 @@ randNormal(mean, variance)
**Arguments**
- `mean` - `Float64` - mean value of distribution,
- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance).
- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance) of the distribution.
**Returned value**
@ -136,7 +134,7 @@ Result:
## randLogNormal
Returns a Float64 drawn from a [log-normal distribution](https://en.wikipedia.org/wiki/Log-normal_distribution).
Returns a random Float64 drawn from a [log-normal distribution](https://en.wikipedia.org/wiki/Log-normal_distribution).
**Syntax**
@ -147,7 +145,7 @@ randLogNormal(mean, variance)
**Arguments**
- `mean` - `Float64` - mean value of distribution,
- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance).
- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance) of the distribution.
**Returned value**
@ -175,7 +173,7 @@ Result:
## randBinomial
Returns a UInt64 drawn from a [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution).
Returns a random UInt64 drawn from a [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution).
**Syntax**
@ -186,7 +184,7 @@ randBinomial(experiments, probability)
**Arguments**
- `experiments` - `UInt64` - number of experiments,
- `probability` - `Float64` - probability of success in each experiment (values in `0...1` range only).
- `probability` - `Float64` - probability of success in each experiment, a value between 0 and 1.
**Returned value**
@ -214,7 +212,7 @@ Result:
## randNegativeBinomial
Returns a UInt64 drawn from a [negative binomial distribution](https://en.wikipedia.org/wiki/Negative_binomial_distribution).
Returns a random UInt64 drawn from a [negative binomial distribution](https://en.wikipedia.org/wiki/Negative_binomial_distribution).
**Syntax**
@ -225,7 +223,7 @@ randNegativeBinomial(experiments, probability)
**Arguments**
- `experiments` - `UInt64` - number of experiments,
- `probability` - `Float64` - probability of failure in each experiment (values in `0...1` range only).
- `probability` - `Float64` - probability of failure in each experiment, a value between 0 and 1.
**Returned value**
@ -253,7 +251,7 @@ Result:
## randPoisson
Returns a UInt64 drawn from a [Poisson distribution](https://en.wikipedia.org/wiki/Poisson_distribution).
Returns a random UInt64 drawn from a [Poisson distribution](https://en.wikipedia.org/wiki/Poisson_distribution).
**Syntax**
@ -291,7 +289,7 @@ Result:
## randBernoulli
Returns a UInt64 drawn from a [Bernoulli distribution](https://en.wikipedia.org/wiki/Bernoulli_distribution).
Returns a random UInt64 drawn from a [Bernoulli distribution](https://en.wikipedia.org/wiki/Bernoulli_distribution).
**Syntax**
@ -301,7 +299,7 @@ randBernoulli(probability)
**Arguments**
- `probability` - `Float64` - probability of success (values in `0...1` range only).
- `probability` - `Float64` - probability of success, a value between 0 and 1.
**Returned value**
@ -329,7 +327,7 @@ Result:
## randExponential
Returns a Float64 drawn from a [exponential distribution](https://en.wikipedia.org/wiki/Exponential_distribution).
Returns a random Float64 drawn from an [exponential distribution](https://en.wikipedia.org/wiki/Exponential_distribution).
**Syntax**
@ -367,7 +365,7 @@ Result:
## randChiSquared
Returns a Float64 drawn from a [Chi-square distribution](https://en.wikipedia.org/wiki/Chi-squared_distribution) - a distribution of a sum of the squares of k independent standard normal random variables.
Returns a random Float64 drawn from a [Chi-square distribution](https://en.wikipedia.org/wiki/Chi-squared_distribution) - a distribution of a sum of the squares of k independent standard normal random variables.
**Syntax**
@ -405,7 +403,7 @@ Result:
## randStudentT
Returns a Float64 drawn from a [Student's t-distribution](https://en.wikipedia.org/wiki/Student%27s_t-distribution).
Returns a random Float64 drawn from a [Student's t-distribution](https://en.wikipedia.org/wiki/Student%27s_t-distribution).
**Syntax**
@ -443,7 +441,7 @@ Result:
## randFisherF
Returns a Float64 drawn from a [F-distribution](https://en.wikipedia.org/wiki/F-distribution).
Returns a random Float64 drawn from an [F-distribution](https://en.wikipedia.org/wiki/F-distribution).
**Syntax**
@ -480,47 +478,160 @@ Result:
└─────────────────────┘
```
# Functions for Generating Random Strings
## randomString
Returns a random String of specified `length`. Not all characters may be printable.
Generates a string of the specified length filled with random bytes (including zero bytes). Not all characters may be printable.
**Syntax**
```sql
``` sql
randomString(length)
```
**Arguments**
- `length` — String length in bytes. Positive integer.
**Returned value**
- String filled with random bytes.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
``` sql
SELECT randomString(30) AS str, length(str) AS len FROM numbers(2) FORMAT Vertical;
```
Result:
``` text
Row 1:
──────
str: 3 G : pT ?w тi k aV f6
len: 30
Row 2:
──────
str: 9 ,] ^ ) ]?? 8
len: 30
```
## randomFixedString
Like `randomString` but returns a FixedString.
## randomPrintableASCII
Returns a random String of specified `length`. All characters are printable.
Generates a binary string of the specified length filled with random bytes (including zero bytes). Not all characters may be printable.
**Syntax**
``` sql
randomFixedString(length);
```
**Arguments**
- `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md).
**Returned value(s)**
- String filled with random bytes.
Type: [FixedString](../../sql-reference/data-types/fixedstring.md).
**Example**
Query:
```sql
SELECT randomFixedString(13) as rnd, toTypeName(rnd)
```
Result:
```text
┌─rnd──────┬─toTypeName(randomFixedString(13))─┐
│ j▒h㋖HɨZ'▒ │ FixedString(13) │
└──────────┴───────────────────────────────────┘
```
## randomPrintableASCII
Generates a string with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) characters. All characters are printable.
If you pass `length < 0`, the behavior of the function is undefined.
**Syntax**
``` sql
randomPrintableASCII(length)
```
**Arguments**
- `length` — String length in bytes. Positive integer.
**Returned value**
- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters.
Type: [String](../../sql-reference/data-types/string.md)
**Example**
``` sql
SELECT number, randomPrintableASCII(30) as str, length(str) FROM system.numbers LIMIT 3
```
``` text
┌─number─┬─str────────────────────────────┬─length(randomPrintableASCII(30))─┐
│ 0 │ SuiCOSTvC0csfABSw=UcSzp2.`rv8x │ 30 │
│ 1 │ 1Ag NlJ &RCN:*>HVPG;PE-nO"SUFD │ 30 │
│ 2 │ /"+<"wUTh:=LjJ Vm!c&hI*m#XTfzz │ 30 │
└────────┴────────────────────────────────┴──────────────────────────────────┘
```
## randomStringUTF8
Returns a random String containing `length` many UTF8 codepoints. Not all characters may be printable
Generates a random string of a specified length. Result string contains valid UTF-8 code points. The value of code points may be outside of the range of assigned Unicode.
**Syntax**
``` sql
randomStringUTF8(length);
```
**Arguments**
- `length` — Length of the string in code points. [UInt64](../../sql-reference/data-types/int-uint.md).
**Returned value(s)**
- UTF-8 random string.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
```sql
randomStringUTF8(length)
SELECT randomStringUTF8(13)
```
Result:
```text
┌─randomStringUTF8(13)─┐
│ 𘤗𙉝д兠庇󡅴󱱎󦐪􂕌𔊹𓰛 │
└──────────────────────┘
```
## fuzzBits
**Syntax**
Inverts the bits of String or FixedString `s`, each with probability `prob`.
Flips the bits of String or FixedString `s`, each with probability `prob`.
**Syntax**
@ -529,8 +640,8 @@ fuzzBits(s, prob)
```
**Arguments**
- `s` - `String` or `FixedString`
- `prob` - constant `Float32/64`
- `s` - `String` or `FixedString`,
- `prob` - constant `Float32/64` between 0.0 and 1.0.
**Returned value**

View File

@ -393,7 +393,7 @@ Reverses a sequence of Unicode code points in a string. Assumes that the string
## format
Format the `pattern` string with the strings listed in the arguments, similar to formatting in Python. The pattern string can contain replacement fields surrounded by curly braces `{}`. Anything not contained in braces is considered literal text and copied verbatim into the output. Literal brace character can be escaped by two braces: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are implicitely given monotonically increasing numbers).
Format the `pattern` string with the strings listed in the arguments, similar to formatting in Python. The pattern string can contain replacement fields surrounded by curly braces `{}`. Anything not contained in braces is considered literal text and copied verbatim into the output. Literal brace character can be escaped by two braces: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are implicitly given monotonically increasing numbers).
**Syntax**

View File

@ -6,7 +6,7 @@ sidebar_label: Replacing in Strings
# Functions for Replacing in Strings
[General strings functions](string-functions.md) and [functions for searchin in strings](string-search-functions.md) are described separately.
[General strings functions](string-functions.md) and [functions for searching in strings](string-search-functions.md) are described separately.
## replaceOne

View File

@ -793,7 +793,7 @@ toDecimalString(number, scale)
**Returned value**
- Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale).
The number is rounded up or down according to common arithmetics in case requested scale is smaller than original number's scale.
The number is rounded up or down according to common arithmetic in case requested scale is smaller than original number's scale.
**Example**

View File

@ -19,7 +19,7 @@ A function configuration contains the following settings:
- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number.
- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command.
- `return_type` - the type of a returned value.
- `return_name` - name of retuned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
- `return_name` - name of returned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created.
- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`.

View File

@ -222,7 +222,7 @@ It also makes sense to specify a local table in the `GLOBAL IN` clause, in case
### Distributed Subqueries and max_rows_in_set
You can use [`max_rows_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) and [`max_bytes_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) to control how much data is tranferred during distributed queries.
You can use [`max_rows_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) and [`max_bytes_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) to control how much data is transferred during distributed queries.
This is especially important if the `global in` query returns a large amount of data. Consider the following sql -
```sql

View File

@ -24,7 +24,7 @@ For tuple negation: [tupleNegate](../../sql-reference/functions/tuple-functions.
`a * b` The `multiply (a, b)` function.
For multiplying tuple by number: [tupleMultiplyByNumber](../../sql-reference/functions/tuple-functions.md#tuplemultiplybynumber), for scalar profuct: [dotProduct](../../sql-reference/functions/tuple-functions.md#dotproduct).
For multiplying tuple by number: [tupleMultiplyByNumber](../../sql-reference/functions/tuple-functions.md#tuplemultiplybynumber), for scalar product: [dotProduct](../../sql-reference/functions/tuple-functions.md#dotproduct).
`a / b` The `divide(a, b)` function.

View File

@ -32,7 +32,7 @@ Limit the maximum number of queries for the current user with 123 queries in 15
ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
```
For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quaters:
For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quarters:
``` sql
ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;

View File

@ -32,7 +32,7 @@ Limit the maximum number of queries for the current user with 123 queries in 15
CREATE QUOTA qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER;
```
For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quaters:
For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quarters:
``` sql
CREATE QUOTA qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default;

View File

@ -127,7 +127,7 @@ CROSS JOIN system.numbers AS c
Settings:
- `run_passes` — Run all query tree passes before dumping the query tree. Defaul: `1`.
- `run_passes` — Run all query tree passes before dumping the query tree. Default: `1`.
- `dump_passes` — Dump information about used passes before dumping the query tree. Default: `0`.
- `passes` — Specifies how many passes to run. If set to `-1`, runs all the passes. Default: `-1`.
@ -475,5 +475,5 @@ Result:
```
:::note
The validation is not complete, so a successfull query does not guarantee that the override would not cause issues.
The validation is not complete, so a successful query does not guarantee that the override would not cause issues.
:::

View File

@ -34,7 +34,7 @@ Queries that use `FINAL` are executed slightly slower than similar queries that
- Data is merged during query execution.
- Queries with `FINAL` read primary key columns in addition to the columns specified in the query.
**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine havet happened yet and deal with it by applying aggregation (for example, to discard duplicates).
**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine haven't happened yet and deal with it by applying aggregation (for example, to discard duplicates).
`FINAL` can be applied automatically using [FINAL](../../../operations/settings/settings.md#final) setting to all tables in a query using a session or a user profile.

View File

@ -289,7 +289,7 @@ When `FROM const_expr` not defined sequence of filling use minimal `expr` field
When `TO const_expr` not defined sequence of filling use maximum `expr` field value from `ORDER BY`.
When `STEP const_numeric_expr` defined then `const_numeric_expr` interprets `as is` for numeric types, as `days` for Date type, as `seconds` for DateTime type. It also supports [INTERVAL](https://clickhouse.com/docs/en/sql-reference/data-types/special-data-types/interval/) data type representing time and date intervals.
When `STEP const_numeric_expr` omitted then sequence of filling use `1.0` for numeric type, `1 day` for Date type and `1 second` for DateTime type.
`INTERPOLATE` can be applied to columns not participating in `ORDER BY WITH FILL`. Such columns are filled based on previous fields values by applying `expr`. If `expr` is not present will repeate previous value. Omitted list will result in including all allowed columns.
`INTERPOLATE` can be applied to columns not participating in `ORDER BY WITH FILL`. Such columns are filled based on previous fields values by applying `expr`. If `expr` is not present, the previous value is repeated. If the column list is omitted, all allowed columns are included.
Example of a query without `WITH FILL`:

View File

@ -11,7 +11,7 @@ Allows to populate test tables with data.
Not all types are supported.
``` sql
generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_string_length'[, 'max_array_length']]])
generateRandom(['name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_string_length'[, 'max_array_length']]]])
```
**Arguments**
@ -53,5 +53,49 @@ SELECT * FROM random;
└──────────────────────────────┴──────────────┴────────────────────────────────────────────────────────────────────┘
```
In combination with [generateRandomStructure](../../sql-reference/functions/other-functions.md#generateRandomStructure):
```sql
SELECT * FROM generateRandom(generateRandomStructure(4, 101), 101) LIMIT 3;
```
```text
┌──────────────────c1─┬──────────────────c2─┬─c3─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─c4──────────────────────────────────────┐
│ 1996-04-15 06:40:05 │ 33954608387.2844801 │ ['232.78.216.176','9.244.59.211','211.21.80.152','44.49.94.109','165.77.195.182','68.167.134.239','212.13.24.185','1.197.255.35','192.55.131.232'] │ 45d9:2b52:ab6:1c59:185b:515:c5b6:b781 │
│ 2063-01-13 01:22:27 │ 36155064970.9514454 │ ['176.140.188.101'] │ c65a:2626:41df:8dee:ec99:f68d:c6dd:6b30 │
│ 2090-02-28 14:50:56 │ 3864327452.3901373 │ ['155.114.30.32'] │ 57e9:5229:93ab:fbf3:aae7:e0e4:d1eb:86b │
└─────────────────────┴─────────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────────────────────────────┘
```
With missing `structure` argument (in this case the structure is random):
```sql
SELECT * FROM generateRandom() LIMIT 3;
```
```text
┌───c1─┬─────────c2─┬─────────────────────c3─┬──────────────────────c4─┬─c5───────┐
│ -128 │ 317300854 │ 2030-08-16 08:22:20.65 │ 1994-08-16 12:08:56.745 │ R0qgiC46 │
│ 40 │ -744906827 │ 2059-04-16 06:31:36.98 │ 1975-07-16 16:28:43.893 │ PuH4M*MZ │
│ -55 │ 698652232 │ 2052-08-04 20:13:39.68 │ 1998-09-20 03:48:29.279 │ │
└──────┴────────────┴────────────────────────┴─────────────────────────┴──────────┘
```
With random seed both for random structure and random data:
```sql
SELECT * FROM generateRandom(11) LIMIT 3;
```
```text
┌───────────────────────────────────────c1─┬─────────────────────────────────────────────────────────────────────────────c2─┬─────────────────────────────────────────────────────────────────────────────c3─┬─────────c4─┬─────────────────────────────────────────────────────────────────────────────c5─┬──────────────────────c6─┬─c7──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─c8──────────────────────────────────────┬─────────c9─┐
│ -77422512305044606600216318673365695785 │ 636812099959807642229.503817849012019401335326013846687285151335352272727523 │ -34944452809785978175157829109276115789694605299387223845886143311647505037529 │ 544473976 │ 111220388331710079615337037674887514156741572807049614590010583571763691328563 │ 22016.22623506465 │ {'2052-01-31 20:25:33':4306400876908509081044405485378623663,'1993-04-16 15:58:49':164367354809499452887861212674772770279,'2101-08-19 03:07:18':-60676948945963385477105077735447194811,'2039-12-22 22:31:39':-59227773536703059515222628111999932330} │ a7b2:8f58:4d07:6707:4189:80cf:92f5:902d │ 1950-07-14 │
│ -159940486888657488786004075627859832441 │ 629206527868163085099.8195700356331771569105231840157308480121506729741348442 │ -53203761250367440823323469081755775164053964440214841464405368882783634063735 │ 2187136525 │ 94881662451116595672491944222189810087991610568040618106057495823910493624275 │ 1.3095786748458954e-104 │ {} │ a051:e3da:2e0a:c69:7835:aed6:e8b:3817 │ 1943-03-25 │
│ -5239084224358020595591895205940528518 │ -529937657954363597180.1709207212648004850138812370209091520162977548101577846 │ 47490343304582536176125359129223180987770215457970451211489086575421345731671 │ 1637451978 │ 101899445785010192893461828129714741298630410942962837910400961787305271699002 │ 2.4344456058391296e223 │ {'2013-12-22 17:42:43':80271108282641375975566414544777036006,'2041-03-08 10:28:17':169706054082247533128707458270535852845,'1986-08-31 23:07:38':-54371542820364299444195390357730624136,'2094-04-23 21:26:50':7944954483303909347454597499139023465} │ 1293:a726:e899:9bfc:8c6f:2aa1:22c9:b635 │ 1924-11-20 │
└──────────────────────────────────────────┴────────────────────────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────────────────────────────────┴────────────┴────────────────────────────────────────────────────────────────────────────────┴─────────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────────────────────────────┴────────────┘
```
**Note:** `generateRandom(generateRandomStructure(), [random seed], max_string_length, max_array_length)` with large enough `max_array_length` can generate really huge output due to possible big nesting depth (up to 16) of complex types (`Array`, `Tuple`, `Map`, `Nested`).
## Related content
- Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse)

View File

@ -6,7 +6,7 @@ sidebar_label: urlCluster
# urlCluster Table Function
Allows processing files from URL in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterics in URL file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
Allows processing files from URL in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterisks in URL file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
**Syntax**
@ -59,4 +59,4 @@ Character `|` inside patterns is used to specify failover addresses. They are it
**See Also**
- [URL engine](../../engines/table-engines/special/url.md)
- [URL table function](../../sql-reference/table-functions/url.md)
- [URL table function](../../sql-reference/table-functions/url.md)

View File

@ -21,7 +21,7 @@ ClickHouse supports the standard grammar for defining windows and window functio
| `lag/lead(value, offset)` | Not supported. Workarounds: |
| | 1) replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` |
| | 2) use `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
| ntile(buckets) | Supported. Specify window like, (partition by x order by y rows between unbounded preceding and unounded following). |
| ntile(buckets) | Supported. Specify window like, (partition by x order by y rows between unbounded preceding and unbounded following). |
## ClickHouse-specific Window Functions
@ -39,7 +39,7 @@ The computed value is the following for each row:
The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097).
All GitHub issues related to window funtions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag.
All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag.
### Tests

View File

@ -19,7 +19,7 @@ $ echo '{"foo":"bar"}' | curl 'http://localhost:8123/?query=INSERT%20INTO%20test
При помощи [интерфейса CLI](../../interfaces/cli.md):
``` bash
$ echo '{"foo":"bar"}' | clickhouse-client ---query="INSERT INTO test FORMAT JSONEachRow"
$ echo '{"foo":"bar"}' | clickhouse-client --query="INSERT INTO test FORMAT JSONEachRow"
```
Чтобы не вставлять данные вручную, используйте одну из [готовых библиотек](../../interfaces/index.md).
@ -31,4 +31,4 @@ $ echo '{"foo":"bar"}' | clickhouse-client ---query="INSERT INTO test FORMAT JS
:::note "Примечание"
В HTTP-интерфейсе настройки передаются через параметры `GET` запроса, в интерфейсе `CLI` — как дополнительные аргументы командной строки, начинающиеся с `--`.
:::
:::

View File

@ -55,6 +55,8 @@ option (ENABLE_CLICKHOUSE_KEEPER "ClickHouse alternative to ZooKeeper" ${ENABLE_
option (ENABLE_CLICKHOUSE_KEEPER_CONVERTER "Util allows to convert ZooKeeper logs and snapshots into clickhouse-keeper snapshot" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_KEEPER_CLIENT "ClickHouse Keeper Client" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_SU "A tool similar to 'su'" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_DISKS "A tool to manage disks" ${ENABLE_CLICKHOUSE_ALL})
@ -169,6 +171,13 @@ else()
message(STATUS "ClickHouse keeper-converter mode: OFF")
endif()
# Report at configure time whether the keeper-client tool is part of this build.
if (NOT ENABLE_CLICKHOUSE_KEEPER_CLIENT)
    message(STATUS "ClickHouse keeper-client mode: OFF")
else()
    message(STATUS "ClickHouse keeper-client mode: ON")
endif()
if (ENABLE_CLICKHOUSE_DISKS)
message(STATUS "Clickhouse disks mode: ON")
else()
@ -237,6 +246,10 @@ if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
add_subdirectory (keeper-converter)
endif()
# Descend into the keeper-client directory only when the tool is enabled.
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
    add_subdirectory (keeper-client)
endif()
if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
add_subdirectory (odbc-bridge)
endif ()
@ -301,6 +314,9 @@ endif()
if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
clickhouse_target_link_split_lib(clickhouse keeper-converter)
endif()
# Link the keeper-client component into the `clickhouse` target when enabled.
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
    clickhouse_target_link_split_lib(clickhouse keeper-client)
endif()
if (ENABLE_CLICKHOUSE_INSTALL)
clickhouse_target_link_split_lib(clickhouse install)
endif ()
@ -392,6 +408,11 @@ if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-converter" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter)
endif ()
# Provide `clickhouse-keeper-client` as a symlink named after the tool that points
# at the `clickhouse` binary, install it, and add it to the bundle list.
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
    add_custom_target (clickhouse-keeper-client ALL
        COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper-client
        DEPENDS clickhouse)
    install (
        FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-client"
        DESTINATION ${CMAKE_INSTALL_BINDIR}
        COMPONENT clickhouse)
    list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-client)
endif ()
if (ENABLE_CLICKHOUSE_DISKS)
add_custom_target (clickhouse-disks ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-disks DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-disks" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)

View File

@ -17,6 +17,7 @@
#cmakedefine01 ENABLE_CLICKHOUSE_ODBC_BRIDGE
#cmakedefine01 ENABLE_CLICKHOUSE_LIBRARY_BRIDGE
#cmakedefine01 ENABLE_CLICKHOUSE_KEEPER
#cmakedefine01 ENABLE_CLICKHOUSE_KEEPER_CLIENT
#cmakedefine01 ENABLE_CLICKHOUSE_KEEPER_CONVERTER
#cmakedefine01 ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER
#cmakedefine01 ENABLE_CLICKHOUSE_SU

View File

@ -0,0 +1,9 @@
# Build description of the keeper-client tool.
# NOTE(review): both variables are presumably picked up by clickhouse_program_add(keeper-client)
# through the CLICKHOUSE_KEEPER_CLIENT_* naming convention - confirm against the macro definition.

# Translation units of the tool.
set (CLICKHOUSE_KEEPER_CLIENT_SOURCES
    KeeperClient.cpp
    Parser.cpp
    Commands.cpp
)

# Libraries the tool links against (all PRIVATE).
set (CLICKHOUSE_KEEPER_CLIENT_LINK
    PRIVATE
        boost::program_options
        dbms
)

clickhouse_program_add(keeper-client)

View File

@ -0,0 +1,196 @@
#include "Commands.h"
#include "KeeperClient.h"
namespace DB
{
/// Parses the arguments of `ls`.
/// The path is optional: when one is present it is appended to `node->args`,
/// otherwise the argument list is left untouched. Always returns true.
bool LSCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String path;
    if (parseKeeperPath(pos, expected, path))
        node->args.push_back(std::move(path));

    return true;
}
void LSCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
String path;
if (!query->args.empty())
path = client->getAbsolutePath(query->args[0].safeGet<String>());
else
path = client->cwd;
for (const auto & child : client->zookeeper->getChildren(path))
std::cout << child << " ";
std::cout << "\n";
}
/// Parses the arguments of `cd`.
/// The path is optional: it is appended to `node->args` only when it parses.
/// Always returns true.
bool CDCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String path;
    if (parseKeeperPath(pos, expected, path))
        node->args.push_back(std::move(path));

    return true;
}
/// Changes the client's current directory to the given path.
/// Does nothing when no path was given; reports to stderr (and keeps the
/// current directory) when the resolved path does not exist in Keeper.
void CDCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
    if (query->args.empty())
        return;

    auto target = client->getAbsolutePath(query->args[0].safeGet<String>());

    if (client->zookeeper->exists(target))
    {
        client->cwd = target;
        return;
    }

    std::cerr << "Path " << target << " does not exists\n";
}
/// Parses `set <path> <value> [version]`.
/// Appends the path and the value to `node->args`, followed by the numeric
/// version literal when one is present. Returns false if the path or the
/// value is missing.
bool SetCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String path;
    if (!parseKeeperPath(pos, expected, path))
        return false;
    node->args.push_back(std::move(path));

    String value;
    if (!parseKeeperArg(pos, expected, value))
        return false;
    node->args.push_back(std::move(value));

    /// The version is optional, so a failed number parse is not an error.
    ASTPtr version_ast;
    const bool has_version = ParserNumber{}.parse(pos, version_ast, expected);
    if (has_version)
        node->args.push_back(version_ast->as<ASTLiteral &>().value);

    return true;
}
/// Writes a value to a node. With exactly two arguments (path, value) the
/// two-argument `set` is called; otherwise the third argument is passed on
/// as an Int32 version.
void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
    const auto path = client->getAbsolutePath(query->args[0].safeGet<String>());
    const auto & value = query->args[1].safeGet<String>();

    if (query->args.size() != 2)
    {
        const auto version = static_cast<Int32>(query->args[2].safeGet<Int64>());
        client->zookeeper->set(path, value, version);
    }
    else
    {
        client->zookeeper->set(path, value);
    }
}
/// Parses `create <path> <value> [mode]`.
///
/// Appends three arguments to the query: the path, the value and the numeric
/// zkutil::CreateMode (Persistent when no mode keyword is given).
/// Returns false when the path or the value is missing.
bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String arg;
    if (!parseKeeperPath(pos, expected, arg))
        return false;
    node->args.push_back(std::move(arg));

    if (!parseKeeperArg(pos, expected, arg))
        return false;
    node->args.push_back(std::move(arg));

    int mode = zkutil::CreateMode::Persistent;

    /// The two-word keywords must be tried before their one-word prefixes: a one-word keyword
    /// also matches the first word of the two-word form, which would make the sequential modes
    /// impossible to select.
    if (ParserKeyword{"PERSISTENT SEQUENTIAL"}.ignore(pos, expected))
        mode = zkutil::CreateMode::PersistentSequential;
    else if (ParserKeyword{"EPHEMERAL SEQUENTIAL"}.ignore(pos, expected))
        mode = zkutil::CreateMode::EphemeralSequential;
    else if (ParserKeyword{"PERSISTENT"}.ignore(pos, expected))
        mode = zkutil::CreateMode::Persistent;
    else if (ParserKeyword{"EPHEMERAL"}.ignore(pos, expected))
        mode = zkutil::CreateMode::Ephemeral;

    node->args.push_back(mode);

    return true;
}
/// Creates the node using the path, value and create mode stored in the query arguments.
void CreateCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
    const auto node_path = client->getAbsolutePath(query->args[0].safeGet<String>());
    const auto & node_value = query->args[1].safeGet<String>();
    const int create_mode = static_cast<int>(query->args[2].safeGet<Int64>());

    client->zookeeper->create(node_path, node_value, create_mode);
}
/// Parses the mandatory path argument of `get`; fails when no path is present.
bool GetCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String path;
    const bool has_path = parseKeeperPath(pos, expected, path);
    if (has_path)
        node->args.push_back(std::move(path));

    return has_path;
}
/// Prints the value of the requested node followed by a newline.
void GetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
    const auto node_path = client->getAbsolutePath(query->args[0].safeGet<String>());
    std::cout << client->zookeeper->get(node_path) << "\n";
}
/// Parses the mandatory path argument of `rm`; fails when no path is present.
bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String path;
    const bool has_path = parseKeeperPath(pos, expected, path);
    if (has_path)
        node->args.push_back(std::move(path));

    return has_path;
}
/// Removes the node named by the first query argument.
void RMCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
    const auto node_path = client->getAbsolutePath(query->args[0].safeGet<String>());
    client->zookeeper->remove(node_path);
}
/// Parses the mandatory path argument of `rmr`; fails when no path is present.
bool RMRCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String path;
    const bool has_path = parseKeeperPath(pos, expected, path);
    if (has_path)
        node->args.push_back(std::move(path));

    return has_path;
}
/// Schedules a recursive removal of the path: the deletion itself is handed to the
/// client as a confirmation callback rather than executed here.
void RMRCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
    String path = client->getAbsolutePath(query->args[0].safeGet<String>());

    /// The path is captured by value so the callback stays valid after this function returns.
    auto remove_recursively = [client, path] { client->zookeeper->removeRecursive(path); };

    client->askConfirmation("You are going to recursively delete path " + path, std::move(remove_recursively));
}
/// `help` takes no arguments, so parsing always succeeds without consuming any tokens.
bool HelpCommand::parse(IParser::Pos &, std::shared_ptr<ASTKeeperQuery> &, Expected &) const
{
    return true;
}
/// Prints the help message of every registered command, one per line.
void HelpCommand::execute(const ASTKeeperQuery *, KeeperClient *) const
{
    for (const auto & entry : KeeperClient::commands)
    {
        const auto & command = entry.second;
        std::cout << command->getHelpMessage() << "\n";
    }
}
/// Accepts a single bare word of exactly four characters and stores it as the only argument.
bool FourLetterWordCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    expected.add(pos, "four-letter-word command");

    if (pos->type != TokenType::BareWord || pos->end - pos->begin != 4)
        return false;

    node->args.push_back(String(pos->begin, pos->end));
    ++pos;
    return true;
}
/// Sends the four-letter word to the server and prints the reply followed by a newline.
void FourLetterWordCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
    const auto & word = query->args[0].safeGet<String>();
    std::cout << client->executeFourLetterCommand(word) << "\n";
}
}

View File

@ -0,0 +1,131 @@
#pragma once
#include "Parser.h"
namespace DB
{
class KeeperClient;
/// Interface of a keeper-client command: it can parse its own arguments into an
/// ASTKeeperQuery, execute a parsed query against a KeeperClient, and describe itself.
class IKeeperClientCommand
{
public:
    static const String name;

    virtual ~IKeeperClientCommand() = default;

    /// Name under which the command is registered.
    virtual String getName() const = 0;

    /// One-line usage description.
    virtual String getHelpMessage() const = 0;

    /// Parses the command's arguments starting at `pos` and appends them to `node->args`.
    /// Returns false if the arguments are not acceptable.
    virtual bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const = 0;

    /// Runs the command with the arguments stored in `query`.
    virtual void execute(const ASTKeeperQuery * query, KeeperClient * client) const = 0;
};

using Command = std::shared_ptr<IKeeperClientCommand>;
/// `ls [path]`: lists the children of a node.
class LSCommand : public IKeeperClientCommand
{
    String getName() const override { return "ls"; }

    String getHelpMessage() const override { return "ls [path] -- Lists the nodes for the given path (default: cwd)"; }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
};
/// `cd [path]`: changes the client's working path.
class CDCommand : public IKeeperClientCommand
{
    String getName() const override { return "cd"; }

    String getHelpMessage() const override { return "cd [path] -- Change the working path (default `.`)"; }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
};
/// `set <path> <value> [version]`: updates a node's value.
class SetCommand : public IKeeperClientCommand
{
    String getName() const override { return "set"; }

    String getHelpMessage() const override
    {
        return "set <path> <value> [version] -- Updates the node's value. Only update if version matches (default: -1)";
    }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
};
/// `create <path> <value>`: creates a new node.
class CreateCommand : public IKeeperClientCommand
{
    String getName() const override { return "create"; }

    String getHelpMessage() const override { return "create <path> <value> -- Creates new node"; }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
};
/// `get <path>`: prints a node's value.
class GetCommand : public IKeeperClientCommand
{
    String getName() const override { return "get"; }

    String getHelpMessage() const override { return "get <path> -- Returns the node's value"; }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
};
/// `rm <path>`: removes a single node.
class RMCommand : public IKeeperClientCommand
{
    String getName() const override { return "rm"; }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;

    /// The usage line must show the name returned by getName(), which is `rm`, not `remove`.
    String getHelpMessage() const override { return "rm <path> -- Remove the node"; }
};
/// `rmr <path>`: removes a node recursively after confirmation.
class RMRCommand : public IKeeperClientCommand
{
    String getName() const override { return "rmr"; }

    String getHelpMessage() const override { return "rmr <path> -- Recursively deletes path. Confirmation required"; }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
};
/// `help`: prints the usage line of every registered command.
class HelpCommand : public IKeeperClientCommand
{
    String getName() const override { return "help"; }

    String getHelpMessage() const override { return "help -- Prints this message"; }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
};
/// `flwc <command>`: sends a four-letter-word command to the server.
class FourLetterWordCommand : public IKeeperClientCommand
{
    String getName() const override { return "flwc"; }

    String getHelpMessage() const override { return "flwc <command> -- Executes four-letter-word command"; }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
};
}

View File

@ -0,0 +1,343 @@
#include "KeeperClient.h"
#include "Commands.h"
#include <Client/ReplxxLineReader.h>
#include <Client/ClientBase.h>
#include <Common/EventNotifier.h>
#include <Common/filesystemHelpers.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Parsers/parseQuery.h>
#include <Poco/Util/HelpFormatter.h>
namespace DB
{
/// Error codes referenced in this file; they are only declared here and defined elsewhere.
namespace ErrorCodes
{
/// Returned as the process exit code when command-line option parsing fails.
extern const int BAD_ARGUMENTS;
}
/// Sends a four-letter-word command to the first configured host over a plain socket
/// and returns everything the server writes back until it closes the connection.
String KeeperClient::executeFourLetterCommand(const String & command)
{
    /// A fresh socket is needed for every call because ZooKeeper forcefully shuts down the connection after a four-letter-word command.
    const Poco::Net::SocketAddress address{zk_args.hosts[0]};
    const auto operation_timeout = zk_args.operation_timeout_ms * 1000;

    Poco::Net::StreamSocket socket;
    socket.connect(address, zk_args.connection_timeout_ms * 1000);
    socket.setReceiveTimeout(operation_timeout);
    socket.setSendTimeout(operation_timeout);
    socket.setNoDelay(true);

    ReadBufferFromPocoSocket in(socket);
    WriteBufferFromPocoSocket out(socket);

    out.write(command.data(), command.size());
    out.next();

    String response;
    readStringUntilEOF(response, in);
    in.next();
    return response;
}
std::vector<String> KeeperClient::getCompletions(const String & prefix) const
{
Tokens tokens(prefix.data(), prefix.data() + prefix.size(), 0, false);
IParser::Pos pos(tokens, 0);
if (pos->type != TokenType::BareWord)
return registered_commands_and_four_letter_words;
++pos;
if (pos->isEnd())
return registered_commands_and_four_letter_words;
ParserToken{TokenType::Whitespace}.ignore(pos);
std::vector<String> result;
String string_path;
Expected expected;
if (!parseKeeperPath(pos, expected, string_path))
string_path = cwd;
if (!pos->isEnd())
return result;
fs::path path = string_path;
String parent_path;
if (string_path.ends_with("/"))
parent_path = getAbsolutePath(string_path);
else
parent_path = getAbsolutePath(path.parent_path());
try
{
for (const auto & child : zookeeper->getChildren(parent_path))
result.push_back(child);
}
catch (Coordination::Exception &) {}
std::sort(result.begin(), result.end());
return result;
}
/// Prints the prompt and stores the callback; the client is switched into
/// "waiting for confirmation" state. The callback is not invoked here.
void KeeperClient::askConfirmation(const String & prompt, std::function<void()> && callback)
{
    std::cout << prompt << " Continue?\n";

    confirmation_callback = callback;
    need_confirmation = true;
}
/// Resolves a path typed by the user into an absolute, normalized Keeper path.
///
/// A path starting with '/' is taken as is; any other path is appended to the current
/// working path. `.` and `..` components are collapsed and a trailing slash is dropped
/// (except for the root itself).
fs::path KeeperClient::getAbsolutePath(const String & relative) const
{
    const fs::path joined = relative.starts_with('/') ? fs::path(relative) : cwd / relative;

    /// Keeper paths do not exist on the client's filesystem, so the normalization must be
    /// purely lexical: fs::weakly_canonical would resolve symlinks of (and could fail on)
    /// local directories that merely share a name with a Keeper node.
    String result = joined.lexically_normal();

    if (result.ends_with('/') && result.size() > 1)
        result.pop_back();

    return result;
}
void KeeperClient::loadCommands(std::vector<Command> && new_commands)
{
for (const auto & command : new_commands)
{
String name = command->getName();
commands.insert({name, command});
registered_commands_and_four_letter_words.push_back(std::move(name));
}
for (const auto & command : four_letter_word_commands)
registered_commands_and_four_letter_words.push_back(command);
std::sort(registered_commands_and_four_letter_words.begin(), registered_commands_and_four_letter_words.end());
}
/// Declares the command-line options of the application.
/// Every option is bound to a configuration key equal to its full name.
void KeeperClient::defineOptions(Poco::Util::OptionSet & options)
{
    Poco::Util::Application::defineOptions(options);

    /// Adds an option that takes a value; the argument name and the binding both equal the option name.
    auto add_value_option = [&options](const String & full_name, const String & short_name, const String & description)
    {
        options.addOption(
            Poco::Util::Option(full_name, short_name, description)
                .argument(full_name)
                .binding(full_name));
    };

    /// `help` is the only option without an argument.
    options.addOption(
        Poco::Util::Option("help", "", "show help and exit")
            .binding("help"));

    add_value_option("host", "h", "server hostname. default `localhost`");
    add_value_option("port", "p", "server port. default `2181`");
    add_value_option("query", "q", "will execute given query, then exit.");
    add_value_option("connection-timeout", "", "set connection timeout in seconds. default 10s.");
    add_value_option("session-timeout", "", "set session timeout in seconds. default 10s.");
    add_value_option("operation-timeout", "", "set operation timeout in seconds. default 10s.");
    add_value_option("history-file", "", "set path of history file. default `~/.keeper-client-history`");
    add_value_option("log-level", "", "set log level");
}
/// Application start-up hook: wires the completion callback, registers the built-in
/// commands, chooses (and creates if missing) the history file, sets the root logger
/// level and initializes the event notifier.
void KeeperClient::initialize(Poco::Util::Application & /* self */)
{
    suggest.setCompletionsCallback(
        [this](const String & prefix, size_t /* prefix_length */) { return getCompletions(prefix); });

    std::vector<Command> builtin_commands{
        std::make_shared<LSCommand>(),
        std::make_shared<CDCommand>(),
        std::make_shared<SetCommand>(),
        std::make_shared<CreateCommand>(),
        std::make_shared<GetCommand>(),
        std::make_shared<RMCommand>(),
        std::make_shared<RMRCommand>(),
        std::make_shared<HelpCommand>(),
        std::make_shared<FourLetterWordCommand>(),
    };
    loadCommands(std::move(builtin_commands));

    if (config().has("history-file"))
    {
        history_file = config().getString("history-file");
    }
    else
    {
        /// Default location: $HOME/.keeper-client-history (HOME is treated as empty when unset).
        const char * home_path_cstr = getenv("HOME"); // NOLINT(concurrency-mt-unsafe)
        String home_path = home_path_cstr ? home_path_cstr : "";
        history_file = home_path + "/.keeper-client-history";
    }

    const bool history_file_missing = !history_file.empty() && !fs::exists(history_file);
    if (history_file_missing)
    {
        try
        {
            FS::createFile(history_file);
        }
        catch (const ErrnoException & e)
        {
            /// The file may have been created in the meantime; only other errors are propagated.
            if (e.getErrno() != EEXIST)
                throw;
        }
    }

    Poco::Logger::root().setLevel(config().getString("log-level", "error"));

    EventNotifier::init();
}
/// Splits the text on ';' and processes every non-empty piece in order.
void KeeperClient::executeQuery(const String & query)
{
    std::vector<String> statements;
    boost::algorithm::split(statements, query, boost::is_any_of(";"));

    for (const auto & statement : statements)
    {
        if (statement.empty())
            continue;

        processQueryText(statement);
    }
}
bool KeeperClient::processQueryText(const String & text)
{
if (exit_strings.find(text) != exit_strings.end())
return false;
try
{
if (need_confirmation)
{
need_confirmation = false;
if (text.size() == 1 && (text == "y" || text == "Y"))
confirmation_callback();
return true;
}
KeeperParser parser;
String message;
const char * begin = text.data();
ASTPtr res = tryParseQuery(parser, begin, begin + text.size(), message, true, "", false, 0, 0, false);
if (!res)
{
std::cerr << message << "\n";
return true;
}
auto * query = res->as<ASTKeeperQuery>();
auto command = KeeperClient::commands.find(query->command);
command->second->execute(query, this);
}
catch (Coordination::Exception & err)
{
std::cerr << err.message() << "\n";
}
return true;
}
void KeeperClient::runInteractive()
{
LineReader::Patterns query_extenders = {"\\"};
LineReader::Patterns query_delimiters = {};
ReplxxLineReader lr(suggest, history_file, false, query_extenders, query_delimiters, {});
lr.enableBracketedPaste();
while (true)
{
String prompt;
if (need_confirmation)
prompt = "[y/n] ";
else
prompt = cwd.string() + " :) ";
auto input = lr.readLine(prompt, ":-] ");
if (input.empty())
break;
if (!processQueryText(input))
break;
}
}
int KeeperClient::main(const std::vector<String> & /* args */)
{
if (config().hasOption("help"))
{
Poco::Util::HelpFormatter help_formatter(KeeperClient::options());
auto header_str = fmt::format("{} [OPTION]\n", commandName());
help_formatter.setHeader(header_str);
help_formatter.format(std::cout);
return 0;
}
auto host = config().getString("host", "localhost");
auto port = config().getString("port", "2181");
zk_args.hosts = {host + ":" + port};
zk_args.connection_timeout_ms = config().getInt("connection-timeout", 10) * 1000;
zk_args.session_timeout_ms = config().getInt("session-timeout", 10) * 1000;
zk_args.operation_timeout_ms = config().getInt("operation-timeout", 10) * 1000;
zookeeper = std::make_unique<zkutil::ZooKeeper>(zk_args);
if (config().has("query"))
executeQuery(config().getString("query"));
else
runInteractive();
return 0;
}
}
int mainEntryClickHouseKeeperClient(int argc, char ** argv)
{
try
{
DB::KeeperClient client;
client.init(argc, argv);
return client.run();
}
catch (const DB::Exception & e)
{
std::cerr << DB::getExceptionMessage(e, false) << std::endl;
return 1;
}
catch (const boost::program_options::error & e)
{
std::cerr << "Bad arguments: " << e.what() << std::endl;
return DB::ErrorCodes::BAD_ARGUMENTS;
}
catch (...)
{
std::cerr << DB::getCurrentExceptionMessage(true) << std::endl;
return 1;
}
}

View File

@ -0,0 +1,69 @@
#pragma once
#include "Parser.h"
#include "Commands.h"
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Client/LineReader.h>
#include <IO/ReadBufferFromPocoSocket.h>
#include <IO/WriteBufferFromPocoSocket.h>
#include <Parsers/ASTLiteral.h>
#include <Poco/Net/StreamSocket.h>
#include <Poco/Util/Application.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
{
/// Four-letter-word commands known to this client. The set is used for completion and
/// for "expected" hints in the parser; the parser also accepts four-letter words that are not listed.
static const NameSet four_letter_word_commands
{
    "ruok", "mntr", "srvr",
    "stat", "srst", "conf",
    "cons", "crst", "envi",
    "dirs", "isro", "wchs",
    "wchc", "wchp", "dump",
    "csnp", "lgif", "rqld",
};
/// Command-line client for a Keeper / ZooKeeper server, implemented as a Poco application.
/// Commands reach the connection and the working path through the public members below.
class KeeperClient: public Poco::Util::Application
{
public:
    KeeperClient() = default;

    /// Poco::Util::Application hooks.
    void defineOptions(Poco::Util::OptionSet & options) override;
    void initialize(Poco::Util::Application & self) override;
    int main(const std::vector<String> & args) override;

    /// Resolves a user-typed path against `cwd`.
    fs::path getAbsolutePath(const String & relative) const;

    /// Prints the prompt and stores `callback` to be run if the next input line is "y" or "Y".
    void askConfirmation(const String & prompt, std::function<void()> && callback);

    /// Sends a four-letter-word command over a dedicated socket and returns the reply.
    String executeFourLetterCommand(const String & command);

    zkutil::ZooKeeperPtr zookeeper;
    fs::path cwd = "/";
    std::function<void()> confirmation_callback;

    /// Registered commands by name; shared by all instances.
    inline static std::map<String, Command> commands;

protected:
    void runInteractive();
    void executeQuery(const String & query);
    bool processQueryText(const String & text);

    void loadCommands(std::vector<Command> && new_commands);

    std::vector<String> getCompletions(const String & prefix) const;

    String history_file;
    LineReader::Suggest suggest;

    zkutil::ZooKeeperArgs zk_args;

    /// True while the next input line is to be read as the answer to a confirmation prompt.
    bool need_confirmation = false;

    /// Sorted names offered when completing the command word.
    std::vector<String> registered_commands_and_four_letter_words;
};
}

View File

@ -0,0 +1,94 @@
#include "Parser.h"
#include "KeeperClient.h"
namespace DB
{
/// Parses one argument: either a bare word (taken verbatim) or an identifier / string
/// literal. Whitespace following the argument is skipped in both cases.
bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result)
{
    expected.add(pos, getTokenName(TokenType::BareWord));

    bool parsed;
    if (pos->type == TokenType::BareWord)
    {
        result = String(pos->begin, pos->end);
        ++pos;
        parsed = true;
    }
    else
    {
        parsed = parseIdentifierOrStringLiteral(pos, expected, result);
    }

    ParserToken{TokenType::Whitespace}.ignore(pos);
    return parsed;
}
/// Parses a path: either a quoted identifier / string literal, or a run of consecutive
/// bare-word, slash and dot tokens glued together. `path` is written only on success.
bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path)
{
    expected.add(pos, "path");

    const bool is_quoted = pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral;
    if (is_quoted)
        return parseIdentifierOrStringLiteral(pos, expected, path);

    String collected;
    for (; pos->type == TokenType::BareWord || pos->type == TokenType::Slash || pos->type == TokenType::Dot; ++pos)
        collected.append(pos->begin, pos->end);

    ParserToken{TokenType::Whitespace}.ignore(pos);

    if (collected.empty())
        return false;

    path = collected;
    return true;
}
/// Parses a whole keeper-client query: a command word followed by that command's arguments.
/// On success `node` receives an ASTKeeperQuery holding the command name and its arguments.
bool KeeperParser::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    auto query = std::make_shared<ASTKeeperQuery>();

    /// Record every known command name as an expected alternative at this position.
    for (const auto & entry : KeeperClient::commands)
        expected.add(pos, entry.first.data());

    for (const auto & word : four_letter_word_commands)
        expected.add(pos, word.data());

    if (pos->type != TokenType::BareWord)
        return false;

    String command_name(pos->begin, pos->end);
    Command command;

    const auto found = KeeperClient::commands.find(command_name);
    if (found != KeeperClient::commands.end())
    {
        command = found->second;
        ++pos;
        ParserToken{TokenType::Whitespace}.ignore(pos);
    }
    else if (command_name.size() == 4)
    {
        /// Treat it like four-letter command
        /// Since keeper server can potentially have different version we don't want to match this command with embedded list
        command = std::make_shared<FourLetterWordCommand>();
        command_name = command->getName();
        /// We also don't move the position, so the command will be parsed as an argument
    }
    else
    {
        return false;
    }

    query->command = command_name;

    if (!command->parse(pos, query, expected))
        return false;

    ParserToken{TokenType::Whitespace}.ignore(pos);

    node = query;
    return true;
}
}

View File

@ -0,0 +1,36 @@
#pragma once
#include <Parsers/CommonParsers.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/IAST.h>
#include <Parsers/IParserBase.h>
#include <Parsers/parseIdentifierOrStringLiteral.h>
namespace DB
{
/// Parses a single argument (a bare word, or an identifier / string literal) into `result`
/// and skips the whitespace after it. Returns false if no argument could be parsed.
bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result);
/// Parses a path (a quoted identifier / string literal, or a sequence of bare-word, slash
/// and dot tokens) into `path`. Returns false if no path could be parsed.
bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path);
/// AST node of a parsed keeper-client query: the command name plus its arguments.
class ASTKeeperQuery : public IAST
{
public:
    /// Name of the command to execute.
    String command;

    /// Arguments in the order the command's parser appended them.
    std::vector<Field> args;

    String getID(char) const override { return "KeeperQuery"; }

    ASTPtr clone() const override { return std::make_shared<ASTKeeperQuery>(*this); }
};
/// Parser that turns one line of keeper-client input into an ASTKeeperQuery.
class KeeperParser : public IParserBase
{
protected:
    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;

    const char * getName() const override { return "Keeper client query"; }
};
}

View File

@ -138,7 +138,7 @@ void LocalServer::initialize(Poco::Util::Application & self)
OutdatedPartsLoadingThreadPool::initialize(
config().getUInt("max_outdated_parts_loading_thread_pool_size", 16),
0, // We don't need any threads once all the parts are loaded
config().getUInt("outdated_part_loading_thread_pool_queue_size", 10000));
config().getUInt("max_outdated_parts_loading_thread_pool_size", 16));
}

View File

@ -62,6 +62,9 @@ int mainEntryClickHouseKeeper(int argc, char ** argv);
#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER
int mainEntryClickHouseKeeperConverter(int argc, char ** argv);
#endif
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
int mainEntryClickHouseKeeperClient(int argc, char ** argv);
#endif
#if ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER
int mainEntryClickHouseStaticFilesDiskUploader(int argc, char ** argv);
#endif
@ -133,6 +136,9 @@ std::pair<const char *, MainFunc> clickhouse_applications[] =
#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER
{"keeper-converter", mainEntryClickHouseKeeperConverter},
#endif
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
{"keeper-client", mainEntryClickHouseKeeperClient},
#endif
#if ENABLE_CLICKHOUSE_INSTALL
{"install", mainEntryClickHouseInstall},
{"start", mainEntryClickHouseStart},

View File

@ -696,7 +696,7 @@ try
OutdatedPartsLoadingThreadPool::initialize(
server_settings.max_outdated_parts_loading_thread_pool_size,
0, // We don't need any threads once all the parts are loaded
server_settings.outdated_part_loading_thread_pool_queue_size);
server_settings.max_outdated_parts_loading_thread_pool_size);
/// Initialize global local cache for remote filesystem.
if (config().has("local_cache_for_remote_fs"))

View File

@ -90,14 +90,6 @@ namespace CurrentMetrics
namespace DB
{
static const NameSet exit_strings
{
"exit", "quit", "logout", "учше", "йгше", "дщпщге",
"exit;", "quit;", "logout;", "учшеж", "йгшеж", "дщпщгеж",
"q", "й", "\\q", "\\Q", "\\й", "\\Й", ":q", "Жй"
};
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;

View File

@ -24,6 +24,14 @@ namespace po = boost::program_options;
namespace DB
{
static const NameSet exit_strings
{
"exit", "quit", "logout", "учше", "йгше", "дщпщге",
"exit;", "quit;", "logout;", "учшеж", "йгшеж", "дщпщгеж",
"q", "й", "\\q", "\\Q", "\\й", "\\Й", ":q", "Жй"
};
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;

View File

@ -190,7 +190,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
connected = true;
sendHello();
receiveHello();
receiveHello(timeouts.handshake_timeout);
if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM)
sendAddendum();
@ -321,8 +321,10 @@ void Connection::sendAddendum()
}
void Connection::receiveHello()
void Connection::receiveHello(const Poco::Timespan & handshake_timeout)
{
TimeoutSetter timeout_setter(*socket, socket->getSendTimeout(), handshake_timeout);
/// Receive hello packet.
UInt64 packet_type = 0;
@ -375,6 +377,10 @@ void Connection::receiveHello()
receiveException()->rethrow();
else
{
/// Reset timeout_setter before disconnect,
/// because after disconnect socket will be invalid.
timeout_setter.reset();
/// Close connection, to not stay in unsynchronised state.
disconnect();
throwUnexpectedPacket(packet_type, "Hello or Exception");

View File

@ -256,7 +256,7 @@ private:
void connect(const ConnectionTimeouts & timeouts);
void sendHello();
void sendAddendum();
void receiveHello();
void receiveHello(const Poco::Timespan & handshake_timeout);
#if USE_SSL
void sendClusterNameAndSalt();

Some files were not shown because too many files have changed in this diff Show More