Merge branch 'master' into async-loader-integration

This commit is contained in:
Sergei Trifonov 2023-06-03 12:52:47 +02:00 committed by GitHub
commit 6aadcffe59
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
72 changed files with 2264 additions and 773 deletions

View File

@ -28,6 +28,19 @@ uint64_t getMemoryAmountOrZero()
#if defined(OS_LINUX)
// Try to lookup at the Cgroup limit
// CGroups v2
std::ifstream cgroupv2_limit("/sys/fs/cgroup/memory.max");
if (cgroupv2_limit.is_open())
{
uint64_t memory_limit = 0;
cgroupv2_limit >> memory_limit;
if (memory_limit > 0 && memory_limit < memory_amount)
memory_amount = memory_limit;
}
else
{
// CGroups v1
std::ifstream cgroup_limit("/sys/fs/cgroup/memory/memory.limit_in_bytes");
if (cgroup_limit.is_open())
{
@ -36,6 +49,7 @@ uint64_t getMemoryAmountOrZero()
if (memory_limit > 0 && memory_limit < memory_amount)
memory_amount = memory_limit;
}
}
#endif
return memory_amount;

View File

@ -0,0 +1,53 @@
---
slug: /en/operations/utilities/clickhouse-keeper-client
sidebar_label: clickhouse-keeper-client
---
# clickhouse-keeper-client
A client application to interact with clickhouse-keeper by its native protocol.
## Keys {#clickhouse-keeper-client}
- `-q QUERY`, `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-keeper-client` will start in interactive mode.
- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`.
- `-p N`, `--port=N` — Server port. Default value: 2181
- `--connection-timeout=TIMEOUT` — Set connection timeout in seconds. Default value: 10s.
- `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s.
- `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s.
- `--history-file=FILE_PATH` — Set path of history file. Default value: `~/.keeper-client-history`.
- `--help` — Shows the help message.
## Example {#clickhouse-keeper-client-example}
```bash
./clickhouse-keeper-client -h localhost:2181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30
Connected to ZooKeeper at [::1]:2181 with session_id 137
/ :) ls
keeper foo bar
/ :) cd keeper
/keeper :) ls
api_version
/keeper :) cd api_version
/keeper/api_version :) ls
/keeper/api_version :) cd xyz
Path /keeper/api_version/xyz does not exists
/keeper/api_version :) cd ../../
/ :) ls
keeper foo bar
/ :) get keeper/api_version
2
```
## Commands {#clickhouse-keeper-client-commands}
- `ls [path]` -- Lists the nodes for the given path (default: cwd)
- `cd [path]` -- Changes the working path (default `.`)
- `set <path> <value> [version]` -- Updates the node's value. Only update if version matches (default: -1)
- `create <path> <value>` -- Creates new node
- `get <path>` -- Returns the node's value
- `remove <path>` -- Removes the node
- `rmr <path>` -- Recursively deletes path. Confirmation required
- `flwc <command>` -- Executes four-letter-word command
- `help` -- Prints this message

View File

@ -0,0 +1,52 @@
---
slug: /en/sql-reference/functions/geo/svg
sidebar_label: SVG
title: "Functions for Generating SVG images from Geo data"
---
## Syntax
``` sql
SVG(geometry,[style])
```
### Parameters
- `geometry` — Geo data
- `style` — Optional style name
### Returned value
- The SVG representation of the geometry:
- SVG circle
- SVG polygon
- SVG path
Type: String
## Examples
### Circle
```sql
SELECT SVG((0., 0.))
```
```response
<circle cx="0" cy="0" r="5" style=""/>
```
### Polygon
```sql
SELECT SVG([(0., 0.), (10, 0), (10, 10), (0, 10)])
```
```response
<polygon points="0,0 0,10 10,10 10,0 0,0" style=""/>
```
### Path
```sql
SELECT SVG([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]])
```
```response
<g fill-rule="evenodd"><path d="M 0,0 L 0,10 L 10,10 L 10,0 L 0,0M 4,4 L 5,4 L 5,5 L 4,5 L 4,4 z " style=""/></g>
```

View File

@ -560,77 +560,6 @@ Result:
└───────────────────────────┘
```
## Entropy-learned hashing (experimental)
Entropy-learned hashing is disabled by default, to enable: `SET allow_experimental_hash_functions=1`.
Entropy-learned hashing is not a standalone hash function like `metroHash64`, `cityHash64`, `sipHash64` etc. Instead, it aims to preprocess
the data to be hashed in a way that a standalone hash function can be computed more efficiently while not compromising the hash quality,
i.e. the randomness of the hashes. For that, entropy-based hashing chooses a subset of the bytes in a training data set of Strings which has
the same randomness (entropy) as the original Strings. For example, if the Strings are on average 100 bytes long, and we pick a subset of 5
bytes, then a hash function will be 95% less expensive to evaluate. For details of the method, refer to [Entropy-Learned Hashing: Constant
Time Hashing with Controllable Uniformity](https://doi.org/10.1145/3514221.3517894).
Entropy-learned hashing has two phases:
1. A training phase on a representative but typically small set of Strings to be hashed. Training consists of two steps:
- Function `prepareTrainEntropyLearnedHash(data, id)` caches the training data in a global state under a given `id`. It returns dummy
value `0` on every row.
- Function `trainEntropyLearnedHash(id)` computes a minimal partial sub-key of the training data stored under `id` in the global
state. The cached training data in the global state is replaced by the partial key. Dummy value `0` is returned on every row.
2. An evaluation phase where hashes are computed using the previously calculated partial sub-keys. Function `entropyLearnedHash(data, id)`
hashes `data` using the partial subkey stored as `id`. CityHash64 is used as hash function.
The reason that the training phase comprises two steps is that ClickHouse processes data at chunk granularity but entropy-learned hashing
needs to process the entire training set at once.
Since functions `prepareTrainEntropyLearnedHash()` and `trainEntropyLearnedHash()` access global state, they should not be called in
parallel with the same `id`.
**Syntax**
``` sql
prepareTrainEntropyLearnedHash(data, id);
trainEntropyLearnedHash(id);
entropyLearnedHash(data, id);
```
**Example**
```sql
SET allow_experimental_hash_functions=1;
CREATE TABLE tab (col String) ENGINE=Memory;
INSERT INTO tab VALUES ('aa'), ('ba'), ('ca');
SELECT prepareTrainEntropyLearnedHash(col, 'id1') AS prepared FROM tab;
SELECT trainEntropyLearnedHash('id1') AS trained FROM tab;
SELECT entropyLearnedHash(col, 'id1') as hashes FROM tab;
```
Result:
``` response
┌─prepared─┐
│ 0 │
│ 0 │
│ 0 │
└──────────┘
┌─trained─┐
│ 0 │
│ 0 │
│ 0 │
└─────────┘
┌───────────────hashes─┐
│ 2603192927274642682 │
│ 4947675599669400333 │
│ 10783339242466472992 │
└──────────────────────┘
```
## metroHash64
Produces a 64-bit [MetroHash](http://www.jandrewrogers.com/2015/05/27/metrohash/) hash value.

View File

@ -2480,3 +2480,75 @@ Result:
│ 286 │
└──────────────────────────┘
```
## generateRandomStructure
Generates a random table structure in the format `column1_name column1_type, column2_name column2_type, ...`.
**Syntax**
``` sql
generateRandomStructure([number_of_columns, seed])
```
**Arguments**
- `number_of_columns` — The desired number of columns in the result table structure. If set to 0 or `Null`, the number of columns will be random from 1 to 128. Default value: `Null`.
- `seed` — Random seed to produce stable results. If `seed` is not specified or set to `Null`, it is randomly generated.
All arguments must be constant.
**Returned value**
- Randomly generated table structure.
Type: [String](../../sql-reference/data-types/string.md).
**Examples**
Query:
``` sql
SELECT generateRandomStructure()
```
Result:
``` text
┌─generateRandomStructure()─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ c1 Decimal32(5), c2 Date, c3 Tuple(LowCardinality(String), Int128, UInt64, UInt16, UInt8, IPv6), c4 Array(UInt128), c5 UInt32, c6 IPv4, c7 Decimal256(64), c8 Decimal128(3), c9 UInt256, c10 UInt64, c11 DateTime │
└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
Query:
``` sql
SELECT generateRandomStructure(1)
```
Result:
``` text
┌─generateRandomStructure(1)─┐
│ c1 Map(UInt256, UInt16) │
└────────────────────────────┘
```
Query:
``` sql
SELECT generateRandomStructure(NULL, 33)
```
Result:
``` text
┌─generateRandomStructure(NULL, 33)─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ c1 DateTime, c2 Enum8('c2V0' = 0, 'c2V1' = 1, 'c2V2' = 2, 'c2V3' = 3), c3 LowCardinality(Nullable(FixedString(30))), c4 Int16, c5 Enum8('c5V0' = 0, 'c5V1' = 1, 'c5V2' = 2, 'c5V3' = 3), c6 Nullable(UInt8), c7 String, c8 Nested(e1 IPv4, e2 UInt8, e3 UInt16, e4 UInt16, e5 Int32, e6 Map(Date, Decimal256(70))) │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
**Note**: the maximum nesting depth of complex types (Array, Tuple, Map, Nested) is limited to 16.
This function can be used together with [generateRandom](../../sql-reference/table-functions/generate.md) to generate completely random tables.

View File

@ -119,19 +119,35 @@ The compiled expression cache is enabled/disabled with the query/user/profile-le
Resets the [query cache](../../operations/query-cache.md).
```sql
SYSTEM DROP QUERY CACHE [ON CLUSTER cluster_name]
```
## FLUSH LOGS
Flushes buffered log messages to system tables, e.g. system.query_log. Mainly useful for debugging since most system tables have a default flush interval of 7.5 seconds.
This will also create system tables even if the message queue is empty.
```sql
SYSTEM FLUSH LOGS [ON CLUSTER cluster_name]
```
## RELOAD CONFIG
Reloads ClickHouse configuration. Used when configuration is stored in ZooKeeper. Note that `SYSTEM RELOAD CONFIG` does not reload `USER` configuration stored in ZooKeeper, it only reloads `USER` configuration that is stored in `users.xml`. To reload all `USER` config, use `SYSTEM RELOAD USERS`.
```sql
SYSTEM RELOAD CONFIG [ON CLUSTER cluster_name]
```
## RELOAD USERS
Reloads all access storages, including: users.xml, local disk access storage, replicated (in ZooKeeper) access storage.
```sql
SYSTEM RELOAD USERS [ON CLUSTER cluster_name]
```
## SHUTDOWN
Normally shuts down ClickHouse (like `service clickhouse-server stop` / `kill {$pid_clickhouse-server}`)
@ -149,7 +165,7 @@ ClickHouse can manage [distributed](../../engines/table-engines/special/distribu
Disables background data distribution when inserting data into distributed tables.
``` sql
SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name>
SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name> [ON CLUSTER cluster_name]
```
### FLUSH DISTRIBUTED
@ -157,7 +173,7 @@ SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name>
Forces ClickHouse to send data to cluster nodes synchronously. If any nodes are unavailable, ClickHouse throws an exception and stops query execution. You can retry the query until it succeeds, which will happen when all nodes are back online.
``` sql
SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name>
SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name> [ON CLUSTER cluster_name]
```
### START DISTRIBUTED SENDS
@ -165,7 +181,7 @@ SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name>
Enables background data distribution when inserting data into distributed tables.
``` sql
SYSTEM START DISTRIBUTED SENDS [db.]<distributed_table_name>
SYSTEM START DISTRIBUTED SENDS [db.]<distributed_table_name> [ON CLUSTER cluster_name]
```
## Managing MergeTree Tables
@ -177,7 +193,7 @@ ClickHouse can manage background processes in [MergeTree](../../engines/table-en
Provides possibility to stop background merges for tables in the MergeTree family:
``` sql
SYSTEM STOP MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
SYSTEM STOP MERGES [ON CLUSTER cluster_name] [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
```
:::note
@ -189,7 +205,7 @@ SYSTEM STOP MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
Provides possibility to start background merges for tables in the MergeTree family:
``` sql
SYSTEM START MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
SYSTEM START MERGES [ON CLUSTER cluster_name] [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
```
### STOP TTL MERGES
@ -198,7 +214,7 @@ Provides possibility to stop background delete old data according to [TTL expres
Returns `Ok.` even if the table does not exist or does not use a MergeTree engine. Returns an error when the database does not exist:
``` sql
SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
SYSTEM STOP TTL MERGES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
```
### START TTL MERGES
@ -207,7 +223,7 @@ Provides possibility to start background delete old data according to [TTL expre
Returns `Ok.` even if table does not exist. Returns error when database does not exist:
``` sql
SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
SYSTEM START TTL MERGES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
```
### STOP MOVES
@ -216,7 +232,7 @@ Provides possibility to stop background move data according to [TTL table expres
Returns `Ok.` even if table does not exist. Returns error when database does not exist:
``` sql
SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
SYSTEM STOP MOVES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
```
### START MOVES
@ -225,7 +241,7 @@ Provides possibility to start background move data according to [TTL table expre
Returns `Ok.` even if table does not exist. Returns error when database does not exist:
``` sql
SYSTEM START MOVES [[db.]merge_tree_family_table_name]
SYSTEM START MOVES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
```
### SYSTEM UNFREEZE {#query_language-system-unfreeze}
@ -241,7 +257,7 @@ SYSTEM UNFREEZE WITH NAME <backup_name>
Waits until all asynchronously loading data parts of a table (outdated data parts) have been loaded.
``` sql
SYSTEM WAIT LOADING PARTS [db.]merge_tree_family_table_name
SYSTEM WAIT LOADING PARTS [ON CLUSTER cluster_name] [db.]merge_tree_family_table_name
```
## Managing ReplicatedMergeTree Tables
@ -254,7 +270,7 @@ Provides possibility to stop background fetches for inserted parts for tables in
Always returns `Ok.` regardless of the table engine and even if table or database does not exist.
``` sql
SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name]
SYSTEM STOP FETCHES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
### START FETCHES
@ -263,7 +279,7 @@ Provides possibility to start background fetches for inserted parts for tables i
Always returns `Ok.` regardless of the table engine and even if table or database does not exist.
``` sql
SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
SYSTEM START FETCHES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
### STOP REPLICATED SENDS
@ -271,7 +287,7 @@ SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
Provides possibility to stop background sends to other replicas in cluster for new inserted parts for tables in the `ReplicatedMergeTree` family:
``` sql
SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
SYSTEM STOP REPLICATED SENDS [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
### START REPLICATED SENDS
@ -279,7 +295,7 @@ SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
Provides possibility to start background sends to other replicas in cluster for new inserted parts for tables in the `ReplicatedMergeTree` family:
``` sql
SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
SYSTEM START REPLICATED SENDS [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
### STOP REPLICATION QUEUES
@ -287,7 +303,7 @@ SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
Provides possibility to stop background fetch tasks from replication queues which are stored in ZooKeeper for tables in the `ReplicatedMergeTree` family. Possible background tasks types - merges, fetches, mutation, DDL statements with ON CLUSTER clause:
``` sql
SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
SYSTEM STOP REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
### START REPLICATION QUEUES
@ -295,7 +311,7 @@ SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
Provides possibility to start background fetch tasks from replication queues which are stored in ZooKeeper for tables in the `ReplicatedMergeTree` family. Possible background tasks types - merges, fetches, mutation, DDL statements with ON CLUSTER clause:
``` sql
SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
### SYNC REPLICA
@ -318,7 +334,7 @@ Provides possibility to reinitialize Zookeeper session's state for `ReplicatedMe
Initialization of replication queue based on ZooKeeper data happens in the same way as for `ATTACH TABLE` statement. For a short time, the table will be unavailable for any operations.
``` sql
SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name
SYSTEM RESTART REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name
```
### RESTORE REPLICA
@ -384,7 +400,7 @@ Provides possibility to reinitialize Zookeeper sessions state for all `Replicate
Allows to drop filesystem cache.
```sql
SYSTEM DROP FILESYSTEM CACHE
SYSTEM DROP FILESYSTEM CACHE [ON CLUSTER cluster_name]
```
### SYNC FILE CACHE
@ -396,5 +412,5 @@ It's too heavy and has potential for misuse.
Will do sync syscall.
```sql
SYSTEM SYNC FILE CACHE
SYSTEM SYNC FILE CACHE [ON CLUSTER cluster_name]
```

View File

@ -11,7 +11,7 @@ Allows to populate test tables with data.
Not all types are supported.
``` sql
generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_string_length'[, 'max_array_length']]])
generateRandom(['name TypeName[, name TypeName]...'[, 'random_seed'[, 'max_string_length'[, 'max_array_length']]]])
```
**Arguments**
@ -53,5 +53,49 @@ SELECT * FROM random;
└──────────────────────────────┴──────────────┴────────────────────────────────────────────────────────────────────┘
```
In combination with [generateRandomStructure](../../sql-reference/functions/other-functions.md#generateRandomStructure):
```sql
SELECT * FROM generateRandom(generateRandomStructure(4, 101), 101) LIMIT 3;
```
```text
┌──────────────────c1─┬──────────────────c2─┬─c3─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─c4──────────────────────────────────────┐
│ 1996-04-15 06:40:05 │ 33954608387.2844801 │ ['232.78.216.176','9.244.59.211','211.21.80.152','44.49.94.109','165.77.195.182','68.167.134.239','212.13.24.185','1.197.255.35','192.55.131.232'] │ 45d9:2b52:ab6:1c59:185b:515:c5b6:b781 │
│ 2063-01-13 01:22:27 │ 36155064970.9514454 │ ['176.140.188.101'] │ c65a:2626:41df:8dee:ec99:f68d:c6dd:6b30 │
│ 2090-02-28 14:50:56 │ 3864327452.3901373 │ ['155.114.30.32'] │ 57e9:5229:93ab:fbf3:aae7:e0e4:d1eb:86b │
└─────────────────────┴─────────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────────────────────────────┘
```
With missing `structure` argument (in this case the structure is random):
```sql
SELECT * FROM generateRandom() LIMIT 3;
```
```text
┌───c1─┬─────────c2─┬─────────────────────c3─┬──────────────────────c4─┬─c5───────┐
│ -128 │ 317300854 │ 2030-08-16 08:22:20.65 │ 1994-08-16 12:08:56.745 │ R0qgiC46 │
│ 40 │ -744906827 │ 2059-04-16 06:31:36.98 │ 1975-07-16 16:28:43.893 │ PuH4M*MZ │
│ -55 │ 698652232 │ 2052-08-04 20:13:39.68 │ 1998-09-20 03:48:29.279 │ │
└──────┴────────────┴────────────────────────┴─────────────────────────┴──────────┘
```
With random seed both for random structure and random data:
```sql
SELECT * FROM generateRandom(11) LIMIT 3;
```
```text
┌───────────────────────────────────────c1─┬─────────────────────────────────────────────────────────────────────────────c2─┬─────────────────────────────────────────────────────────────────────────────c3─┬─────────c4─┬─────────────────────────────────────────────────────────────────────────────c5─┬──────────────────────c6─┬─c7──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─c8──────────────────────────────────────┬─────────c9─┐
│ -77422512305044606600216318673365695785 │ 636812099959807642229.503817849012019401335326013846687285151335352272727523 │ -34944452809785978175157829109276115789694605299387223845886143311647505037529 │ 544473976 │ 111220388331710079615337037674887514156741572807049614590010583571763691328563 │ 22016.22623506465 │ {'2052-01-31 20:25:33':4306400876908509081044405485378623663,'1993-04-16 15:58:49':164367354809499452887861212674772770279,'2101-08-19 03:07:18':-60676948945963385477105077735447194811,'2039-12-22 22:31:39':-59227773536703059515222628111999932330} │ a7b2:8f58:4d07:6707:4189:80cf:92f5:902d │ 1950-07-14 │
│ -159940486888657488786004075627859832441 │ 629206527868163085099.8195700356331771569105231840157308480121506729741348442 │ -53203761250367440823323469081755775164053964440214841464405368882783634063735 │ 2187136525 │ 94881662451116595672491944222189810087991610568040618106057495823910493624275 │ 1.3095786748458954e-104 │ {} │ a051:e3da:2e0a:c69:7835:aed6:e8b:3817 │ 1943-03-25 │
│ -5239084224358020595591895205940528518 │ -529937657954363597180.1709207212648004850138812370209091520162977548101577846 │ 47490343304582536176125359129223180987770215457970451211489086575421345731671 │ 1637451978 │ 101899445785010192893461828129714741298630410942962837910400961787305271699002 │ 2.4344456058391296e223 │ {'2013-12-22 17:42:43':80271108282641375975566414544777036006,'2041-03-08 10:28:17':169706054082247533128707458270535852845,'1986-08-31 23:07:38':-54371542820364299444195390357730624136,'2094-04-23 21:26:50':7944954483303909347454597499139023465} │ 1293:a726:e899:9bfc:8c6f:2aa1:22c9:b635 │ 1924-11-20 │
└──────────────────────────────────────────┴────────────────────────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────────────────────────────────┴────────────┴────────────────────────────────────────────────────────────────────────────────┴─────────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────────────────────────────┴────────────┘
```
**Note:** `generateRandom(generateRandomStructure(), [random seed], max_string_length, max_array_length)` with large enough `max_array_length` can generate really huge output due to possible big nesting depth (up to 16) of complex types (`Array`, `Tuple`, `Map`, `Nested`).
## Related content
- Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse)

View File

@ -39,7 +39,7 @@ SELECT name, status FROM system.dictionaries;
**Синтаксис**
```sql
SYSTEM RELOAD MODELS
SYSTEM RELOAD MODELS [ON CLUSTER cluster_name]
```
## RELOAD MODEL {#query_language-system-reload-model}
@ -49,7 +49,7 @@ SYSTEM RELOAD MODELS
**Синтаксис**
```sql
SYSTEM RELOAD MODEL <model_path>
SYSTEM RELOAD MODEL [ON CLUSTER cluster_name] <model_path>
```
## RELOAD FUNCTIONS {#query_language-system-reload-functions}
@ -59,8 +59,8 @@ SYSTEM RELOAD MODEL <model_path>
**Синтаксис**
```sql
RELOAD FUNCTIONS
RELOAD FUNCTION function_name
RELOAD FUNCTIONS [ON CLUSTER cluster_name]
RELOAD FUNCTION function_name [ON CLUSTER cluster_name]
```
## DROP DNS CACHE {#query_language-system-drop-dns-cache}
@ -106,10 +106,18 @@ Cкомпилированные выражения используются ко
Записывает буферы логов в системные таблицы (например system.query_log). Позволяет не ждать 7.5 секунд при отладке.
Если буфер логов пустой, то этот запрос просто создаст системные таблицы.
```sql
SYSTEM FLUSH LOGS [ON CLUSTER cluster_name]
```
## RELOAD CONFIG {#query_language-system-reload-config}
Перечитывает конфигурацию настроек ClickHouse. Используется при хранении конфигурации в zookeeper.
```sql
SYSTEM RELOAD CONFIG [ON CLUSTER cluster_name]
```
## SHUTDOWN {#query_language-system-shutdown}
Штатно завершает работу ClickHouse (аналог `service clickhouse-server stop` / `kill {$pid_clickhouse-server}`)
@ -127,7 +135,7 @@ ClickHouse может оперировать [распределёнными](..
Отключает фоновую отправку при вставке данных в распределённые таблицы.
``` sql
SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name>
SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name> [ON CLUSTER cluster_name]
```
### FLUSH DISTRIBUTED {#query_language-system-flush-distributed}
@ -135,7 +143,7 @@ SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name>
В синхронном режиме отправляет все данные на узлы кластера. Если какие-либо узлы недоступны, ClickHouse генерирует исключение и останавливает выполнение запроса. Такой запрос можно повторять до успешного завершения, что будет означать возвращение связанности с остальными узлами кластера.
``` sql
SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name>
SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name> [ON CLUSTER cluster_name]
```
### START DISTRIBUTED SENDS {#query_language-system-start-distributed-sends}
@ -143,7 +151,7 @@ SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name>
Включает фоновую отправку при вставке данных в распределенные таблицы.
``` sql
SYSTEM START DISTRIBUTED SENDS [db.]<distributed_table_name>
SYSTEM START DISTRIBUTED SENDS [db.]<distributed_table_name> [ON CLUSTER cluster_name]
```
## Managing MergeTree Tables {#query-language-system-mergetree}
@ -155,7 +163,7 @@ ClickHouse может управлять фоновыми процессами
Позволяет остановить фоновые мержи для таблиц семейства MergeTree:
``` sql
SYSTEM STOP MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
SYSTEM STOP MERGES [ON CLUSTER cluster_name] [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
```
:::note
@ -166,7 +174,7 @@ SYSTEM STOP MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
Включает фоновые мержи для таблиц семейства MergeTree:
``` sql
SYSTEM START MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
SYSTEM START MERGES [ON CLUSTER cluster_name] [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
```
### STOP TTL MERGES {#query_language-stop-ttl-merges}
@ -175,7 +183,7 @@ SYSTEM START MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name
Возвращает `Ok.`, даже если указана несуществующая таблица или таблица имеет тип, отличный от MergeTree. Возвращает ошибку, если указана несуществующая база данных:
``` sql
SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
SYSTEM STOP TTL MERGES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
```
### START TTL MERGES {#query_language-start-ttl-merges}
@ -184,7 +192,7 @@ SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
Возвращает `Ok.`, даже если указана несуществующая таблица или таблица имеет тип, отличный от MergeTree. Возвращает ошибку, если указана несуществующая база данных:
``` sql
SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
SYSTEM START TTL MERGES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
```
### STOP MOVES {#query_language-stop-moves}
@ -193,7 +201,7 @@ SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
Возвращает `Ok.`, даже если указана несуществующая таблица или таблица имеет тип, отличный от MergeTree. Возвращает ошибку, если указана несуществующая база данных:
``` sql
SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
SYSTEM STOP MOVES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
```
### START MOVES {#query_language-start-moves}
@ -202,7 +210,7 @@ SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
Возвращает `Ok.`, даже если указана несуществующая таблица или таблица имеет тип, отличный от MergeTree. Возвращает ошибку, если указана несуществующая база данных:
``` sql
SYSTEM START MOVES [[db.]merge_tree_family_table_name]
SYSTEM START MOVES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
```
### SYSTEM UNFREEZE {#query_language-system-unfreeze}
@ -223,7 +231,7 @@ ClickHouse может управлять фоновыми процессами
Всегда возвращает `Ok.` вне зависимости от типа таблицы и даже если таблица или база данных не существует.
``` sql
SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name]
SYSTEM STOP FETCHES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
### START FETCHES {#query_language-system-start-fetches}
@ -232,7 +240,7 @@ SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name]
Всегда возвращает `Ok.` вне зависимости от типа таблицы и даже если таблица или база данных не существует.
``` sql
SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
SYSTEM START FETCHES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
### STOP REPLICATED SENDS {#query_language-system-stop-replicated-sends}
@ -240,7 +248,7 @@ SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
Позволяет остановить фоновые процессы отсылки новых вставленных кусков данных другим репликам в кластере для таблиц семейства `ReplicatedMergeTree`:
``` sql
SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
SYSTEM STOP REPLICATED SENDS [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
### START REPLICATED SENDS {#query_language-system-start-replicated-sends}
@ -248,7 +256,7 @@ SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
Позволяет запустить фоновые процессы отсылки новых вставленных кусков данных другим репликам в кластере для таблиц семейства `ReplicatedMergeTree`:
``` sql
SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
SYSTEM START REPLICATED SENDS [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
### STOP REPLICATION QUEUES {#query_language-system-stop-replication-queues}
@ -256,7 +264,7 @@ SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
Останавливает фоновые процессы разбора заданий из очереди репликации которая хранится в Zookeeper для таблиц семейства `ReplicatedMergeTree`. Возможные типы заданий - merges, fetches, mutation, DDL запросы с ON CLUSTER:
``` sql
SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
SYSTEM STOP REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
### START REPLICATION QUEUES {#query_language-system-start-replication-queues}
@ -264,7 +272,7 @@ SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
Запускает фоновые процессы разбора заданий из очереди репликации, которая хранится в Zookeeper, для таблиц семейства `ReplicatedMergeTree`. Возможные типы заданий - merges, fetches, mutation, DDL запросы с ON CLUSTER:
``` sql
SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
### SYNC REPLICA {#query_language-system-sync-replica}
@ -287,7 +295,7 @@ SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHT
Инициализация очереди репликации на основе данных ZooKeeper происходит так же, как при `ATTACH TABLE`. Некоторое время таблица будет недоступна для любых операций.
``` sql
SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name
SYSTEM RESTART REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name
```
### RESTORE REPLICA {#query_language-system-restore-replica}

View File

@ -71,10 +71,18 @@ SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk';
将日志信息缓冲数据刷入系统表（例如system.query_log）。调试时允许等待不超过7.5秒。当信息队列为空时，会创建系统表。
```sql
SYSTEM FLUSH LOGS [ON CLUSTER cluster_name]
```
## RELOAD CONFIG {#query_language-system-reload-config}
重新加载ClickHouse的配置。用于当配置信息存放在ZooKeeper时。
```sql
SYSTEM RELOAD CONFIG [ON CLUSTER cluster_name]
```
## SHUTDOWN {#query_language-system-shutdown}
关闭ClickHouse服务（类似于 `service clickhouse-server stop` / `kill {$pid_clickhouse-server}`）
@ -93,7 +101,7 @@ ClickHouse可以管理 [distribute](../../engines/table-engines/special/distribu
当向分布式表插入数据时,禁用后台的分布式数据分发。
``` sql
SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name>
SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name> [ON CLUSTER cluster_name]
```
### FLUSH DISTRIBUTED {#query_language-system-flush-distributed}
@ -101,7 +109,7 @@ SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name>
强制让ClickHouse同步向集群节点发送数据。如果有节点失效，ClickHouse抛出异常并停止插入操作。当所有节点都恢复上线时，你可以重试之前的操作直到成功执行。
``` sql
SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name>
SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name> [ON CLUSTER cluster_name]
```
### START DISTRIBUTED SENDS {#query_language-system-start-distributed-sends}
@ -109,7 +117,7 @@ SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name>
当向分布式表插入数据时,允许后台的分布式数据分发。
``` sql
SYSTEM START DISTRIBUTED SENDS [db.]<distributed_table_name>
SYSTEM START DISTRIBUTED SENDS [db.]<distributed_table_name> [ON CLUSTER cluster_name]
```
## Managing MergeTree Tables {#query-language-system-mergetree}
@ -121,7 +129,7 @@ ClickHouse可以管理 [MergeTree](../../engines/table-engines/mergetree-family/
为MergeTree系列引擎表停止后台合并操作。
``` sql
SYSTEM STOP MERGES [[db.]merge_tree_family_table_name]
SYSTEM STOP MERGES [ON CLUSTER cluster_name] [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
```
:::note
@ -133,7 +141,7 @@ SYSTEM STOP MERGES [[db.]merge_tree_family_table_name]
为MergeTree系列引擎表启动后台合并操作。
``` sql
SYSTEM START MERGES [[db.]merge_tree_family_table_name]
SYSTEM START MERGES [ON CLUSTER cluster_name] [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
```
### STOP TTL MERGES {#query_language-stop-ttl-merges}
@ -142,7 +150,7 @@ SYSTEM START MERGES [[db.]merge_tree_family_table_name]
不管表存在与否,都返回 `OK.`。当数据库不存在时返回错误。
``` sql
SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
SYSTEM STOP TTL MERGES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
```
### START TTL MERGES {#query_language-start-ttl-merges}
@ -151,7 +159,7 @@ SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
``` sql
SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
SYSTEM START TTL MERGES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
```
### STOP MOVES {#query_language-stop-moves}
@ -160,7 +168,7 @@ SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
``` sql
SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
SYSTEM STOP MOVES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
```
### START MOVES {#query_language-start-moves}
@ -169,7 +177,7 @@ SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
``` sql
SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
SYSTEM START MOVES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
```
### SYSTEM UNFREEZE {#query_language-system-unfreeze}
@ -190,7 +198,7 @@ SYSTEM UNFREEZE WITH NAME <backup_name>
不管表引擎类型如何或表/数据库是否存在,都返回 `OK.`
``` sql
SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name]
SYSTEM STOP FETCHES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
### START FETCHES {#query_language-system-start-fetches}
@ -199,7 +207,7 @@ SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name]
不管表引擎类型如何或表/数据库是否存在,都返回 `OK.`
``` sql
SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
SYSTEM START FETCHES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
### STOP REPLICATED SENDS {#query_language-system-stop-replicated-sends}
@ -207,7 +215,7 @@ SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
停止通过后台分发 `ReplicatedMergeTree`系列引擎表中新插入的数据块到集群的其它副本节点。
``` sql
SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
SYSTEM STOP REPLICATED SENDS [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
### START REPLICATED SENDS {#query_language-system-start-replicated-sends}
@ -215,7 +223,7 @@ SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
启动通过后台分发 `ReplicatedMergeTree`系列引擎表中新插入的数据块到集群的其它副本节点。
``` sql
SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
SYSTEM START REPLICATED SENDS [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
### STOP REPLICATION QUEUES {#query_language-system-stop-replication-queues}
@ -224,7 +232,7 @@ SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
停止从Zookeeper中获取 `ReplicatedMergeTree`系列表的复制队列的后台任务。可能的后台任务类型包含：merges、fetches、mutation，以及带有 `ON CLUSTER`的ddl语句
``` sql
SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
SYSTEM STOP REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
### START REPLICATION QUEUES {#query_language-system-start-replication-queues}
@ -232,7 +240,7 @@ SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
启动从Zookeeper中获取 `ReplicatedMergeTree`系列表的复制队列的后台任务。可能的后台任务类型包含：merges、fetches、mutation，以及带有 `ON CLUSTER`的ddl语句
``` sql
SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
```
### SYNC REPLICA {#query_language-system-sync-replica}
@ -250,7 +258,7 @@ SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHT
``` sql
SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name
SYSTEM RESTART REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name
```
### RESTART REPLICAS {#query_language-system-restart-replicas}

View File

@ -55,6 +55,8 @@ option (ENABLE_CLICKHOUSE_KEEPER "ClickHouse alternative to ZooKeeper" ${ENABLE_
option (ENABLE_CLICKHOUSE_KEEPER_CONVERTER "Util allows to convert ZooKeeper logs and snapshots into clickhouse-keeper snapshot" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_KEEPER_CLIENT "ClickHouse Keeper Client" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_SU "A tool similar to 'su'" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_DISKS "A tool to manage disks" ${ENABLE_CLICKHOUSE_ALL})
@ -169,6 +171,13 @@ else()
message(STATUS "ClickHouse keeper-converter mode: OFF")
endif()
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
message(STATUS "ClickHouse keeper-client mode: ON")
else()
message(STATUS "ClickHouse keeper-client mode: OFF")
endif()
if (ENABLE_CLICKHOUSE_DISKS)
message(STATUS "Clickhouse disks mode: ON")
else()
@ -237,6 +246,10 @@ if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
add_subdirectory (keeper-converter)
endif()
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
add_subdirectory (keeper-client)
endif()
if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
add_subdirectory (odbc-bridge)
endif ()
@ -301,6 +314,9 @@ endif()
if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
clickhouse_target_link_split_lib(clickhouse keeper-converter)
endif()
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
clickhouse_target_link_split_lib(clickhouse keeper-client)
endif()
if (ENABLE_CLICKHOUSE_INSTALL)
clickhouse_target_link_split_lib(clickhouse install)
endif ()
@ -392,6 +408,11 @@ if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-converter" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter)
endif ()
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
add_custom_target (clickhouse-keeper-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper-client DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-client" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-client)
endif ()
if (ENABLE_CLICKHOUSE_DISKS)
add_custom_target (clickhouse-disks ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-disks DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-disks" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)

View File

@ -17,6 +17,7 @@
#cmakedefine01 ENABLE_CLICKHOUSE_ODBC_BRIDGE
#cmakedefine01 ENABLE_CLICKHOUSE_LIBRARY_BRIDGE
#cmakedefine01 ENABLE_CLICKHOUSE_KEEPER
#cmakedefine01 ENABLE_CLICKHOUSE_KEEPER_CLIENT
#cmakedefine01 ENABLE_CLICKHOUSE_KEEPER_CONVERTER
#cmakedefine01 ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER
#cmakedefine01 ENABLE_CLICKHOUSE_SU

View File

@ -0,0 +1,9 @@
# Translation units that make up the keeper-client program.
set (CLICKHOUSE_KEEPER_CLIENT_SOURCES KeeperClient.cpp Parser.cpp Commands.cpp)
# Libraries the keeper-client target links against (all PRIVATE).
set (CLICKHOUSE_KEEPER_CLIENT_LINK
PRIVATE
boost::program_options
dbms
)
# Registers the program under the name `keeper-client`.
# NOTE(review): presumably picks up the two variables above by naming convention - confirm against clickhouse_program_add.
clickhouse_program_add(keeper-client)

View File

@ -0,0 +1,196 @@
#include "Commands.h"
#include "KeeperClient.h"
namespace DB
{
/// Parses the optional path argument of `ls`.
/// The path is optional, so parsing succeeds whether or not one is present.
bool LSCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String path;
    if (parseKeeperPath(pos, expected, path))
        node->args.push_back(std::move(path));

    return true;
}
void LSCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
String path;
if (!query->args.empty())
path = client->getAbsolutePath(query->args[0].safeGet<String>());
else
path = client->cwd;
for (const auto & child : client->zookeeper->getChildren(path))
std::cout << child << " ";
std::cout << "\n";
}
/// Parses the optional target path of `cd`.
/// A missing path is not an error; the query is simply left without arguments.
bool CDCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String target;
    if (parseKeeperPath(pos, expected, target))
        node->args.push_back(std::move(target));

    return true;
}
/// Changes the client's working path to the given node if it exists.
/// Without an argument this is a no-op; a non-existent node is reported on stderr.
void CDCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
    if (query->args.empty())
        return;

    auto target = client->getAbsolutePath(query->args[0].safeGet<String>());
    if (client->zookeeper->exists(target))
    {
        client->cwd = target;
        return;
    }

    std::cerr << "Path " << target << " does not exists\n";
}
/// Parses `set <path> <value> [version]`.
/// Path and value are mandatory; a trailing number, if present, is stored as the third argument.
bool SetCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String path;
    if (!parseKeeperPath(pos, expected, path))
        return false;
    node->args.push_back(std::move(path));

    String value;
    if (!parseKeeperArg(pos, expected, value))
        return false;
    node->args.push_back(std::move(value));

    /// The version is optional, so a failed number parse is not an error.
    ASTPtr version_ast;
    const bool has_version = ParserNumber{}.parse(pos, version_ast, expected);
    if (has_version)
        node->args.push_back(version_ast->as<ASTLiteral &>().value);

    return true;
}
/// Writes the new value to the node; when a third argument is present it is passed on as the version.
void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
    const auto path = client->getAbsolutePath(query->args[0].safeGet<String>());
    const auto & value = query->args[1].safeGet<String>();

    if (query->args.size() == 2)
    {
        client->zookeeper->set(path, value);
        return;
    }

    const auto version = static_cast<Int32>(query->args[2].safeGet<Int64>());
    client->zookeeper->set(path, value, version);
}
/// Parses `create <path> <value> [mode]`.
/// `mode` is one of PERSISTENT, EPHEMERAL, PERSISTENT SEQUENTIAL, EPHEMERAL SEQUENTIAL
/// and defaults to PERSISTENT. The resulting query holds three arguments: path, value, mode.
bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String arg;
    if (!parseKeeperPath(pos, expected, arg))
        return false;
    node->args.push_back(std::move(arg));

    if (!parseKeeperArg(pos, expected, arg))
        return false;
    node->args.push_back(std::move(arg));

    int mode = zkutil::CreateMode::Persistent;

    /// The two-word modes must be tried first: otherwise the one-word keyword consumes
    /// `PERSISTENT` / `EPHEMERAL`, the `SEQUENTIAL` token is left unparsed, and the
    /// sequential modes can never be selected.
    if (ParserKeyword{"PERSISTENT SEQUENTIAL"}.ignore(pos, expected))
        mode = zkutil::CreateMode::PersistentSequential;
    else if (ParserKeyword{"EPHEMERAL SEQUENTIAL"}.ignore(pos, expected))
        mode = zkutil::CreateMode::EphemeralSequential;
    else if (ParserKeyword{"PERSISTENT"}.ignore(pos, expected))
        mode = zkutil::CreateMode::Persistent;
    else if (ParserKeyword{"EPHEMERAL"}.ignore(pos, expected))
        mode = zkutil::CreateMode::Ephemeral;

    node->args.push_back(mode);

    return true;
}
/// Creates the node using the path, value and mode stored in the parsed query.
void CreateCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
    const auto path = client->getAbsolutePath(query->args[0].safeGet<String>());
    const auto & value = query->args[1].safeGet<String>();
    const auto mode = static_cast<int>(query->args[2].safeGet<Int64>());

    client->zookeeper->create(path, value, mode);
}
/// Parses the mandatory path argument of `get`.
bool GetCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String path;
    const bool has_path = parseKeeperPath(pos, expected, path);
    if (has_path)
        node->args.push_back(std::move(path));

    return has_path;
}
/// Prints the value stored in the requested node, followed by a newline.
void GetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
    const auto path = client->getAbsolutePath(query->args[0].safeGet<String>());
    std::cout << client->zookeeper->get(path) << "\n";
}
/// Parses the mandatory path argument of `rm`.
bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String path;
    const bool has_path = parseKeeperPath(pos, expected, path);
    if (has_path)
        node->args.push_back(std::move(path));

    return has_path;
}
/// Removes the single node addressed by the query's path argument.
void RMCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
    const auto path = client->getAbsolutePath(query->args[0].safeGet<String>());
    client->zookeeper->remove(path);
}
/// Parses the mandatory path argument of `rmr`.
bool RMRCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    String path;
    const bool has_path = parseKeeperPath(pos, expected, path);
    if (has_path)
        node->args.push_back(std::move(path));

    return has_path;
}
/// Schedules a recursive removal of the given path.
/// Nothing is deleted here: the removal is handed to the client as a callback to run after confirmation.
void RMRCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
    String path = client->getAbsolutePath(query->args[0].safeGet<String>());

    auto remove_subtree = [client, path] { client->zookeeper->removeRecursive(path); };
    client->askConfirmation("You are going to recursively delete path " + path, remove_subtree);
}
/// `help` takes no arguments: nothing is consumed from the token stream and parsing always succeeds.
bool HelpCommand::parse(IParser::Pos & /* pos */, std::shared_ptr<ASTKeeperQuery> & /* node */, Expected & /* expected */) const
{
return true;
}
/// Prints the help message of every registered command, one per line.
void HelpCommand::execute(const ASTKeeperQuery * /* query */, KeeperClient * /* client */) const
{
    for (const auto & entry : KeeperClient::commands)
    {
        const auto & command = entry.second;
        std::cout << command->getHelpMessage() << "\n";
    }
}
/// Accepts a single bare word of exactly four characters and stores it as the only argument.
/// The position is advanced only when the word is accepted.
bool FourLetterWordCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
    expected.add(pos, "four-letter-word command");

    if (pos->type != TokenType::BareWord)
        return false;

    String word(pos->begin, pos->end);
    if (word.size() == 4)
    {
        ++pos;
        node->args.push_back(std::move(word));
        return true;
    }

    return false;
}
/// Sends the four-letter-word command to the server and prints the reply, followed by a newline.
void FourLetterWordCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
    const auto & word = query->args[0].safeGet<String>();
    std::cout << client->executeFourLetterCommand(word) << "\n";
}
}

View File

@ -0,0 +1,131 @@
#pragma once
#include "Parser.h"
namespace DB
{
class KeeperClient;
/// Interface of a single keeper-client command: how its arguments are parsed,
/// how it is executed, and how it describes itself.
class IKeeperClientCommand
{
public:
/// NOTE(review): only declared here; no definition is visible in this header - confirm it is needed.
static const String name;
/// Parses the command's arguments starting at `pos` into `node`; returns false on a syntax error.
virtual bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const = 0;
/// Runs the command described by `query` against `client`.
virtual void execute(const ASTKeeperQuery * query, KeeperClient * client) const = 0;
/// One-line usage text for the command.
virtual String getHelpMessage() const = 0;
/// Name under which the command is registered.
virtual String getName() const = 0;
virtual ~IKeeperClientCommand() = default;
};
/// Shared handle to a command implementation.
using Command = std::shared_ptr<IKeeperClientCommand>;
/// `ls` command: lists the children of a node.
class LSCommand : public IKeeperClientCommand
{
    String getName() const override
    {
        return "ls";
    }

    String getHelpMessage() const override
    {
        return "ls [path] -- Lists the nodes for the given path (default: cwd)";
    }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
};
/// `cd` command: changes the client's working path.
class CDCommand : public IKeeperClientCommand
{
    String getName() const override
    {
        return "cd";
    }

    String getHelpMessage() const override
    {
        return "cd [path] -- Change the working path (default `.`)";
    }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
};
/// `set` command: updates the value of an existing node, optionally guarded by a version.
class SetCommand : public IKeeperClientCommand
{
    String getName() const override
    {
        return "set";
    }

    String getHelpMessage() const override
    {
        return "set <path> <value> [version] -- Updates the node's value. Only update if version matches (default: -1)";
    }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
};
/// `create` command: creates a new node with the given value and an optional create mode.
class CreateCommand : public IKeeperClientCommand
{
    String getName() const override { return "create"; }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;

    /// The parser accepts an optional mode keyword after the value, so the usage line lists it.
    String getHelpMessage() const override
    {
        return "create <path> <value> [mode] -- Creates new node. "
               "Mode: PERSISTENT (default), EPHEMERAL, PERSISTENT SEQUENTIAL, EPHEMERAL SEQUENTIAL";
    }
};
/// `get` command: prints the value stored in a node.
class GetCommand : public IKeeperClientCommand
{
    String getName() const override
    {
        return "get";
    }

    String getHelpMessage() const override
    {
        return "get <path> -- Returns the node's value";
    }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
};
/// `rm` command: removes a single node.
class RMCommand : public IKeeperClientCommand
{
    String getName() const override { return "rm"; }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;

    /// The usage line must start with the registered command name (`rm`), like every other command's help.
    String getHelpMessage() const override { return "rm <path> -- Remove the node"; }
};
/// `rmr` command: recursively removes a path after the user confirms.
class RMRCommand : public IKeeperClientCommand
{
    String getName() const override
    {
        return "rmr";
    }

    String getHelpMessage() const override
    {
        return "rmr <path> -- Recursively deletes path. Confirmation required";
    }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
};
/// `help` command: prints the usage line of every registered command.
class HelpCommand : public IKeeperClientCommand
{
    String getName() const override
    {
        return "help";
    }

    String getHelpMessage() const override
    {
        return "help -- Prints this message";
    }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
};
/// `flwc` command: sends a four-letter-word command to the server and prints the reply.
class FourLetterWordCommand : public IKeeperClientCommand
{
    String getName() const override
    {
        return "flwc";
    }

    String getHelpMessage() const override
    {
        return "flwc <command> -- Executes four-letter-word command";
    }

    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;

    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
};
}

View File

@ -0,0 +1,343 @@
#include "KeeperClient.h"
#include "Commands.h"
#include <Client/ReplxxLineReader.h>
#include <Client/ClientBase.h>
#include <Common/EventNotifier.h>
#include <Common/filesystemHelpers.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Parsers/parseQuery.h>
#include <Poco/Util/HelpFormatter.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
/// Sends a four-letter-word command to the first configured host over a fresh
/// socket and returns everything the server writes back until EOF.
String KeeperClient::executeFourLetterCommand(const String & command)
{
    /// We need to create a new socket every time because ZooKeeper forcefully shuts down the connection after a four-letter-word command.
    Poco::Net::StreamSocket socket;

    const Poco::Net::SocketAddress address{zk_args.hosts[0]};
    socket.connect(address, zk_args.connection_timeout_ms * 1000);

    socket.setReceiveTimeout(zk_args.operation_timeout_ms * 1000);
    socket.setSendTimeout(zk_args.operation_timeout_ms * 1000);
    socket.setNoDelay(true);

    ReadBufferFromPocoSocket response_buf(socket);
    WriteBufferFromPocoSocket request_buf(socket);

    request_buf.write(command.data(), command.size());
    request_buf.next();

    String response;
    readStringUntilEOF(response, response_buf);
    response_buf.next();

    return response;
}
std::vector<String> KeeperClient::getCompletions(const String & prefix) const
{
Tokens tokens(prefix.data(), prefix.data() + prefix.size(), 0, false);
IParser::Pos pos(tokens, 0);
if (pos->type != TokenType::BareWord)
return registered_commands_and_four_letter_words;
++pos;
if (pos->isEnd())
return registered_commands_and_four_letter_words;
ParserToken{TokenType::Whitespace}.ignore(pos);
std::vector<String> result;
String string_path;
Expected expected;
if (!parseKeeperPath(pos, expected, string_path))
string_path = cwd;
if (!pos->isEnd())
return result;
fs::path path = string_path;
String parent_path;
if (string_path.ends_with("/"))
parent_path = getAbsolutePath(string_path);
else
parent_path = getAbsolutePath(path.parent_path());
try
{
for (const auto & child : zookeeper->getChildren(parent_path))
result.push_back(child);
}
catch (Coordination::Exception &) {}
std::sort(result.begin(), result.end());
return result;
}
/// Prints `prompt` followed by " Continue?" and arms the confirmation state:
/// the next line of input decides whether `callback` is run.
void KeeperClient::askConfirmation(const String & prompt, std::function<void()> && callback)
{
    std::cout << prompt << " Continue?\n";
    need_confirmation = true;
    /// `callback` is a named rvalue reference, i.e. an lvalue here; move it explicitly
    /// so the closure (and whatever it captured) is not copied.
    confirmation_callback = std::move(callback);
}
/// Resolves `relative` into a normalised absolute path.
/// Paths starting with '/' are taken as-is, everything else is resolved against `cwd`.
/// A trailing slash is dropped unless the result is just "/".
fs::path KeeperClient::getAbsolutePath(const String & relative) const
{
    String normalized = relative.starts_with('/')
        ? fs::weakly_canonical(relative)
        : fs::weakly_canonical(cwd / relative);

    if (normalized.size() > 1 && normalized.ends_with('/'))
        normalized.pop_back();

    return normalized;
}
/// Registers the given commands by name and rebuilds the sorted list of
/// words offered for completion (command names plus the known four-letter words).
void KeeperClient::loadCommands(std::vector<Command> && new_commands)
{
    auto & completion_words = registered_commands_and_four_letter_words;

    for (const auto & command : new_commands)
    {
        completion_words.emplace_back(command->getName());
        commands.emplace(completion_words.back(), command);
    }

    completion_words.insert(completion_words.end(), four_letter_word_commands.begin(), four_letter_word_commands.end());

    std::sort(completion_words.begin(), completion_words.end());
}
/// Declares the command-line options of the client on top of the base application's options.
/// Every option is bound to a configuration key of the same name.
void KeeperClient::defineOptions(Poco::Util::OptionSet & options)
{
    Poco::Util::Application::defineOptions(options);

    /// `help` is the only option without an argument.
    options.addOption(Poco::Util::Option("help", "", "show help and exit").binding("help"));

    struct OptionSpec
    {
        const char * full_name;
        const char * short_name;
        const char * description;
    };

    /// For these options the argument name and the config binding both equal the full name.
    const OptionSpec options_with_argument[] = {
        {"host", "h", "server hostname. default `localhost`"},
        {"port", "p", "server port. default `2181`"},
        {"query", "q", "will execute given query, then exit."},
        {"connection-timeout", "", "set connection timeout in seconds. default 10s."},
        {"session-timeout", "", "set session timeout in seconds. default 10s."},
        {"operation-timeout", "", "set operation timeout in seconds. default 10s."},
        {"history-file", "", "set path of history file. default `~/.keeper-client-history`"},
        {"log-level", "", "set log level"},
    };

    for (const auto & spec : options_with_argument)
    {
        options.addOption(
            Poco::Util::Option(spec.full_name, spec.short_name, spec.description)
                .argument(spec.full_name)
                .binding(spec.full_name));
    }
}
/// Prepares the client before `main` runs: wires up completion, registers the
/// built-in commands, resolves (and creates if missing) the history file,
/// sets the root logger level and initialises the event notifier.
void KeeperClient::initialize(Poco::Util::Application & /* self */)
{
    suggest.setCompletionsCallback(
        [this](const String & prefix, size_t /* prefix_length */) { return getCompletions(prefix); });

    loadCommands({
        std::make_shared<LSCommand>(),
        std::make_shared<CDCommand>(),
        std::make_shared<SetCommand>(),
        std::make_shared<CreateCommand>(),
        std::make_shared<GetCommand>(),
        std::make_shared<RMCommand>(),
        std::make_shared<RMRCommand>(),
        std::make_shared<HelpCommand>(),
        std::make_shared<FourLetterWordCommand>(),
    });

    const char * home_env = getenv("HOME"); // NOLINT(concurrency-mt-unsafe)
    const String home_dir = home_env ? home_env : "";

    /// An explicit --history-file wins over the default location under $HOME.
    history_file = config().has("history-file")
        ? config().getString("history-file")
        : home_dir + "/.keeper-client-history";

    const bool history_missing = !history_file.empty() && !fs::exists(history_file);
    if (history_missing)
    {
        try
        {
            FS::createFile(history_file);
        }
        catch (const ErrnoException & e)
        {
            /// The file appearing between the check and the creation is fine.
            if (e.getErrno() != EEXIST)
                throw;
        }
    }

    Poco::Logger::root().setLevel(config().getString("log-level", "error"));

    EventNotifier::init();
}
/// Splits `query` on ';' and processes every non-empty piece in order.
void KeeperClient::executeQuery(const String & query)
{
    std::vector<String> statements;
    boost::algorithm::split(statements, query, boost::is_any_of(";"));

    for (const auto & statement : statements)
    {
        if (statement.empty())
            continue;

        processQueryText(statement);
    }
}
bool KeeperClient::processQueryText(const String & text)
{
if (exit_strings.find(text) != exit_strings.end())
return false;
try
{
if (need_confirmation)
{
need_confirmation = false;
if (text.size() == 1 && (text == "y" || text == "Y"))
confirmation_callback();
return true;
}
KeeperParser parser;
String message;
const char * begin = text.data();
ASTPtr res = tryParseQuery(parser, begin, begin + text.size(), message, true, "", false, 0, 0, false);
if (!res)
{
std::cerr << message << "\n";
return true;
}
auto * query = res->as<ASTKeeperQuery>();
auto command = KeeperClient::commands.find(query->command);
command->second->execute(query, this);
}
catch (Coordination::Exception & err)
{
std::cerr << err.message() << "\n";
}
return true;
}
void KeeperClient::runInteractive()
{
LineReader::Patterns query_extenders = {"\\"};
LineReader::Patterns query_delimiters = {};
ReplxxLineReader lr(suggest, history_file, false, query_extenders, query_delimiters, {});
lr.enableBracketedPaste();
while (true)
{
String prompt;
if (need_confirmation)
prompt = "[y/n] ";
else
prompt = cwd.string() + " :) ";
auto input = lr.readLine(prompt, ":-] ");
if (input.empty())
break;
if (!processQueryText(input))
break;
}
}
int KeeperClient::main(const std::vector<String> & /* args */)
{
if (config().hasOption("help"))
{
Poco::Util::HelpFormatter help_formatter(KeeperClient::options());
auto header_str = fmt::format("{} [OPTION]\n", commandName());
help_formatter.setHeader(header_str);
help_formatter.format(std::cout);
return 0;
}
auto host = config().getString("host", "localhost");
auto port = config().getString("port", "2181");
zk_args.hosts = {host + ":" + port};
zk_args.connection_timeout_ms = config().getInt("connection-timeout", 10) * 1000;
zk_args.session_timeout_ms = config().getInt("session-timeout", 10) * 1000;
zk_args.operation_timeout_ms = config().getInt("operation-timeout", 10) * 1000;
zookeeper = std::make_unique<zkutil::ZooKeeper>(zk_args);
if (config().has("query"))
executeQuery(config().getString("query"));
else
runInteractive();
return 0;
}
}
int mainEntryClickHouseKeeperClient(int argc, char ** argv)
{
try
{
DB::KeeperClient client;
client.init(argc, argv);
return client.run();
}
catch (const DB::Exception & e)
{
std::cerr << DB::getExceptionMessage(e, false) << std::endl;
return 1;
}
catch (const boost::program_options::error & e)
{
std::cerr << "Bad arguments: " << e.what() << std::endl;
return DB::ErrorCodes::BAD_ARGUMENTS;
}
catch (...)
{
std::cerr << DB::getCurrentExceptionMessage(true) << std::endl;
return 1;
}
}

View File

@ -0,0 +1,69 @@
#pragma once
#include "Parser.h"
#include "Commands.h"
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Client/LineReader.h>
#include <IO/ReadBufferFromPocoSocket.h>
#include <IO/WriteBufferFromPocoSocket.h>
#include <Parsers/ASTLiteral.h>
#include <Poco/Net/StreamSocket.h>
#include <Poco/Util/Application.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
{
/// Four-letter-word commands known to the client; used for completion and parser hints.
/// Declared `inline` rather than `static`: this lives in a header, and `static` would give
/// every including translation unit its own separately constructed copy of the set.
inline const NameSet four_letter_word_commands
{
    "ruok", "mntr", "srvr", "stat", "srst", "conf",
    "cons", "crst", "envi", "dirs", "isro", "wchs",
    "wchc", "wchp", "dump", "csnp", "lgif", "rqld",
};
/// Command-line client application for a Keeper / ZooKeeper server.
/// Either executes the query passed on the command line or runs an interactive prompt.
class KeeperClient: public Poco::Util::Application
{
public:
KeeperClient() = default;
/// Sets up completion, the built-in commands, the history file and logging.
void initialize(Poco::Util::Application & self) override;
/// Connects to the server and runs the query or the interactive loop.
int main(const std::vector<String> & args) override;
/// Declares the supported command-line options.
void defineOptions(Poco::Util::OptionSet & options) override;
/// Resolves a path against `cwd` unless it already starts with '/'.
fs::path getAbsolutePath(const String & relative) const;
/// Prints the prompt and stores `callback` to be run if the next input confirms it.
void askConfirmation(const String & prompt, std::function<void()> && callback);
/// Sends a four-letter-word command over a separate socket and returns the reply.
String executeFourLetterCommand(const String & command);
/// Connection to the server; created in main().
zkutil::ZooKeeperPtr zookeeper;
/// Current working path used to resolve relative paths.
std::filesystem::path cwd = "/";
/// Action awaiting confirmation; set by askConfirmation().
std::function<void()> confirmation_callback;
/// Registered commands by name; shared by all instances and read by the parser.
inline static std::map<String, Command> commands;
protected:
/// Read-process loop of the interactive mode.
void runInteractive();
/// Processes one line of input; returns false when the client should exit.
bool processQueryText(const String & text);
/// Splits `query` on ';' and processes each non-empty piece.
void executeQuery(const String & query);
/// Registers commands and rebuilds the completion word list.
void loadCommands(std::vector<Command> && new_commands);
/// Completion candidates for the text typed so far.
std::vector<String> getCompletions(const String & prefix) const;
String history_file;
LineReader::Suggest suggest;
zkutil::ZooKeeperArgs zk_args;
/// True while the next input line is expected to answer a confirmation prompt.
bool need_confirmation = false;
/// Sorted command names and four-letter words offered for completion.
std::vector<String> registered_commands_and_four_letter_words;
};
}

View File

@ -0,0 +1,94 @@
#include "Parser.h"
#include "KeeperClient.h"
namespace DB
{
/// Parses one argument: either a bare word taken verbatim, or an identifier / string literal.
/// Trailing whitespace is skipped in both cases.
bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result)
{
    expected.add(pos, getTokenName(TokenType::BareWord));

    bool parsed = false;
    if (pos->type == TokenType::BareWord)
    {
        result.assign(pos->begin, pos->end);
        ++pos;
        parsed = true;
    }
    else
    {
        parsed = parseIdentifierOrStringLiteral(pos, expected, result);
    }

    ParserToken{TokenType::Whitespace}.ignore(pos);
    return parsed;
}
/// Parses a node path. A quoted identifier or string literal is taken as a whole;
/// otherwise consecutive bare-word, slash and dot tokens are glued together.
/// `path` is written only on success.
bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path)
{
    expected.add(pos, "path");

    const bool is_quoted = pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral;
    if (is_quoted)
        return parseIdentifierOrStringLiteral(pos, expected, path);

    auto is_path_token = [](TokenType type)
    {
        return type == TokenType::BareWord || type == TokenType::Slash || type == TokenType::Dot;
    };

    String collected;
    for (; is_path_token(pos->type); ++pos)
        collected.append(pos->begin, pos->end);

    ParserToken{TokenType::Whitespace}.ignore(pos);

    if (collected.empty())
        return false;

    path = collected;
    return true;
}
/// Parses one client query: a command name followed by that command's arguments.
/// An unknown name of exactly four characters is handed to the four-letter-word command.
bool KeeperParser::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    auto query = std::make_shared<ASTKeeperQuery>();

    /// Register every known command word as an expected token at this position.
    for (const auto & entry : KeeperClient::commands)
        expected.add(pos, entry.first.data());

    for (const auto & word : four_letter_word_commands)
        expected.add(pos, word.data());

    if (pos->type != TokenType::BareWord)
        return false;

    String command_name(pos->begin, pos->end);
    Command command;

    const auto found = KeeperClient::commands.find(command_name);
    if (found != KeeperClient::commands.end())
    {
        command = found->second;
        ++pos;
        ParserToken{TokenType::Whitespace}.ignore(pos);
    }
    else if (command_name.size() == 4)
    {
        /// Treat it like four-letter command
        /// Since keeper server can potentially have different version we don't want to match this command with embedded list
        command = std::make_shared<FourLetterWordCommand>();
        command_name = command->getName();
        /// We also don't move the position, so the command will be parsed as an argument
    }
    else
    {
        return false;
    }

    query->command = command_name;
    if (!command->parse(pos, query, expected))
        return false;

    ParserToken{TokenType::Whitespace}.ignore(pos);

    node = query;
    return true;
}
}

View File

@ -0,0 +1,36 @@
#pragma once
#include <Parsers/CommonParsers.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/IAST.h>
#include <Parsers/IParserBase.h>
#include <Parsers/parseIdentifierOrStringLiteral.h>
namespace DB
{
/// Parses a single command argument into `result`; returns false if none could be parsed.
bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result);
/// Parses a node path into `path`; returns false if no path is present at `pos`.
bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path);
/// AST node describing one keeper client query: the command name plus its arguments.
class ASTKeeperQuery : public IAST
{
public:
/// Constant node identifier; the delimiter argument is ignored.
String getID(char) const override { return "KeeperQuery"; }
/// Copy-constructs a new node, so `command` and `args` are duplicated.
ASTPtr clone() const override { return std::make_shared<ASTKeeperQuery>(*this); }
/// Name of the command to execute.
String command;
/// Command arguments. NOTE(review): presumably filled in by the individual commands while parsing - confirm against the command implementations.
std::vector<Field> args;
};
/// Parser for a single keeper client query.
class KeeperParser : public IParserBase
{
protected:
/// Human-readable name of this parser.
const char * getName() const override { return "Keeper client query"; }
/// Performs the actual parsing; defined out of line.
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}

View File

@ -62,6 +62,9 @@ int mainEntryClickHouseKeeper(int argc, char ** argv);
#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER
int mainEntryClickHouseKeeperConverter(int argc, char ** argv);
#endif
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
int mainEntryClickHouseKeeperClient(int argc, char ** argv);
#endif
#if ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER
int mainEntryClickHouseStaticFilesDiskUploader(int argc, char ** argv);
#endif
@ -133,6 +136,9 @@ std::pair<const char *, MainFunc> clickhouse_applications[] =
#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER
{"keeper-converter", mainEntryClickHouseKeeperConverter},
#endif
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
{"keeper-client", mainEntryClickHouseKeeperClient},
#endif
#if ENABLE_CLICKHOUSE_INSTALL
{"install", mainEntryClickHouseInstall},
{"start", mainEntryClickHouseStart},

View File

@ -90,14 +90,6 @@ namespace CurrentMetrics
namespace DB
{
/// Inputs recognised as a request to exit (judging by the name; the lookup itself is not part of this fragment).
/// Next to the Latin spellings the set holds the same words as they come out when typed on a Russian keyboard layout:
/// "учше" is "exit", "йгше" is "quit", "дщпщге" is "logout", and a trailing "ж" is the ';' key.
static const NameSet exit_strings
{
"exit", "quit", "logout", "учше", "йгше", "дщпщге",
"exit;", "quit;", "logout;", "учшеж", "йгшеж", "дщпщгеж",
"q", "й", "\\q", "\\Q", "\\й", "\\Й", ":q", "Жй"
};
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;

View File

@ -24,6 +24,14 @@ namespace po = boost::program_options;
namespace DB
{
/// Inputs recognised as a request to exit (judging by the name; the lookup itself is not part of this fragment).
/// Next to the Latin spellings the set holds the same words as they come out when typed on a Russian keyboard layout:
/// "учше" is "exit", "йгше" is "quit", "дщпщге" is "logout", and a trailing "ж" is the ';' key.
static const NameSet exit_strings
{
"exit", "quit", "logout", "учше", "йгше", "дщпщге",
"exit;", "quit;", "logout;", "учшеж", "йгшеж", "дщпщгеж",
"q", "й", "\\q", "\\Q", "\\й", "\\Й", ":q", "Жй"
};
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;

View File

@ -81,15 +81,33 @@ replxx::Replxx::completions_t LineReader::Suggest::getCompletions(const String &
std::lock_guard lock(mutex);
Words to_search;
bool no_case = false;
/// Only perform case sensitive completion when the prefix string contains any uppercase characters
if (std::none_of(prefix.begin(), prefix.end(), [](char32_t x) { return iswupper(static_cast<wint_t>(x)); }))
{
to_search = words_no_case;
no_case = true;
}
else
to_search = words;
if (custom_completions_callback)
{
auto new_words = custom_completions_callback(prefix, prefix_length);
assert(std::is_sorted(new_words.begin(), new_words.end()));
addNewWords(to_search, new_words, std::less<std::string>{});
}
if (no_case)
range = std::equal_range(
words_no_case.begin(), words_no_case.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched)
to_search.begin(), to_search.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched)
{
return strncasecmp(s.data(), prefix_searched.data(), prefix_length) < 0;
});
else
range = std::equal_range(words.begin(), words.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched)
range = std::equal_range(
to_search.begin(), to_search.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched)
{
return strncmp(s.data(), prefix_searched.data(), prefix_length) < 0;
});

View File

@ -18,15 +18,20 @@ public:
struct Suggest
{
    using Words = std::vector<std::string>;
    /// Produces additional completion candidates for the given prefix.
    /// The returned words must be sorted: getCompletions() asserts std::is_sorted on them.
    using Callback = std::function<Words(const String & prefix, size_t prefix_length)>;

    /// Get vector for the matched range of words if any.
    replxx::Replxx::completions_t getCompletions(const String & prefix, size_t prefix_length);
    void addWords(Words && new_words);

    /// Installs the callback consulted by getCompletions().
    /// The parameter is an rvalue reference, so its target is moved in rather than copied.
    void setCompletionsCallback(Callback && callback) { custom_completions_callback = std::move(callback); }

private:
    Words words TSA_GUARDED_BY(mutex);
    Words words_no_case TSA_GUARDED_BY(mutex);

    /// Empty (nullptr) until setCompletionsCallback() is called.
    Callback custom_completions_callback = nullptr;

    std::mutex mutex;
};

View File

@ -69,13 +69,23 @@ AsynchronousMetrics::AsynchronousMetrics(
/// CGroups v2
openFileIfExists("/sys/fs/cgroup/memory.max", cgroupmem_limit_in_bytes);
if (cgroupmem_limit_in_bytes)
{
openFileIfExists("/sys/fs/cgroup/memory.current", cgroupmem_usage_in_bytes);
}
openFileIfExists("/sys/fs/cgroup/cpu.max", cgroupcpu_max);
/// CGroups v1
if (!cgroupmem_limit_in_bytes)
{
openFileIfExists("/sys/fs/cgroup/memory/memory.limit_in_bytes", cgroupmem_limit_in_bytes);
if (!cgroupmem_usage_in_bytes)
openFileIfExists("/sys/fs/cgroup/memory/memory.usage_in_bytes", cgroupmem_usage_in_bytes);
}
if (!cgroupcpu_max)
{
openFileIfExists("/sys/fs/cgroup/cpu/cpu.cfs_period_us", cgroupcpu_cfs_period);
openFileIfExists("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", cgroupcpu_cfs_quota);
}
openSensors();
openBlockDevices();
@ -926,6 +936,61 @@ void AsynchronousMetrics::update(TimePoint update_time)
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
if (cgroupcpu_max)
{
try {
cgroupcpu_max->rewind();
uint64_t quota = 0;
uint64_t period = 0;
std::string line;
readText(line, *cgroupcpu_max);
auto space = line.find(' ');
if (line.rfind("max", space) == std::string::npos)
{
auto field1 = line.substr(0, space);
quota = std::stoull(field1);
}
if (space != std::string::npos)
{
auto field2 = line.substr(space + 1);
period = std::stoull(field2);
}
new_values["CGroupCpuCfsPeriod"] = { period, "The CFS period of CPU cgroup."};
new_values["CGroupCpuCfsQuota"] = { quota, "The CFS quota of CPU cgroup. If stated zero, the quota is max."};
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
else if (cgroupcpu_cfs_quota && cgroupcpu_cfs_period)
{
try {
cgroupcpu_cfs_quota->rewind();
cgroupcpu_cfs_period->rewind();
uint64_t quota = 0;
uint64_t period = 0;
tryReadText(quota, *cgroupcpu_cfs_quota);
tryReadText(period, *cgroupcpu_cfs_period);
new_values["CGroupCpuCfsPeriod"] = { period, "The CFS period of CPU cgroup."};
new_values["CGroupCpuCfsQuota"] = { quota, "The CFS quota of CPU cgroup. If stated zero, the quota is max."};
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
if (meminfo)
{
try

View File

@ -110,6 +110,9 @@ private:
std::optional<ReadBufferFromFilePRead> cgroupmem_limit_in_bytes;
std::optional<ReadBufferFromFilePRead> cgroupmem_usage_in_bytes;
std::optional<ReadBufferFromFilePRead> cgroupcpu_cfs_period;
std::optional<ReadBufferFromFilePRead> cgroupcpu_cfs_quota;
std::optional<ReadBufferFromFilePRead> cgroupcpu_max;
std::vector<std::unique_ptr<ReadBufferFromFilePRead>> thermal;

View File

@ -124,7 +124,7 @@ SpanHolder::SpanHolder(std::string_view _operation_name, SpanKind _kind)
this->start_time_us
= std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
/// Add new initialization here
this->addAttribute("clickhouse.thread_id", getThreadId());
}
catch (...)
{

View File

@ -202,6 +202,8 @@ void preparePostgreSQLArrayInfo(
parser = [](std::string & field) -> Field { return pqxx::from_string<float>(field); };
else if (which.isFloat64())
parser = [](std::string & field) -> Field { return pqxx::from_string<double>(field); };
else if (which.isUUID())
parser = [](std::string & field) -> Field { return parse<UUID>(field); };
else if (which.isString() || which.isFixedString())
parser = [](std::string & field) -> Field { return field; };
else if (which.isDate())

View File

@ -651,6 +651,8 @@ class IColumn;
M(UInt64, merge_tree_min_rows_for_concurrent_read_for_remote_filesystem, (20 * 8192), "If at least as many lines are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \
M(UInt64, merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem, (24 * 10 * 1024 * 1024), "If at least as many bytes are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \
M(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead of read with ignore.", 0) \
M(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes to read per task.", 0) \
M(Bool, merge_tree_use_const_size_tasks_for_remote_reading, true, "Whether to use constant size tasks for reading from a remote table.", 0) \
\
M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \

View File

@ -71,7 +71,7 @@ std::string extractTableName(const std::string & nested_name)
}
Block flatten(const Block & block)
static Block flattenImpl(const Block & block, bool flatten_named_tuple)
{
Block res;
@ -114,7 +114,7 @@ Block flatten(const Block & block)
else
res.insert(elem);
}
else if (const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(elem.type.get()))
else if (const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(elem.type.get()); type_tuple && flatten_named_tuple)
{
if (type_tuple->haveExplicitNames())
{
@ -143,6 +143,17 @@ Block flatten(const Block & block)
return res;
}
/// Flattens the block via flattenImpl with flatten_named_tuple = true, i.e. the DataTypeTuple branch of flattenImpl is enabled.
Block flatten(const Block & block)
{
return flattenImpl(block, true);
}
/// Runs flattenImpl with flatten_named_tuple = false, so its DataTypeTuple branch is never taken
/// and top-level tuple columns are not expanded by that branch.
Block flattenArrayOfTuples(const Block & block)
{
return flattenImpl(block, false);
}
namespace
{

View File

@ -23,6 +23,9 @@ namespace Nested
/// 2) For an Array with named Tuple element column, a Array(Tuple(x ..., y ..., ...)), replace it with multiple Array Columns, a.x ..., a.y ..., ...
Block flatten(const Block & block);
/// Same as flatten but only for Array with named Tuple element column.
Block flattenArrayOfTuples(const Block & block);
/// Collect Array columns in a form of `column_name.element_name` to single Array(Tuple(...)) column.
NamesAndTypesList collect(const NamesAndTypesList & names_and_types);

View File

@ -42,6 +42,11 @@ namespace ErrorCodes
extern const int ARGUMENT_OUT_OF_BOUND;
}
/// Size for buffers used for prefetch or pre-download: the larger of the configured
/// prefetch_buffer_size and DBMS_DEFAULT_BUFFER_SIZE, capped by the size of the whole file.
static size_t chooseBufferSize(const ReadSettings & settings, size_t file_size)
{
    const size_t preferred_size = std::max<size_t>(settings.prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE);
    return std::min<size_t>(preferred_size, file_size);
}
AsynchronousBoundedReadBuffer::AsynchronousBoundedReadBuffer(
ImplPtr impl_,
@ -49,11 +54,11 @@ AsynchronousBoundedReadBuffer::AsynchronousBoundedReadBuffer(
const ReadSettings & settings_,
AsyncReadCountersPtr async_read_counters_,
FilesystemReadPrefetchesLogPtr prefetches_log_)
: ReadBufferFromFileBase(settings_.remote_fs_buffer_size, nullptr, 0)
: ReadBufferFromFileBase(chooseBufferSize(settings_, impl_->getFileSize()), nullptr, 0)
, impl(std::move(impl_))
, read_settings(settings_)
, reader(reader_)
, prefetch_buffer(settings_.prefetch_buffer_size)
, prefetch_buffer(chooseBufferSize(settings_, impl->getFileSize()))
, query_id(CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr ? CurrentThread::getQueryId() : "")
, current_reader_id(getRandomASCIIString(8))
, log(&Poco::Logger::get("AsynchronousBoundedReadBuffer"))
@ -103,12 +108,10 @@ void AsynchronousBoundedReadBuffer::prefetch(Priority priority)
if (!hasPendingDataToRead())
return;
last_prefetch_info.submit_time = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::system_clock::now().time_since_epoch()).count();
last_prefetch_info.submit_time = std::chrono::system_clock::now();
last_prefetch_info.priority = priority;
chassert(prefetch_buffer.size() == read_settings.prefetch_buffer_size
|| prefetch_buffer.size() == read_settings.remote_fs_buffer_size);
chassert(prefetch_buffer.size() == chooseBufferSize(read_settings, impl->getFileSize()));
prefetch_future = asyncReadInto(prefetch_buffer.data(), prefetch_buffer.size(), priority);
ProfileEvents::increment(ProfileEvents::RemoteFSPrefetches);
}
@ -187,7 +190,7 @@ bool AsynchronousBoundedReadBuffer::nextImpl()
{
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::SynchronousRemoteReadWaitMicroseconds);
chassert(memory.size() == read_settings.prefetch_buffer_size || memory.size() == read_settings.remote_fs_buffer_size);
chassert(memory.size() == chooseBufferSize(read_settings, impl->getFileSize()));
std::tie(size, offset) = impl->readInto(memory.data(), memory.size(), file_offset_of_buffer_end, bytes_to_ignore);
ProfileEvents::increment(ProfileEvents::RemoteFSUnprefetchedReads);

View File

@ -1,11 +1,12 @@
#pragma once
#include "config.h"
#include <IO/ReadBufferFromFile.h>
#include <chrono>
#include <utility>
#include <IO/AsynchronousReader.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadSettings.h>
#include <Interpreters/FilesystemReadPrefetchesLog.h>
#include <utility>
#include "config.h"
namespace Poco { class Logger; }
@ -71,7 +72,7 @@ private:
struct LastPrefetchInfo
{
UInt64 submit_time = 0;
std::chrono::system_clock::time_point submit_time;
Priority priority;
};
LastPrefetchInfo last_prefetch_info;

View File

@ -124,7 +124,7 @@ void CachedOnDiskReadBufferFromFile::initialize(size_t offset, size_t size)
else
{
CreateFileSegmentSettings create_settings(is_persistent ? FileSegmentKind::Persistent : FileSegmentKind::Regular);
file_segments = cache->getOrSet(cache_key, offset, size, create_settings);
file_segments = cache->getOrSet(cache_key, offset, size, file_size.value(), create_settings);
}
/**
@ -529,6 +529,9 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)
ProfileEvents::FileSegmentPredownloadMicroseconds, predownload_watch.elapsedMicroseconds());
});
OpenTelemetry::SpanHolder span{
fmt::format("CachedOnDiskReadBufferFromFile::predownload(key={}, size={})", file_segment.key().toString(), bytes_to_predownload)};
if (bytes_to_predownload)
{
/// Consider this case. Some user needed segment [a, b] and downloaded it partially.
@ -795,6 +798,8 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
if (file_segments->empty())
return false;
const size_t original_buffer_size = internal_buffer.size();
bool implementation_buffer_can_be_reused = false;
SCOPE_EXIT({
try
@ -820,6 +825,9 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
}
}
if (use_external_buffer && !internal_buffer.empty())
internal_buffer.resize(original_buffer_size);
chassert(!file_segment.isDownloader());
}
catch (...)
@ -846,6 +854,11 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
chassert(!internal_buffer.empty());
/// We allocate buffers not less than 1M so that s3 requests will not be too small. But the same buffers (members of AsynchronousReadIndirectBufferFromRemoteFS)
/// are used for reading from files. Some of these reads are fairly small and their performance degrades when we use big buffers (up to ~20% for queries like Q23 from ClickBench).
if (use_external_buffer && read_type == ReadType::CACHED && settings.local_fs_buffer_size < internal_buffer.size())
internal_buffer.resize(settings.local_fs_buffer_size);
// Pass a valid external buffer for implementation_buffer to read into.
// We then take it back with another swap() after reading is done.
// (If we get an exception in between, we'll be left with an invalid internal_buffer. That's ok, as long as

View File

@ -79,7 +79,7 @@ std::future<IAsynchronousReader::Result> ThreadPoolRemoteFSReader::submit(Reques
auto async_read_counters = remote_fs_fd->getReadCounters();
std::optional<AsyncReadIncrement> increment = async_read_counters ? std::optional<AsyncReadIncrement>(async_read_counters) : std::nullopt;
auto watch = std::make_unique<Stopwatch>(CLOCK_MONOTONIC);
auto watch = std::make_unique<Stopwatch>(CLOCK_REALTIME);
Result result = remote_fs_fd->readInto(request.buf, request.size, request.offset, request.ignore);
watch->stop();

View File

@ -1,395 +0,0 @@
#include <base/defines.h>
#include <base/types.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>
#include <Common/Exception.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Interpreters/Context.h>
/// Implementation of entropy-learned hashing: https://doi.org/10.1145/3514221.3517894
/// If you change something in this file, please don't deviate too much from the pseudocode in the paper!
/// TODOs for future work:
/// - allow to specify an arbitrary hash function (currently always CityHash is used)
/// - allow function chaining a la entropyLearnedHash(trainEntropyLearnedHash())
/// - support more datatypes for data (besides String)
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
extern const int SUPPORT_IS_DISABLED;
}
namespace
{
using PartialKeyPositions = std::vector<size_t>;
using Entropies = std::vector<size_t>;
/// Fills `result` with the bytes of `key` located at `partial_key_positions`, in the order the positions are listed.
/// Positions at or past the end of `key` contribute nothing, so the result can be shorter than the position list.
void getPartialKey(std::string_view key, const PartialKeyPositions & partial_key_positions, String & result)
{
    result.clear();
    result.reserve(partial_key_positions.size());

    const size_t key_length = key.size();
    for (const size_t position : partial_key_positions)
    {
        if (position >= key_length)
            continue;
        result.push_back(key[position]);
    }
}
bool allPartialKeysAreUnique(const std::vector<std::string_view> & keys, const PartialKeyPositions & partial_key_positions)
{
std::unordered_set<String> unique_partial_keys;
unique_partial_keys.reserve(keys.size());
String partial_key;
for (const auto & key : keys)
{
getPartialKey(key, partial_key_positions, partial_key);
if (!unique_partial_keys.insert(partial_key).second)
return false;
}
return true;
}
// NextByte returns position of byte which adds the most entropy and the new entropy
std::pair<size_t, size_t> nextByte(const std::vector<std::string_view> & keys, size_t max_len, PartialKeyPositions & partial_key_positions)
{
size_t min_collisions = std::numeric_limits<size_t>::max();
size_t best_position = 0;
std::unordered_map<String, size_t> count_table;
count_table.reserve(keys.size());
String partial_key;
for (size_t i = 0; i < max_len; ++i)
{
count_table.clear();
partial_key_positions.push_back(i);
size_t collisions = 0;
for (const auto & key : keys)
{
getPartialKey(key, partial_key_positions, partial_key);
collisions += count_table[partial_key]++;
}
if (collisions < min_collisions)
{
min_collisions = collisions;
best_position = i;
}
partial_key_positions.pop_back();
}
return {best_position, min_collisions};
}
std::pair<PartialKeyPositions, Entropies> chooseBytes(const std::vector<std::string_view> & train_data)
{
if (train_data.size() <= 1)
return {};
PartialKeyPositions partial_key_positions;
Entropies entropies;
size_t max_len = 0; /// length of the longest key in training data
for (const auto & key : train_data)
max_len = std::max(max_len, key.size());
while (!allPartialKeysAreUnique(train_data, partial_key_positions))
{
auto [new_position, new_entropy] = nextByte(train_data, max_len, partial_key_positions);
if (!entropies.empty() && new_entropy == entropies.back())
break;
partial_key_positions.push_back(new_position);
entropies.push_back(new_entropy);
}
return {partial_key_positions, entropies};
}
/// Contains global state to convey information between SQL functions
/// - prepareTrainEntropyLearnedHash(),
/// - trainEntropyLearnedHash() and
/// - entropyLearnedHash().
///
/// The reason this machinery is necessary is that ClickHouse processes data in chunks of unpredictable size, yet the training step of
/// entropy-learned hashing needs to process *all* training data in one go. The downside is that the training step becomes quite expensive :-(
class EntropyLearnedHashGlobalState
{
public:
static EntropyLearnedHashGlobalState & instance()
{
static EntropyLearnedHashGlobalState instance;
return instance;
}
/// Called by prepareTrainEntropyLearnedHash()
void cacheTrainingSample(const String & user_name, const String & id, IColumn::MutablePtr column)
{
std::lock_guard lock(mutex);
auto & ids_for_user = global_state[user_name];
auto & training_samples_for_id = ids_for_user[id].training_samples;
training_samples_for_id.push_back(std::move(column));
}
void train(const String & user_name, const String & id)
{
std::lock_guard lock(mutex);
auto & ids_for_user = global_state[user_name];
auto & training_samples = ids_for_user[id].training_samples;
if (training_samples.empty())
return;
auto & concatenated_training_sample = training_samples[0];
for (size_t i = 1; i < training_samples.size(); ++i)
{
auto & other_training_sample = training_samples[i];
concatenated_training_sample->insertRangeFrom(*other_training_sample, 0, other_training_sample->size());
}
const ColumnString * concatenated_training_sample_string = checkAndGetColumn<ColumnString>(*concatenated_training_sample);
if (!concatenated_training_sample_string)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column");
const size_t num_rows = concatenated_training_sample_string->size();
std::vector<std::string_view> training_data;
for (size_t i = 0; i < num_rows; ++i)
{
std::string_view string_view = concatenated_training_sample_string->getDataAt(i).toView();
training_data.emplace_back(string_view);
}
PartialKeyPositions partial_key_positions = chooseBytes(training_data).first;
ids_for_user[id].partial_key_positions = partial_key_positions;
training_samples.clear();
}
const PartialKeyPositions & getPartialKeyPositions(const String & user_name, const String & id) const
{
std::lock_guard lock(mutex);
auto it_user = global_state.find(user_name);
if (it_user == global_state.end())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Id {} not registered for user in entropy learned hashing", id);
auto it_id = it_user->second.find(id);
if (it_id == it_user->second.end())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Id {} not registered for user in entropy learned hashing", id);
return it_id->second.partial_key_positions;
}
private:
mutable std::mutex mutex;
/// The state.
struct ColumnsAndPartialKeyPositions
{
/// Caches training data chunks. Filled by prepareTrainEntropyLearnedHash(), cleared by trainEntropyLearnedHash().
MutableColumns training_samples;
/// The result of the training phase. Filled by trainEntropyLearnedHash().
PartialKeyPositions partial_key_positions;
};
/// Maps a state id to the state.
using IdToColumnsAndPartialKeyPositions = std::map<String, ColumnsAndPartialKeyPositions>;
/// Maps the user name to a state id. As a result, the state id is unique at user scope.
using UserNameToId = std::map<String, IdToColumnsAndPartialKeyPositions>;
UserNameToId global_state TSA_GUARDED_BY(mutex);
};
}
/// Copies all chunks of the training sample column into the global state under a given id.
class FunctionPrepareTrainEntropyLearnedHash : public IFunction
{
public:
    static constexpr auto name = "prepareTrainEntropyLearnedHash";

    /// Creates the function bound to the name of the current user.
    /// Throws SUPPORT_IS_DISABLED unless the `allow_experimental_hash_functions` setting is enabled.
    static FunctionPtr create(ContextPtr context)
    {
        if (!context->getSettings().allow_experimental_hash_functions)
            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
                "Entropy-learned hashing is experimental. Set `allow_experimental_hash_functions` setting to enable it");

        return std::make_shared<FunctionPrepareTrainEntropyLearnedHash>(context->getUserName());
    }

    explicit FunctionPrepareTrainEntropyLearnedHash(const String & user_name_) : IFunction(), user_name(user_name_) {}

    String getName() const override { return name; }
    size_t getNumberOfArguments() const override { return 2; }
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
    bool useDefaultImplementationForConstants() const override { return true; }
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }

    /// Arguments: (data String, id String). The result type is UInt8.
    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
        FunctionArgumentDescriptors args{
            {"data", &isString<IDataType>, nullptr, "String"},
            {"id", &isString<IDataType>, nullptr, "String"}
        };

        validateFunctionArgumentTypes(*this, arguments, args);

        return std::make_shared<DataTypeUInt8>();
    }

    /// Stores a mutable copy of the data column in the global state under (user_name, id).
    /// Throws ILLEGAL_COLUMN when the id argument is not a constant column.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override
    {
        const IColumn * id_col = arguments[1].column.get();
        const ColumnConst * id_col_const = checkAndGetColumn<ColumnConst>(id_col);
        /// A non-constant id column yields nullptr here; report it instead of dereferencing it.
        if (!id_col_const)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
                arguments[1].column->getName(), getName());
        const String id = id_col_const->getValue<String>();

        IColumn::Ptr data_col = arguments[0].column;
        IColumn::MutablePtr data_col_mutable = IColumn::mutate(data_col);

        auto & global_state = EntropyLearnedHashGlobalState::instance();
        global_state.cacheTrainingSample(user_name, id, std::move(data_col_mutable));

        const size_t num_rows = data_col->size();
        return result_type->createColumnConst(num_rows, 0u); /// dummy output
    }

private:
    const String user_name;
};
/// 1. Concatenates the training samples of a given id in the global state.
/// 2. Computes the partial key positions from the concatenated training samples and stores that in the global state.
/// 3. clear()-s the training samples in the global state.
class FunctionTrainEntropyLearnedHash : public IFunction
{
public:
    static constexpr auto name = "trainEntropyLearnedHash";

    /// Factory: the created function remembers the name of the current user.
    /// Refuses to work (SUPPORT_IS_DISABLED) while `allow_experimental_hash_functions` is off.
    static FunctionPtr create(ContextPtr context)
    {
        if (!context->getSettings().allow_experimental_hash_functions)
        {
            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
                "Entropy-learned hashing is experimental. Set `allow_experimental_hash_functions` setting to enable it");
        }

        return std::make_shared<FunctionTrainEntropyLearnedHash>(context->getUserName());
    }

    explicit FunctionTrainEntropyLearnedHash(const String & user_name_) : IFunction(), user_name(user_name_) {}

    String getName() const override { return name; }

    size_t getNumberOfArguments() const override { return 1; }

    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }

    bool useDefaultImplementationForConstants() const override { return false; }

    /// Single argument: id (String). The result type is UInt8.
    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
        FunctionArgumentDescriptors expected_arguments{
            {"id", &isString<IDataType>, nullptr, "String"}
        };
        validateFunctionArgumentTypes(*this, arguments, expected_arguments);

        return std::make_shared<DataTypeUInt8>();
    }

    /// Runs the training step for (user_name, id); the id has to be a constant column.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override
    {
        const IColumn * id_column = arguments[0].column.get();
        const auto * const_id_column = checkAndGetColumn<ColumnConst>(id_column);
        if (const_id_column == nullptr)
        {
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
                arguments.begin()->column->getName(), getName());
        }

        auto & state = EntropyLearnedHashGlobalState::instance();
        const String id = const_id_column->getValue<String>();
        state.train(user_name, id);

        /// The value carries no information; only the number of rows matters.
        return result_type->createColumnConst(id_column->size(), 0u); /// dummy output
    }

private:
    const String user_name;
};
/// Hashes input strings using partial key positions stored in the global state.
class FunctionEntropyLearnedHash : public IFunction
{
public:
    static constexpr auto name = "entropyLearnedHash";

    /// Creates the function bound to the name of the current user.
    /// Throws SUPPORT_IS_DISABLED unless the `allow_experimental_hash_functions` setting is enabled.
    static FunctionPtr create(ContextPtr context)
    {
        if (!context->getSettings().allow_experimental_hash_functions)
            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
                "Entropy-learned hashing is experimental. Set `allow_experimental_hash_functions` setting to enable it");

        return std::make_shared<FunctionEntropyLearnedHash>(context->getUserName());
    }

    explicit FunctionEntropyLearnedHash(const String & user_name_) : IFunction(), user_name(user_name_) {}

    String getName() const override { return name; }
    size_t getNumberOfArguments() const override { return 2; }
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
    bool useDefaultImplementationForConstants() const override { return true; }
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }

    /// Arguments: (data String, id String). The result type is UInt64.
    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
        FunctionArgumentDescriptors args{
            {"data", &isString<IDataType>, nullptr, "String"},
            {"id", &isString<IDataType>, nullptr, "String"}
        };

        validateFunctionArgumentTypes(*this, arguments, args);

        return std::make_shared<DataTypeUInt64>();
    }

    /// For every row, hashes with CityHash64 the partial key built from the positions trained for (user_name, id).
    /// Throws ILLEGAL_COLUMN when the id argument is not a constant column or the data column is not a ColumnString.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
    {
        const IColumn * id_col = arguments.back().column.get();
        const ColumnConst * id_col_const = checkAndGetColumn<ColumnConst>(id_col);
        /// A non-constant id column yields nullptr here; report it instead of dereferencing it.
        if (!id_col_const)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
                arguments.back().column->getName(), getName());
        const String id = id_col_const->getValue<String>();

        const auto & global_state = EntropyLearnedHashGlobalState::instance();
        const auto & partial_key_positions = global_state.getPartialKeyPositions(user_name, id);

        const auto * data_col = arguments[0].column.get();
        if (const auto * col_data_string = checkAndGetColumn<ColumnString>(data_col))
        {
            const size_t num_rows = col_data_string->size();
            auto col_res = ColumnUInt64::create(num_rows);

            auto & col_res_vec = col_res->getData();
            /// Scratch buffer reused for every row.
            String partial_key;
            for (size_t i = 0; i < num_rows; ++i)
            {
                std::string_view string_ref = col_data_string->getDataAt(i).toView();
                getPartialKey(string_ref, partial_key_positions, partial_key);
                col_res_vec[i] = CityHash_v1_0_2::CityHash64(partial_key.data(), partial_key.size());
            }

            return col_res;
        }
        else
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
                arguments.begin()->column->getName(), getName());
    }

private:
    const String user_name;
};
/// Registers the three entropy-learned hashing functions (prepare-train, train, hash) in the function factory.
REGISTER_FUNCTION(EntropyLearnedHash)
{
factory.registerFunction<FunctionPrepareTrainEntropyLearnedHash>();
factory.registerFunction<FunctionTrainEntropyLearnedHash>();
factory.registerFunction<FunctionEntropyLearnedHash>();
}
}

View File

@ -0,0 +1,446 @@
#include <Functions/FunctionGenerateRandomStructure.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeFixedString.h>
#include <Interpreters/Context.h>
#include <Common/randomSeed.h>
#include <Common/FunctionDocumentation.h>
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromVector.h>
#include <pcg_random.hpp>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
}
namespace
{
/// Upper bounds for the randomly generated structure.
/// generateNumberOfColumns() returns a value in [1, MAX_NUMBER_OF_COLUMNS].
/// NOTE(review): the remaining limits are presumably applied where the corresponding types are written - confirm at the use sites.
const size_t MAX_NUMBER_OF_COLUMNS = 128;
const size_t MAX_TUPLE_ELEMENTS = 16;
const size_t MAX_DATETIME64_PRECISION = 9;
const size_t MAX_DECIMAL32_PRECISION = 9;
const size_t MAX_DECIMAL64_PRECISION = 18;
const size_t MAX_DECIMAL128_PRECISION = 38;
const size_t MAX_DECIMAL256_PRECISION = 76;
const size_t MAX_DEPTH = 16;
/// Types without nested types. getAllTypes() places them first in its result.
/// The array size in the declaration must be kept in sync with the number of entries.
constexpr std::array<TypeIndex, 29> simple_types
{
TypeIndex::Int8,
TypeIndex::UInt8,
TypeIndex::Int16,
TypeIndex::UInt16,
TypeIndex::Int32,
TypeIndex::UInt32,
TypeIndex::Int64,
TypeIndex::UInt64,
TypeIndex::Int128,
TypeIndex::UInt128,
TypeIndex::Int256,
TypeIndex::UInt256,
TypeIndex::Float32,
TypeIndex::Float64,
TypeIndex::Decimal32,
TypeIndex::Decimal64,
TypeIndex::Decimal128,
TypeIndex::Decimal256,
TypeIndex::Date,
TypeIndex::Date32,
TypeIndex::DateTime,
TypeIndex::DateTime64,
TypeIndex::String,
TypeIndex::FixedString,
TypeIndex::Enum8,
TypeIndex::Enum16,
TypeIndex::IPv4,
TypeIndex::IPv6,
TypeIndex::UUID,
};
/// Types that wrap other types. getAllTypes<true>() appends them after the simple types; getAllTypes<false>() omits them.
constexpr std::array<TypeIndex, 5> complex_types
{
TypeIndex::Nullable,
TypeIndex::LowCardinality,
TypeIndex::Array,
TypeIndex::Tuple,
TypeIndex::Map,
};
/// NOTE(review): judging by the name, the types that may be generated as Map keys - the use site is not visible here, confirm.
/// The array size in the declaration must be kept in sync with the number of entries.
constexpr std::array<TypeIndex, 22> map_key_types
{
TypeIndex::Int8,
TypeIndex::UInt8,
TypeIndex::Int16,
TypeIndex::UInt16,
TypeIndex::Int32,
TypeIndex::UInt32,
TypeIndex::Int64,
TypeIndex::UInt64,
TypeIndex::Int128,
TypeIndex::UInt128,
TypeIndex::Int256,
TypeIndex::UInt256,
TypeIndex::Date,
TypeIndex::Date32,
TypeIndex::DateTime,
TypeIndex::String,
TypeIndex::FixedString,
TypeIndex::IPv4,
TypeIndex::Enum8,
TypeIndex::Enum16,
TypeIndex::UUID,
TypeIndex::LowCardinality,
};
/// Nested types that writeLowCardinalityNestedType() picks from when allow_suspicious_lc_types is true.
/// The array size in the declaration must be kept in sync with the number of entries.
constexpr std::array<TypeIndex, 22> suspicious_lc_types
{
TypeIndex::Int8,
TypeIndex::UInt8,
TypeIndex::Int16,
TypeIndex::UInt16,
TypeIndex::Int32,
TypeIndex::UInt32,
TypeIndex::Int64,
TypeIndex::UInt64,
TypeIndex::Int128,
TypeIndex::UInt128,
TypeIndex::Int256,
TypeIndex::UInt256,
TypeIndex::Float32,
TypeIndex::Float64,
TypeIndex::Date,
TypeIndex::Date32,
TypeIndex::DateTime,
TypeIndex::String,
TypeIndex::FixedString,
TypeIndex::IPv4,
TypeIndex::IPv6,
TypeIndex::UUID,
};
template <bool allow_complex_types>
constexpr auto getAllTypes()
{
constexpr size_t complex_types_size = complex_types.size() * allow_complex_types;
constexpr size_t result_size = simple_types.size() + complex_types_size;
std::array<TypeIndex, result_size> result;
size_t index = 0;
for (size_t i = 0; i != simple_types.size(); ++i, ++index)
result[index] = simple_types[i];
for (size_t i = 0; i != complex_types_size; ++i, ++index)
result[index] = complex_types[i];
return result;
}
/// Draws a random column count in the range [1, MAX_NUMBER_OF_COLUMNS].
size_t generateNumberOfColumns(pcg64 & rng)
{
    const auto zero_based = rng() % MAX_NUMBER_OF_COLUMNS;
    return zero_based + 1;
}
/// Writes the type that goes inside LowCardinality(...), optionally wrapped in Nullable(...).
/// With allow_suspicious_lc_types the nested type is drawn from suspicious_lc_types,
/// otherwise only String or FixedString(N) is produced.
void writeLowCardinalityNestedType(pcg64 & rng, WriteBuffer & buf, bool allow_suspicious_lc_types)
{
    const auto write_fixed_string = [&]
    {
        writeString("FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")", buf);
    };

    /// The Nullable decision is drawn first, before the nested type itself.
    const bool wrap_in_nullable = rng() % 2;
    if (wrap_in_nullable)
        writeCString("Nullable(", buf);

    if (!allow_suspicious_lc_types)
    {
        /// Support only String and FixedString.
        if (rng() % 2)
            writeCString("String", buf);
        else
            write_fixed_string();
    }
    else
    {
        const TypeIndex nested = suspicious_lc_types[rng() % suspicious_lc_types.size()];
        if (nested == TypeIndex::FixedString)
            write_fixed_string();
        else
            writeString(magic_enum::enum_name<TypeIndex>(nested), buf);
    }

    if (wrap_in_nullable)
        writeChar(')', buf);
}
void writeEnumValues(const String & column_name, pcg64 & rng, WriteBuffer & buf, ssize_t max_value)
{
/// Don't generate big enums, because it will lead to really big result
/// and slowness of this function, and it can lead to `Max query size exceeded`
/// while using this function with generateRandom.
size_t num_values = rng() % 16 + 1;
std::vector<Int16> values(num_values);
/// Generate random numbers from range [-(max_value + 1), max_value - num_values + 1].
for (Int16 & x : values)
x = rng() % (2 * max_value + 3 - num_values) - max_value - 1;
/// Make all numbers unique.
std::sort(values.begin(), values.end());
for (size_t i = 0; i < num_values; ++i)
values[i] += i;
std::shuffle(values.begin(), values.end(), rng);
for (size_t i = 0; i != num_values; ++i)
{
if (i != 0)
writeCString(", ", buf);
writeString("'" + column_name + "V" + std::to_string(i) + "' = " + std::to_string(values[i]), buf);
}
}
void writeMapKeyType(const String & column_name, pcg64 & rng, WriteBuffer & buf)
{
TypeIndex type = map_key_types[rng() % map_key_types.size()];
switch (type)
{
case TypeIndex::FixedString:
writeString("FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")", buf);
break;
case TypeIndex::LowCardinality:
writeCString("LowCardinality(", buf);
/// Map key supports only String and FixedString inside LowCardinality.
if (rng() % 2)
writeCString("String", buf);
else
writeString("FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")", buf);
writeChar(')', buf);
break;
case TypeIndex::Enum8:
writeCString("Enum8(", buf);
writeEnumValues(column_name, rng, buf, INT8_MAX);
writeChar(')', buf);
break;
case TypeIndex::Enum16:
writeCString("Enum16(", buf);
writeEnumValues(column_name, rng, buf, INT16_MAX);
writeChar(')', buf);
break;
default:
writeString(magic_enum::enum_name<TypeIndex>(type), buf);
break;
}
}
template <bool allow_complex_types = true>
void writeRandomType(const String & column_name, pcg64 & rng, WriteBuffer & buf, bool allow_suspicious_lc_types, size_t depth = 0)
{
if (allow_complex_types && depth > MAX_DEPTH)
writeRandomType<false>(column_name, rng, buf, depth);
constexpr auto all_types = getAllTypes<allow_complex_types>();
auto type = all_types[rng() % all_types.size()];
switch (type)
{
case TypeIndex::UInt8:
if (rng() % 2)
writeCString("UInt8", buf);
else
writeCString("Bool", buf);
return;
case TypeIndex::FixedString:
writeString("FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")", buf);
return;
case TypeIndex::DateTime64:
writeString("DateTime64(" + std::to_string(rng() % MAX_DATETIME64_PRECISION + 1) + ")", buf);
return;
case TypeIndex::Decimal32:
writeString("Decimal32(" + std::to_string(rng() % MAX_DECIMAL32_PRECISION + 1) + ")", buf);
return;
case TypeIndex::Decimal64:
writeString("Decimal64(" + std::to_string(rng() % MAX_DECIMAL64_PRECISION + 1) + ")", buf);
return;
case TypeIndex::Decimal128:
writeString("Decimal128(" + std::to_string(rng() % MAX_DECIMAL128_PRECISION + 1) + ")", buf);
return;
case TypeIndex::Decimal256:
writeString("Decimal256(" + std::to_string(rng() % MAX_DECIMAL256_PRECISION + 1) + ")", buf);
return;
case TypeIndex::Enum8:
writeCString("Enum8(", buf);
writeEnumValues(column_name, rng, buf, INT8_MAX);
writeChar(')', buf);
return;
case TypeIndex::Enum16:
writeCString("Enum16(", buf);
writeEnumValues(column_name, rng, buf, INT16_MAX);
writeChar(')', buf);
return;
case TypeIndex::LowCardinality:
writeCString("LowCardinality(", buf);
writeLowCardinalityNestedType(rng, buf, allow_suspicious_lc_types);
writeChar(')', buf);
return;
case TypeIndex::Nullable:
{
writeCString("Nullable(", buf);
writeRandomType<false>(column_name, rng, buf, allow_suspicious_lc_types, depth + 1);
writeChar(')', buf);
return;
}
case TypeIndex::Array:
{
writeCString("Array(", buf);
writeRandomType(column_name, rng, buf, allow_suspicious_lc_types, depth + 1);
writeChar(')', buf);
return;
}
case TypeIndex::Map:
{
writeCString("Map(", buf);
writeMapKeyType(column_name, rng, buf);
writeCString(", ", buf);
writeRandomType(column_name, rng, buf, allow_suspicious_lc_types, depth + 1);
writeChar(')', buf);
return;
}
case TypeIndex::Tuple:
{
size_t elements = rng() % MAX_TUPLE_ELEMENTS + 1;
bool generate_nested = rng() % 2;
bool generate_named_tuple = rng() % 2;
if (generate_nested)
writeCString("Nested(", buf);
else
writeCString("Tuple(", buf);
for (size_t i = 0; i != elements; ++i)
{
if (i != 0)
writeCString(", ", buf);
String element_name = "e" + std::to_string(i + 1);
if (generate_named_tuple || generate_nested)
{
writeString(element_name, buf);
writeChar(' ', buf);
}
writeRandomType(element_name, rng, buf, allow_suspicious_lc_types, depth + 1);
}
writeChar(')', buf);
return;
}
default:
writeString(magic_enum::enum_name<TypeIndex>(type), buf);
return;
}
}
/// Writes `number_of_columns` comma-separated column declarations of the form
/// `c<N> <random type>` into `buf`, numbering columns from 1.
void writeRandomStructure(pcg64 & rng, size_t number_of_columns, WriteBuffer & buf, bool allow_suspicious_lc_types)
{
    for (size_t column_index = 0; column_index < number_of_columns; ++column_index)
    {
        const bool is_first = (column_index == 0);
        if (!is_first)
            writeCString(", ", buf);

        const String column_name = "c" + std::to_string(column_index + 1);
        writeString(column_name, buf);
        writeChar(' ', buf);
        /// The column name doubles as the prefix for enum element names inside the type.
        writeRandomType(column_name, rng, buf, allow_suspicious_lc_types);
    }
}
}
/// Validates the argument list (at most two arguments, each an unsigned integer or Null)
/// and reports String as the result type.
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH or ILLEGAL_TYPE_OF_ARGUMENT on violation.
DataTypePtr FunctionGenerateRandomStructure::getReturnTypeImpl(const DataTypes & arguments) const
{
    const size_t arguments_count = arguments.size();
    if (arguments_count > 2)
        throw Exception(
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
            "Number of arguments for function {} doesn't match: passed {}, expected from 0 to 2",
            getName(), arguments.size());

    /// 1-based position, used only for the error message.
    size_t position = 0;
    for (const auto & argument : arguments)
    {
        ++position;
        if (isUnsignedInteger(argument) || argument->onlyNull())
            continue;

        throw Exception(
            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
            "Illegal type {} of the {} argument of function {}, expected unsigned integer or Null",
            argument->getName(),
            position,
            getName());
    }

    return std::make_shared<DataTypeString>();
}
/// Generates one random structure string and returns it as a constant column of `input_rows_count` rows.
/// Argument 0 (optional): number of columns; 0 or Null means "choose randomly".
/// Argument 1 (optional): seed; Null or absent means a random seed.
/// Throws BAD_ARGUMENTS when the requested number of columns exceeds MAX_NUMBER_OF_COLUMNS.
ColumnPtr FunctionGenerateRandomStructure::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const
{
    /// An argument counts as provided when it is present and its column does not report onlyNull().
    const auto is_provided = [&arguments](size_t position)
    {
        return arguments.size() > position && !arguments[position].column->onlyNull();
    };

    size_t seed = randomSeed();
    size_t number_of_columns = 0;

    if (is_provided(0))
    {
        number_of_columns = arguments[0].column->getUInt(0);
        if (number_of_columns > MAX_NUMBER_OF_COLUMNS)
            throw Exception(
                ErrorCodes::BAD_ARGUMENTS,
                "Maximum allowed number of columns is {}, got {}",
                MAX_NUMBER_OF_COLUMNS,
                number_of_columns);
    }

    if (is_provided(1))
        seed = arguments[1].column->getUInt(0);

    pcg64 rng(seed);

    /// Zero is the "not specified" marker: draw the column count from the seeded generator.
    if (number_of_columns == 0)
        number_of_columns = generateNumberOfColumns(rng);

    auto result_column = ColumnString::create();
    auto & strings = assert_cast<ColumnString &>(*result_column);
    auto & chars = strings.getChars();

    /// Write the structure directly into the character storage of the string column.
    WriteBufferFromVector buf(chars);
    writeRandomStructure(rng, number_of_columns, buf, allow_suspicious_lc_types);
    buf.finalize();

    /// Append a zero byte and record the end offset of the single generated row.
    chars.push_back(0);
    strings.getOffsets().push_back(chars.size());

    return ColumnConst::create(std::move(result_column), input_rows_count);
}
/// Generates a random structure string from `seed`, with a random number of columns.
/// Whether suspicious LowCardinality nested types may appear is taken from the
/// `allow_suspicious_low_cardinality_types` setting of `context`.
String FunctionGenerateRandomStructure::generateRandomStructure(size_t seed, const ContextPtr & context)
{
    const bool allow_suspicious_lc = context->getSettingsRef().allow_suspicious_low_cardinality_types;

    pcg64 rng(seed);
    /// The column count is the first value drawn from the generator.
    const size_t columns_count = generateNumberOfColumns(rng);

    WriteBufferFromOwnString out;
    writeRandomStructure(rng, columns_count, out, allow_suspicious_lc);
    return out.str();
}
/// Registers FunctionGenerateRandomStructure in the function factory together with
/// its user-facing documentation (description, examples, category).
REGISTER_FUNCTION(GenerateRandomStructure)
{
factory.registerFunction<FunctionGenerateRandomStructure>(FunctionDocumentation
{
.description=R"(
Generates a random table structure.
This function takes 2 optional constant arguments:
the number of columns in the result structure (random by default) and random seed (random by default)
The maximum number of columns is 128.
The function returns a value of type String.
)",
.examples{
{"random", "SELECT generateRandomStructure()", "c1 UInt32, c2 FixedString(25)"},
{"with specified number of columns", "SELECT generateRandomStructure(3)", "c1 String, c2 Array(Int32), c3 LowCardinality(String)"},
{"with specified seed", "SELECT generateRandomStructure(1, 42)", "c1 UInt128"},
},
.categories{"Random"}
},
FunctionFactory::CaseSensitive);
}
}

View File

@ -0,0 +1,47 @@
#pragma once
#include <Functions/IFunction.h>
#include <Interpreters/Context.h>
#include <pcg_random.hpp>
namespace DB
{
/// SQL function `generateRandomStructure`: returns a String describing a randomly generated table structure.
/// The function is variadic; the accepted argument list is validated in getReturnTypeImpl.
class FunctionGenerateRandomStructure : public IFunction
{
public:
    static constexpr auto name = "generateRandomStructure";

    explicit FunctionGenerateRandomStructure(bool allow_suspicious_lc_types_)
        : allow_suspicious_lc_types(allow_suspicious_lc_types_)
    {
    }

    /// Factory entry point: captures the `allow_suspicious_low_cardinality_types` setting from the context.
    static FunctionPtr create(ContextPtr context)
    {
        const bool allow_suspicious = context->getSettingsRef().allow_suspicious_low_cardinality_types.value;
        return std::make_shared<FunctionGenerateRandomStructure>(allow_suspicious);
    }

    /// Generates a structure from `seed` without going through the function interface.
    static String generateRandomStructure(size_t seed, const ContextPtr & context);

    String getName() const override { return name; }

    /// Variadic: the number of arguments is checked in getReturnTypeImpl.
    bool isVariadic() const override { return true; }
    size_t getNumberOfArguments() const override { return 0; }
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 1}; }

    /// The result is random, so the function is declared non-deterministic, even within a single query.
    bool isDeterministic() const override { return false; }
    bool isDeterministicInScopeOfQuery() const override { return false; }

    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }

    /// Constant and Null arguments are handled by executeImpl itself.
    bool useDefaultImplementationForConstants() const override { return false; }
    bool useDefaultImplementationForNulls() const override { return false; }

    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override;

private:
    /// Whether LowCardinality may wrap types other than String/FixedString in the generated structure.
    bool allow_suspicious_lc_types;
};
}

View File

@ -195,18 +195,14 @@ void WriteBufferFromS3::finalizeImpl()
if (request_settings.check_objects_after_upload)
{
LOG_TRACE(log, "Checking object {} exists after upload", key);
S3::checkObjectExists(*client_ptr, bucket, key, {}, request_settings, /* for_disk_s3= */ write_settings.for_object_storage, "Immediately after upload");
LOG_TRACE(log, "Checking object {} has size as expected {}", key, total_size);
size_t actual_size = S3::getObjectSize(*client_ptr, bucket, key, {}, request_settings, /* for_disk_s3= */ write_settings.for_object_storage);
if (actual_size != total_size)
throw Exception(
ErrorCodes::S3_ERROR,
"Object {} from bucket {} has unexpected size {} after upload, expected size {}, it's a bug in S3 or S3 API.",
key, bucket, actual_size, total_size);
LOG_TRACE(log, "Object {} exists after upload", key);
}
}
@ -245,10 +241,8 @@ WriteBufferFromS3::~WriteBufferFromS3()
LOG_INFO(log,
"WriteBufferFromS3 is not finalized in destructor. "
"It could be if an exception occurs. File is not written to S3. "
"{}. "
"Stack trace: {}",
getLogDetails(),
StackTrace().toString());
"{}.",
getLogDetails());
}
task_tracker->safeWaitAll();
@ -292,8 +286,6 @@ void WriteBufferFromS3::reallocateFirstBuffer()
WriteBuffer::set(memory.data() + hidden_size, memory.size() - hidden_size);
chassert(offset() == 0);
LOG_TRACE(log, "Reallocated first buffer with size {}. {}", memory.size(), getLogDetails());
}
void WriteBufferFromS3::detachBuffer()
@ -316,8 +308,6 @@ void WriteBufferFromS3::allocateFirstBuffer()
const auto size = std::min(size_t(DBMS_DEFAULT_BUFFER_SIZE), max_first_buffer);
memory = Memory(size);
WriteBuffer::set(memory.data(), memory.size());
LOG_TRACE(log, "Allocated first buffer with size {}. {}", memory.size(), getLogDetails());
}
void WriteBufferFromS3::allocateBuffer()

View File

@ -36,8 +36,6 @@ ThreadPoolCallbackRunner<void> WriteBufferFromS3::TaskTracker::syncRunner()
void WriteBufferFromS3::TaskTracker::waitAll()
{
LOG_TEST(log, "waitAll, in queue {}", futures.size());
/// Exceptions are propagated
for (auto & future : futures)
{
@ -51,8 +49,6 @@ void WriteBufferFromS3::TaskTracker::waitAll()
void WriteBufferFromS3::TaskTracker::safeWaitAll()
{
LOG_TEST(log, "safeWaitAll, wait in queue {}", futures.size());
for (auto & future : futures)
{
if (future.valid())
@ -76,7 +72,6 @@ void WriteBufferFromS3::TaskTracker::safeWaitAll()
void WriteBufferFromS3::TaskTracker::waitIfAny()
{
LOG_TEST(log, "waitIfAny, in queue {}", futures.size());
if (futures.empty())
return;
@ -101,8 +96,6 @@ void WriteBufferFromS3::TaskTracker::waitIfAny()
watch.stop();
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3WaitInflightLimitMicroseconds, watch.elapsedMicroseconds());
LOG_TEST(log, "waitIfAny ended, in queue {}", futures.size());
}
void WriteBufferFromS3::TaskTracker::add(Callback && func)
@ -147,8 +140,6 @@ void WriteBufferFromS3::TaskTracker::waitTilInflightShrink()
if (!max_tasks_inflight)
return;
LOG_TEST(log, "waitTilInflightShrink, in queue {}", futures.size());
Stopwatch watch;
/// Alternative approach is to wait until at least futures.size() - max_tasks_inflight element are finished
@ -171,8 +162,6 @@ void WriteBufferFromS3::TaskTracker::waitTilInflightShrink()
watch.stop();
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3WaitInflightLimitMicroseconds, watch.elapsedMicroseconds());
LOG_TEST(log, "waitTilInflightShrink ended, in queue {}", futures.size());
}
bool WriteBufferFromS3::TaskTracker::isAsync() const

View File

@ -1,16 +1,35 @@
#include "FileCache.h"
#include <Common/randomSeed.h>
#include <IO/Operators.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadSettings.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromString.h>
#include <Interpreters/Cache/FileCacheSettings.h>
#include <Interpreters/Cache/LRUFileCachePriority.h>
#include <Interpreters/Context.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/ReadSettings.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <pcg-random/pcg_random.hpp>
#include <base/hex.h>
#include <pcg-random/pcg_random.hpp>
#include <Common/randomSeed.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace
{

/// Returns the largest multiple of `multiple` that is <= `num`.
/// A `multiple` of 0 means "no alignment" and returns `num` unchanged,
/// instead of dividing by zero.
size_t roundDownToMultiple(size_t num, size_t multiple)
{
    if (multiple == 0)
        return num;
    return num - num % multiple;
}

/// Returns the smallest multiple of `multiple` that is >= `num`.
/// A `multiple` of 0 means "no alignment" and returns `num` unchanged.
/// Uses the remainder rather than `num + multiple - 1`, so the intermediate
/// value cannot wrap around when the result itself is representable.
size_t roundUpToMultiple(size_t num, size_t multiple)
{
    if (multiple == 0)
        return num;
    const size_t remainder = num % multiple;
    return remainder == 0 ? num : num + (multiple - remainder);
}

}
namespace DB
{
@ -26,6 +45,7 @@ FileCache::FileCache(const FileCacheSettings & settings)
, delayed_cleanup_interval_ms(settings.delayed_cleanup_interval_ms)
, log(&Poco::Logger::get("FileCache"))
, metadata(settings.base_path)
, boundary_alignment(settings.boundary_alignment)
{
main_priority = std::make_unique<LRUFileCachePriority>(settings.max_size, settings.max_elements);
@ -385,15 +405,16 @@ FileSegmentsHolderPtr FileCache::set(
return std::make_unique<FileSegmentsHolder>(std::move(file_segments));
}
FileSegmentsHolderPtr FileCache::getOrSet(
const Key & key,
size_t offset,
size_t size,
const CreateFileSegmentSettings & settings)
FileSegmentsHolderPtr
FileCache::getOrSet(const Key & key, size_t offset, size_t size, size_t file_size, const CreateFileSegmentSettings & settings)
{
assertInitialized();
FileSegment::Range range(offset, offset + size - 1);
const auto aligned_offset = roundDownToMultiple(offset, boundary_alignment);
const auto aligned_end = std::min(roundUpToMultiple(offset + size, boundary_alignment), file_size);
const auto aligned_size = aligned_end - aligned_offset;
FileSegment::Range range(aligned_offset, aligned_offset + aligned_size - 1);
auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::CREATE_EMPTY);
@ -401,8 +422,7 @@ FileSegmentsHolderPtr FileCache::getOrSet(
auto file_segments = getImpl(*locked_key, range);
if (file_segments.empty())
{
file_segments = splitRangeIntoFileSegments(
*locked_key, offset, size, FileSegment::State::EMPTY, settings);
file_segments = splitRangeIntoFileSegments(*locked_key, range.left, range.size(), FileSegment::State::EMPTY, settings);
}
else
{
@ -410,6 +430,12 @@ FileSegmentsHolderPtr FileCache::getOrSet(
*locked_key, file_segments, range, /* fill_with_detached */false, settings);
}
while (!file_segments.empty() && file_segments.front()->range().right < offset)
file_segments.pop_front();
while (!file_segments.empty() && file_segments.back()->range().left >= offset + size)
file_segments.pop_back();
chassert(!file_segments.empty());
return std::make_unique<FileSegmentsHolder>(std::move(file_segments));
}

View File

@ -67,7 +67,8 @@ public:
* As long as pointers to returned file segments are held
* it is guaranteed that these file segments are not removed from cache.
*/
FileSegmentsHolderPtr getOrSet(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings);
FileSegmentsHolderPtr
getOrSet(const Key & key, size_t offset, size_t size, size_t file_size, const CreateFileSegmentSettings & settings);
/**
* Segments in returned list are ordered in ascending order and represent a full contiguous
@ -179,6 +180,8 @@ private:
void assertInitialized() const;
size_t boundary_alignment;
void assertCacheCorrectness();
void loadMetadata();

View File

@ -49,6 +49,8 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration &
do_not_evict_index_and_mark_files = config.getUInt64(config_prefix + ".do_not_evict_index_and_mark_files", true);
boundary_alignment = config.getUInt64(config_prefix + ".boundary_alignment", DBMS_DEFAULT_BUFFER_SIZE);
delayed_cleanup_interval_ms = config.getUInt64(config_prefix + ".delayed_cleanup_interval_ms", FILECACHE_DELAYED_CLEANUP_INTERVAL_MS);
}

View File

@ -1,5 +1,6 @@
#pragma once
#include <Core/Defines.h>
#include <Interpreters/Cache/FileCache_fwd.h>
#include <string>
@ -27,6 +28,8 @@ struct FileCacheSettings
size_t bypass_cache_threashold = FILECACHE_BYPASS_THRESHOLD;
size_t delayed_cleanup_interval_ms = FILECACHE_DELAYED_CLEANUP_INTERVAL_MS;
size_t boundary_alignment = DBMS_DEFAULT_BUFFER_SIZE;
void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
};

View File

@ -4,7 +4,7 @@
namespace DB
{
static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 100 * 1024 * 1024;
static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 8 * 1024 * 1024;
static constexpr int FILECACHE_DEFAULT_MAX_ELEMENTS = 1024 * 1024;
static constexpr int FILECACHE_DEFAULT_HITS_THRESHOLD = 0;
static constexpr size_t FILECACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024;

View File

@ -1,13 +1,14 @@
#include "FileSegment.h"
#include <base/getThreadId.h>
#include <Common/scope_guard_safe.h>
#include <base/hex.h>
#include <Common/logger_useful.h>
#include <Interpreters/Cache/FileCache.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <filesystem>
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
#include <Interpreters/Cache/FileCache.h>
#include <base/getThreadId.h>
#include <base/hex.h>
#include <Common/OpenTelemetryTraceContext.h>
#include <Common/logger_useful.h>
#include <Common/scope_guard_safe.h>
#include <magic_enum.hpp>
@ -399,6 +400,8 @@ void FileSegment::write(const char * from, size_t size, size_t offset)
FileSegment::State FileSegment::wait(size_t offset)
{
OpenTelemetry::SpanHolder span{fmt::format("FileSegment::wait({})", key().toString())};
auto lock = segment_guard.lock();
if (downloader_id.empty() || offset < getCurrentWriteOffset(true))

View File

@ -1,9 +1,9 @@
#include <Interpreters/FilesystemReadPrefetchesLog.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/FilesystemReadPrefetchesLog.h>
namespace DB
@ -39,12 +39,12 @@ void FilesystemReadPrefetchesLogElement::appendToBlock(MutableColumns & columns)
columns[i++]->insert(path);
columns[i++]->insert(offset);
columns[i++]->insert(size);
columns[i++]->insert(prefetch_submit_time);
columns[i++]->insert(std::chrono::duration_cast<std::chrono::microseconds>(prefetch_submit_time.time_since_epoch()).count());
columns[i++]->insert(priority.value);
if (execution_watch)
{
columns[i++]->insert(execution_watch->getStart());
columns[i++]->insert(execution_watch->getEnd());
columns[i++]->insert(execution_watch->getStart() / 1000);
columns[i++]->insert(execution_watch->getEnd() / 1000);
columns[i++]->insert(execution_watch->elapsedMicroseconds());
}
else

View File

@ -24,7 +24,7 @@ struct FilesystemReadPrefetchesLogElement
String path;
UInt64 offset;
Int64 size; /// -1 means unknown
Decimal64 prefetch_submit_time{};
std::chrono::system_clock::time_point prefetch_submit_time;
std::optional<Stopwatch> execution_watch;
Priority priority;
FilesystemPrefetchState state;

View File

@ -322,6 +322,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
/// This does not have impact on the final span logs, because these internal queries are issued by external queries,
/// we still have enough span logs for the execution of external queries.
std::shared_ptr<OpenTelemetry::SpanHolder> query_span = internal ? nullptr : std::make_shared<OpenTelemetry::SpanHolder>("query");
if (query_span)
LOG_DEBUG(&Poco::Logger::get("executeQuery"), "Query span trace_id for opentelemetry log: {}", query_span->trace_id);
auto query_start_time = std::chrono::system_clock::now();

View File

@ -192,6 +192,9 @@ TEST_F(FileCacheTest, get)
settings.base_path = cache_base_path;
settings.max_size = 30;
settings.max_elements = 5;
settings.boundary_alignment = 1;
const size_t file_size = -1; // the value doesn't really matter because boundary_alignment == 1.
{
std::cerr << "Step 1\n";
@ -200,7 +203,7 @@ TEST_F(FileCacheTest, get)
auto key = cache.createKeyForPath("key1");
{
auto holder = cache.getOrSet(key, 0, 10, {}); /// Add range [0, 9]
auto holder = cache.getOrSet(key, 0, 10, file_size, {}); /// Add range [0, 9]
assertEqual(holder, { Range(0, 9) }, { State::EMPTY });
download(holder->front());
assertEqual(holder, { Range(0, 9) }, { State::DOWNLOADED });
@ -219,7 +222,7 @@ TEST_F(FileCacheTest, get)
{
/// Want range [5, 14], but [0, 9] already in cache, so only [10, 14] will be put in cache.
auto holder = cache.getOrSet(key, 5, 10, {});
auto holder = cache.getOrSet(key, 5, 10, file_size, {});
assertEqual(holder, { Range(0, 9), Range(10, 14) }, { State::DOWNLOADED, State::EMPTY });
download(get(holder, 1));
assertEqual(holder, { Range(0, 9), Range(10, 14) }, { State::DOWNLOADED, State::DOWNLOADED });
@ -238,20 +241,18 @@ TEST_F(FileCacheTest, get)
/// Get [9, 9]
{
auto holder = cache.getOrSet(key, 9, 1, {});
auto holder = cache.getOrSet(key, 9, 1, file_size, {});
assertEqual(holder, { Range(0, 9) }, { State::DOWNLOADED });
increasePriority(holder);
}
assertEqual(cache.dumpQueue(), { Range(10, 14), Range(0, 9) });
/// Get [9, 10]
assertEqual(cache.getOrSet(key, 9, 2, {}),
{ Range(0, 9), Range(10, 14) },
{ State::DOWNLOADED, State::DOWNLOADED });
assertEqual(cache.getOrSet(key, 9, 2, file_size, {}), {Range(0, 9), Range(10, 14)}, {State::DOWNLOADED, State::DOWNLOADED});
/// Get [10, 10]
{
auto holder = cache.getOrSet(key, 10, 1, {});
auto holder = cache.getOrSet(key, 10, 1, file_size, {});
assertEqual(holder, { Range(10, 14) }, { State::DOWNLOADED });
increasePriority(holder);
}
@ -264,19 +265,19 @@ TEST_F(FileCacheTest, get)
std::cerr << "Step 4\n";
{
auto holder = cache.getOrSet(key, 17, 4, {});
auto holder = cache.getOrSet(key, 17, 4, file_size, {});
download(holder); /// Get [17, 20]
increasePriority(holder);
}
{
auto holder = cache.getOrSet(key, 24, 3, {});
auto holder = cache.getOrSet(key, 24, 3, file_size, {});
download(holder); /// Get [24, 26]
increasePriority(holder);
}
{
auto holder = cache.getOrSet(key, 27, 1, {});
auto holder = cache.getOrSet(key, 27, 1, file_size, {});
download(holder); /// Get [27, 27]
increasePriority(holder);
}
@ -292,7 +293,7 @@ TEST_F(FileCacheTest, get)
std::cerr << "Step 5\n";
{
auto holder = cache.getOrSet(key, 0, 26, {}); /// Get [0, 25]
auto holder = cache.getOrSet(key, 0, 26, file_size, {}); /// Get [0, 25]
assertEqual(holder,
{ Range(0, 9), Range(10, 14), Range(15, 16), Range(17, 20), Range(21, 23), Range(24, 26) },
{ State::DOWNLOADED, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED });
@ -309,12 +310,12 @@ TEST_F(FileCacheTest, get)
/// Let's not invalidate pointers to returned segments from range [0, 25] and
/// as max elements size is reached, next attempt to put something in cache should fail.
/// This will also check that [27, 27] was indeed evicted.
auto holder2 = cache.getOrSet(key, 27, 1, {});
auto holder2 = cache.getOrSet(key, 27, 1, file_size, {});
assertEqual(holder2, { Range(27, 27) }, { State::EMPTY });
assertDownloadFails(holder2->front());
assertEqual(holder2, { Range(27, 27) }, { State::DETACHED });
auto holder3 = cache.getOrSet(key, 28, 3, {});
auto holder3 = cache.getOrSet(key, 28, 3, file_size, {});
assertEqual(holder3, { Range(28, 30) }, { State::EMPTY });
assertDownloadFails(holder3->front());
assertEqual(holder3, { Range(28, 30) }, { State::DETACHED });
@ -336,7 +337,7 @@ TEST_F(FileCacheTest, get)
std::cerr << "Step 6\n";
{
auto holder = cache.getOrSet(key, 12, 10, {}); /// Get [12, 21]
auto holder = cache.getOrSet(key, 12, 10, file_size, {}); /// Get [12, 21]
assertEqual(holder,
{ Range(10, 14), Range(15, 16), Range(17, 20), Range(21, 21) },
{ State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::EMPTY });
@ -357,7 +358,7 @@ TEST_F(FileCacheTest, get)
std::cerr << "Step 7\n";
{
auto holder = cache.getOrSet(key, 23, 5, {}); /// Get [23, 27]
auto holder = cache.getOrSet(key, 23, 5, file_size, {}); /// Get [23, 27]
assertEqual(holder,
{ Range(23, 23), Range(24, 26), Range(27, 27) },
{ State::EMPTY, State::DOWNLOADED, State::EMPTY });
@ -376,25 +377,25 @@ TEST_F(FileCacheTest, get)
std::cerr << "Step 8\n";
{
auto holder = cache.getOrSet(key, 2, 3, {}); /// Get [2, 4]
auto holder = cache.getOrSet(key, 2, 3, file_size, {}); /// Get [2, 4]
assertEqual(holder, { Range(2, 4) }, { State::EMPTY });
auto holder2 = cache.getOrSet(key, 30, 2, {}); /// Get [30, 31]
auto holder2 = cache.getOrSet(key, 30, 2, file_size, {}); /// Get [30, 31]
assertEqual(holder2, { Range(30, 31) }, { State::EMPTY });
download(get(holder, 0));
download(get(holder2, 0));
auto holder3 = cache.getOrSet(key, 23, 1, {}); /// Get [23, 23]
auto holder3 = cache.getOrSet(key, 23, 1, file_size, {}); /// Get [23, 23]
assertEqual(holder3, { Range(23, 23) }, { State::DOWNLOADED });
auto holder4 = cache.getOrSet(key, 24, 3, {}); /// Get [24, 26]
auto holder4 = cache.getOrSet(key, 24, 3, file_size, {}); /// Get [24, 26]
assertEqual(holder4, { Range(24, 26) }, { State::DOWNLOADED });
auto holder5 = cache.getOrSet(key, 27, 1, {}); /// Get [27, 27]
auto holder5 = cache.getOrSet(key, 27, 1, file_size, {}); /// Get [27, 27]
assertEqual(holder5, { Range(27, 27) }, { State::DOWNLOADED });
auto holder6 = cache.getOrSet(key, 0, 40, {});
auto holder6 = cache.getOrSet(key, 0, 40, file_size, {});
assertEqual(holder6,
{ Range(0, 1), Range(2, 4), Range(5, 22), Range(23, 23), Range(24, 26), Range(27, 27), Range(28, 29), Range(30, 31), Range(32, 39) },
{ State::EMPTY, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED, State::EMPTY });
@ -422,14 +423,14 @@ TEST_F(FileCacheTest, get)
/// Get [2, 4]
{
auto holder = cache.getOrSet(key, 2, 3, {});
auto holder = cache.getOrSet(key, 2, 3, file_size, {});
assertEqual(holder, { Range(2, 4) }, { State::DOWNLOADED });
increasePriority(holder);
}
{
auto holder = cache.getOrSet(key, 25, 5, {}); /// Get [25, 29]
auto holder = cache.getOrSet(key, 25, 5, file_size, {}); /// Get [25, 29]
assertEqual(holder,
{ Range(24, 26), Range(27, 27), Range(28, 29) },
{ State::DOWNLOADED, State::DOWNLOADED, State::EMPTY });
@ -451,7 +452,7 @@ TEST_F(FileCacheTest, get)
chassert(&DB::CurrentThread::get() == &thread_status_1);
DB::CurrentThread::QueryScope query_scope_holder_1(query_context_1);
auto holder2 = cache.getOrSet(key, 25, 5, {}); /// Get [25, 29] once again.
auto holder2 = cache.getOrSet(key, 25, 5, file_size, {}); /// Get [25, 29] once again.
assertEqual(holder2,
{ Range(24, 26), Range(27, 27), Range(28, 29) },
{ State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADING });
@ -494,7 +495,7 @@ TEST_F(FileCacheTest, get)
/// state is changed not manually via segment->completeWithState(state) but from destructor of holder
/// and notify_all() is also called from destructor of holder.
auto holder = cache.getOrSet(key, 3, 23, {}); /// Get [3, 25]
auto holder = cache.getOrSet(key, 3, 23, file_size, {}); /// Get [3, 25]
assertEqual(holder,
{ Range(2, 4), Range(5, 23), Range(24, 26) },
{ State::DOWNLOADED, State::EMPTY, State::DOWNLOADED });
@ -516,7 +517,7 @@ TEST_F(FileCacheTest, get)
chassert(&DB::CurrentThread::get() == &thread_status_1);
DB::CurrentThread::QueryScope query_scope_holder_1(query_context_1);
auto holder2 = cache.getOrSet(key, 3, 23, {}); /// Get [3, 25] once again
auto holder2 = cache.getOrSet(key, 3, 23, file_size, {}); /// Get [3, 25] once again
assertEqual(holder,
{ Range(2, 4), Range(5, 23), Range(24, 26) },
{ State::DOWNLOADED, State::DOWNLOADING, State::DOWNLOADED });
@ -560,7 +561,8 @@ TEST_F(FileCacheTest, get)
auto key = cache2.createKeyForPath("key1");
/// Get [2, 29]
assertEqual(cache2.getOrSet(key, 2, 28, {}),
assertEqual(
cache2.getOrSet(key, 2, 28, file_size, {}),
{Range(2, 4), Range(5, 23), Range(24, 26), Range(27, 27), Range(28, 29)},
{State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED});
}
@ -578,7 +580,8 @@ TEST_F(FileCacheTest, get)
auto key = cache2.createKeyForPath("key1");
/// Get [0, 24]
assertEqual(cache2.getOrSet(key, 0, 25, {}),
assertEqual(
cache2.getOrSet(key, 0, 25, file_size, {}),
{Range(0, 9), Range(10, 19), Range(20, 24)},
{State::EMPTY, State::EMPTY, State::EMPTY});
}
@ -598,7 +601,7 @@ TEST_F(FileCacheTest, get)
ASSERT_TRUE(!fs::exists(key_path));
ASSERT_TRUE(!fs::exists(fs::path(key_path).parent_path()));
download(cache.getOrSet(key, 0, 10, {}));
download(cache.getOrSet(key, 0, 10, file_size, {}));
ASSERT_EQ(cache.getUsedCacheSize(), 10);
ASSERT_TRUE(fs::exists(cache.getPathInLocalCache(key, 0, FileSegmentKind::Regular)));
@ -628,7 +631,7 @@ TEST_F(FileCacheTest, get)
ASSERT_TRUE(!fs::exists(key_path));
ASSERT_TRUE(!fs::exists(fs::path(key_path).parent_path()));
download(cache.getOrSet(key, 0, 10, {}));
download(cache.getOrSet(key, 0, 10, file_size, {}));
ASSERT_EQ(cache.getUsedCacheSize(), 10);
ASSERT_TRUE(fs::exists(key_path));
@ -756,7 +759,7 @@ TEST_F(FileCacheTest, temporaryData)
auto tmp_data_scope = std::make_shared<TemporaryDataOnDiskScope>(nullptr, &file_cache, 0);
auto some_data_holder = file_cache.getOrSet(file_cache.createKeyForPath("some_data"), 0, 5_KiB, CreateFileSegmentSettings{});
auto some_data_holder = file_cache.getOrSet(file_cache.createKeyForPath("some_data"), 0, 5_KiB, 5_KiB, CreateFileSegmentSettings{});
{
ASSERT_EQ(some_data_holder->size(), 5);

View File

@ -4,7 +4,7 @@
namespace DB
{
Tokens::Tokens(const char * begin, const char * end, size_t max_query_size)
Tokens::Tokens(const char * begin, const char * end, size_t max_query_size, bool skip_insignificant)
{
Lexer lexer(begin, end, max_query_size);
@ -13,7 +13,7 @@ Tokens::Tokens(const char * begin, const char * end, size_t max_query_size)
{
Token token = lexer.nextToken();
stop = token.isEnd() || token.type == TokenType::ErrorMaxQuerySizeExceeded;
if (token.isSignificant())
if (token.isSignificant() || (!skip_insignificant && !data.empty() && data.back().isSignificant()))
data.emplace_back(std::move(token));
} while (!stop);
}

View File

@ -24,7 +24,7 @@ private:
std::size_t last_accessed_index = 0;
public:
Tokens(const char * begin, const char * end, size_t max_query_size = 0);
Tokens(const char * begin, const char * end, size_t max_query_size = 0, bool skip_insignificant = true);
ALWAYS_INLINE inline const Token & operator[](size_t index)
{

View File

@ -233,10 +233,11 @@ ASTPtr tryParseQuery(
const std::string & query_description,
bool allow_multi_statements,
size_t max_query_size,
size_t max_parser_depth)
size_t max_parser_depth,
bool skip_insignificant)
{
const char * query_begin = _out_query_end;
Tokens tokens(query_begin, all_queries_end, max_query_size);
Tokens tokens(query_begin, all_queries_end, max_query_size, skip_insignificant);
/// NOTE: consider use UInt32 for max_parser_depth setting.
IParser::Pos token_iterator(tokens, static_cast<uint32_t>(max_parser_depth));

View File

@ -18,7 +18,8 @@ ASTPtr tryParseQuery(
bool allow_multi_statements, /// If false, check for non-space characters after semicolon and set error message if any.
size_t max_query_size, /// If (end - pos) > max_query_size and query is longer than max_query_size then throws "Max query size exceeded".
/// Disabled if zero. Used to check the query size when the buffer can contain data for an INSERT query.
size_t max_parser_depth);
size_t max_parser_depth,
bool skip_insignificant = true); /// If true, lexer will skip all insignificant tokens (e.g. whitespaces)
/// Parse query or throw an exception with error message.

View File

@ -1,11 +1,11 @@
#include <Storages/MergeTree/MergeTreeReadPool.h>
#include <Storages/MergeTree/MergeTreeBaseSelectProcessor.h>
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
#include <Storages/MergeTree/MergeTreeBaseSelectProcessor.h>
#include <Storages/MergeTree/MergeTreeReadPool.h>
#include <base/range.h>
#include <Interpreters/Context_fwd.h>
#include <Common/Stopwatch.h>
#include <Common/formatReadable.h>
#include <Common/logger_useful.h>
#include <base/range.h>
namespace ProfileEvents
@ -22,6 +22,14 @@ namespace ErrorCodes
namespace DB
{
size_t getApproxSizeOfPart(const IMergeTreeDataPart & part, const Names & columns_to_read)
{
ColumnSize columns_size{};
for (const auto & col_name : columns_to_read)
columns_size.add(part.getColumnSize(col_name));
return columns_size.data_compressed;
}
MergeTreeReadPool::MergeTreeReadPool(
size_t threads_,
size_t sum_marks_,
@ -45,16 +53,43 @@ MergeTreeReadPool::MergeTreeReadPool(
, parts_ranges(std::move(parts_))
, predict_block_size_bytes(context_->getSettingsRef().preferred_block_size_bytes > 0)
, do_not_steal_tasks(do_not_steal_tasks_)
, merge_tree_use_const_size_tasks_for_remote_reading(context_->getSettingsRef().merge_tree_use_const_size_tasks_for_remote_reading)
, backoff_settings{context_->getSettingsRef()}
, backoff_state{threads_}
{
/// parts don't contain duplicate MergeTreeDataPart's.
const auto per_part_sum_marks = fillPerPartInfo(
parts_ranges, storage_snapshot, is_part_on_remote_disk,
do_not_steal_tasks, predict_block_size_bytes,
predict_block_size_bytes,
column_names, virtual_column_names, prewhere_info,
actions_settings, reader_settings, per_part_params);
if (std::ranges::count(is_part_on_remote_disk, true))
{
const auto & settings = context_->getSettingsRef();
size_t total_compressed_bytes = 0;
size_t total_marks = 0;
for (const auto & part : parts_ranges)
{
total_compressed_bytes += getApproxSizeOfPart(
*part.data_part, prewhere_info ? prewhere_info->prewhere_actions->getRequiredColumnsNames() : column_names_);
total_marks += part.getMarksCount();
}
if (total_marks)
{
const auto min_bytes_per_task = settings.merge_tree_min_bytes_per_task_for_remote_reading;
const auto avg_mark_bytes = std::max<size_t>(total_compressed_bytes / total_marks, 1);
/// We're taking min here because number of tasks shouldn't be too low - it will make task stealing impossible.
const auto heuristic_min_marks = std::min<size_t>(total_marks / threads_, min_bytes_per_task / avg_mark_bytes);
if (heuristic_min_marks > min_marks_for_concurrent_read)
{
min_marks_for_concurrent_read = heuristic_min_marks;
}
}
}
fillPerThreadInfo(threads_, sum_marks_, per_part_sum_marks, parts_ranges);
}
@ -62,7 +97,6 @@ std::vector<size_t> MergeTreeReadPool::fillPerPartInfo(
const RangesInDataParts & parts,
const StorageSnapshotPtr & storage_snapshot,
std::vector<bool> & is_part_on_remote_disk,
bool & do_not_steal_tasks,
bool & predict_block_size_bytes,
const Names & column_names,
const Names & virtual_column_names,
@ -84,7 +118,6 @@ std::vector<size_t> MergeTreeReadPool::fillPerPartInfo(
bool part_on_remote_disk = part.data_part->isStoredOnRemoteDisk();
is_part_on_remote_disk[i] = part_on_remote_disk;
do_not_steal_tasks |= part_on_remote_disk;
/// Read marks for every data part.
size_t sum_marks = 0;
@ -160,14 +193,13 @@ MergeTreeReadTaskPtr MergeTreeReadPool::getTask(size_t thread)
auto & marks_in_part = thread_tasks.sum_marks_in_parts.back();
size_t need_marks;
if (is_part_on_remote_disk[part_idx]) /// For better performance with remote disks
if (is_part_on_remote_disk[part_idx] && !merge_tree_use_const_size_tasks_for_remote_reading)
need_marks = marks_in_part;
else /// Get whole part to read if it is small enough.
need_marks = std::min(marks_in_part, min_marks_for_concurrent_read);
/// Do not leave too few rows in the part for the next time.
if (marks_in_part > need_marks &&
marks_in_part - need_marks < min_marks_for_concurrent_read)
if (marks_in_part > need_marks && marks_in_part - need_marks < min_marks_for_concurrent_read / 2)
need_marks = marks_in_part;
MarkRanges ranges_to_get_from_part;
@ -300,6 +332,8 @@ void MergeTreeReadPool::fillPerThreadInfo(
parts_queue.push(std::move(info.second));
}
LOG_DEBUG(log, "min_marks_for_concurrent_read={}", min_marks_for_concurrent_read);
const size_t min_marks_per_thread = (sum_marks - 1) / threads + 1;
for (size_t i = 0; i < threads && !parts_queue.empty(); ++i)

View File

@ -94,7 +94,6 @@ public:
const RangesInDataParts & parts,
const StorageSnapshotPtr & storage_snapshot,
std::vector<bool> & is_part_on_remote_disk,
bool & do_not_steal_tasks,
bool & predict_block_size_bytes,
const Names & column_names,
const Names & virtual_column_names,
@ -119,6 +118,7 @@ private:
RangesInDataParts parts_ranges;
bool predict_block_size_bytes;
bool do_not_steal_tasks;
bool merge_tree_use_const_size_tasks_for_remote_reading = false;
std::vector<PerPartParams> per_part_params;
std::vector<bool> is_part_on_remote_disk;
@ -189,7 +189,7 @@ public:
, parts_ranges(std::move(parts_))
{
MergeTreeReadPool::fillPerPartInfo(
parts_ranges, storage_snapshot, is_part_on_remote_disk, do_not_steal_tasks,
parts_ranges, storage_snapshot, is_part_on_remote_disk,
predict_block_size_bytes, column_names, virtual_column_names, prewhere_info,
actions_settings, reader_settings, per_part_params);
@ -226,7 +226,6 @@ private:
const Names virtual_column_names;
RangesInDataParts parts_ranges;
bool do_not_steal_tasks = false;
bool predict_block_size_bytes = false;
std::vector<bool> is_part_on_remote_disk;
std::vector<MergeTreeReadPool::PerPartParams> per_part_params;

View File

@ -207,6 +207,7 @@ std::optional<Chunk> MergeTreeSource::tryGenerate()
try
{
OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"};
holder->setResult(algorithm->read());
}
catch (...)
@ -221,6 +222,7 @@ std::optional<Chunk> MergeTreeSource::tryGenerate()
}
#endif
OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"};
return processReadResult(algorithm->read());
}

View File

@ -470,7 +470,7 @@ class GenerateSource : public ISource
{
public:
GenerateSource(UInt64 block_size_, UInt64 max_array_length_, UInt64 max_string_length_, UInt64 random_seed_, Block block_header_, ContextPtr context_)
: ISource(Nested::flatten(prepareBlockToFill(block_header_)))
: ISource(Nested::flattenArrayOfTuples(prepareBlockToFill(block_header_)))
, block_size(block_size_), max_array_length(max_array_length_), max_string_length(max_string_length_)
, block_to_fill(std::move(block_header_)), rng(random_seed_), context(context_) {}
@ -485,7 +485,7 @@ protected:
for (const auto & elem : block_to_fill)
columns.emplace_back(fillColumnWithRandomData(elem.type, block_size, max_array_length, max_string_length, rng, context));
columns = Nested::flatten(block_to_fill.cloneWithColumns(columns)).getColumns();
columns = Nested::flattenArrayOfTuples(block_to_fill.cloneWithColumns(columns)).getColumns();
return {std::move(columns), block_size};
}

View File

@ -225,7 +225,7 @@ public:
/// Cannot just use serializeAsText for array data type even though it converts perfectly
/// any dimension number array into text format, because it encloses in '[]' and for postgres it must be '{}'.
/// Check if array[...] syntax from PostgreSQL will be applicable.
void parseArray(const Field & array_field, const DataTypePtr & data_type, WriteBuffer & ostr)
static void parseArray(const Field & array_field, const DataTypePtr & data_type, WriteBuffer & ostr)
{
const auto * array_type = typeid_cast<const DataTypeArray *>(data_type.get());
const auto & nested = array_type->getNestedType();
@ -233,7 +233,7 @@ public:
if (!isArray(nested))
{
writeText(clickhouseToPostgresArray(array, data_type), ostr);
parseArrayContent(array, data_type, ostr);
return;
}
@ -247,7 +247,7 @@ public:
if (!isArray(nested_array_type->getNestedType()))
{
writeText(clickhouseToPostgresArray(iter->get<Array>(), nested), ostr);
parseArrayContent(iter->get<Array>(), nested, ostr);
}
else
{
@ -260,17 +260,36 @@ public:
/// Conversion is done via column casting because writeText(Array..) converted Date and DateTime
/// values incorrectly and added extra quotes around values inside the array.
static std::string clickhouseToPostgresArray(const Array & array_field, const DataTypePtr & data_type)
static void parseArrayContent(const Array & array_field, const DataTypePtr & data_type, WriteBuffer & ostr)
{
auto nested = typeid_cast<const DataTypeArray *>(data_type.get())->getNestedType();
auto array_column = ColumnArray::create(createNested(nested));
auto nested_type = typeid_cast<const DataTypeArray *>(data_type.get())->getNestedType();
auto array_column = ColumnArray::create(createNested(nested_type));
array_column->insert(array_field);
WriteBufferFromOwnString ostr;
data_type->getDefaultSerialization()->serializeText(*array_column, 0, ostr, FormatSettings{});
/// ostr is guaranteed to be at least '[]', i.e. size is at least 2 and 2 only if ostr.str() == '[]'
assert(ostr.str().size() >= 2);
return '{' + std::string(ostr.str().begin() + 1, ostr.str().end() - 1) + '}';
const IColumn & nested_column = array_column->getData();
const auto serialization = nested_type->getDefaultSerialization();
FormatSettings settings;
settings.pretty.charset = FormatSettings::Pretty::Charset::ASCII;
if (nested_type->isNullable())
nested_type = static_cast<const DataTypeNullable *>(nested_type.get())->getNestedType();
/// UUIDs inside arrays are expected to be unquoted in PostgreSQL.
const bool quoted = !isUUID(nested_type);
writeChar('{', ostr);
for (size_t i = 0, size = array_field.size(); i < size; ++i)
{
if (i != 0)
writeChar(',', ostr);
if (quoted)
serialization->serializeTextQuoted(nested_column, i, ostr, settings);
else
serialization->serializeText(nested_column, i, ostr, settings);
}
writeChar('}', ostr);
}
static MutableColumnPtr createNested(DataTypePtr nested)
@ -295,6 +314,7 @@ public:
else if (which.isFloat64()) nested_column = ColumnFloat64::create();
else if (which.isDate()) nested_column = ColumnUInt16::create();
else if (which.isDateTime()) nested_column = ColumnUInt32::create();
else if (which.isUUID()) nested_column = ColumnUUID::create();
else if (which.isDateTime64())
{
nested_column = ColumnDecimal<DateTime64>::create(0, 6);

View File

@ -1,18 +1,19 @@
#include <Common/typeid_cast.h>
#include <Common/Exception.h>
#include <Core/Block.h>
#include <Storages/StorageGenerateRandom.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <TableFunctions/ITableFunction.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <TableFunctions/TableFunctionGenerateRandom.h>
#include <Functions/FunctionGenerateRandomStructure.h>
#include <Interpreters/parseColumnsListForTableFunction.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Common/randomSeed.h>
#include "registerTableFunctions.h"
@ -25,10 +26,9 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int LOGICAL_ERROR;
extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
}
void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, ContextPtr /*context*/)
void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, ContextPtr context)
{
ASTs & args_func = ast_function->children;
@ -40,10 +40,21 @@ void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, Co
if (args.empty())
return;
if (args.size() > 4)
/// First, check if first argument is structure or seed.
const auto * first_arg_literal = args[0]->as<const ASTLiteral>();
bool first_argument_is_structure = !first_arg_literal || first_arg_literal->value.getType() == Field::Types::String;
size_t max_args = first_argument_is_structure ? 4 : 3;
if (args.size() > max_args)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Table function '{}' requires at most four arguments: "
" structure, [random_seed, max_string_length, max_array_length].", getName());
"Table function '{}' requires at most four (or three if structure is missing) arguments: "
" [structure, random_seed, max_string_length, max_array_length].", getName());
if (first_argument_is_structure)
{
/// Allow constant expression for structure argument, it can be generated using generateRandomStructure function.
args[0] = evaluateConstantExpressionAsLiteral(args[0], context);
}
// All the arguments must be literals.
for (const auto & arg : args)
@ -51,26 +62,39 @@ void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, Co
if (!arg->as<const ASTLiteral>())
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"All arguments of table function '{}' must be literals. "
"All arguments of table function '{}' except structure argument must be literals. "
"Got '{}' instead", getName(), arg->formatForErrorMessage());
}
}
/// Parsing first argument as table structure and creating a sample block
structure = checkAndGetLiteralArgument<String>(args[0], "structure");
size_t arg_index = 0;
if (args.size() >= 2)
if (first_argument_is_structure)
{
const auto & literal = args[1]->as<const ASTLiteral &>();
/// Parsing first argument as table structure and creating a sample block
structure = checkAndGetLiteralArgument<String>(args[arg_index], "structure");
++arg_index;
}
if (args.size() >= arg_index + 1)
{
const auto & literal = args[arg_index]->as<const ASTLiteral &>();
++arg_index;
if (!literal.value.isNull())
random_seed = checkAndGetLiteralArgument<UInt64>(literal, "random_seed");
}
if (args.size() >= 3)
max_string_length = checkAndGetLiteralArgument<UInt64>(args[2], "max_string_length");
if (args.size() >= arg_index + 1)
{
max_string_length = checkAndGetLiteralArgument<UInt64>(args[arg_index], "max_string_length");
++arg_index;
}
if (args.size() == 4)
max_array_length = checkAndGetLiteralArgument<UInt64>(args[3], "max_string_length");
/// Last optional argument: `max_array_length`.
if (args.size() == arg_index + 1)
{
    /// The label passed to checkAndGetLiteralArgument names the argument being read
    /// (the other calls here pass "structure", "random_seed", "max_string_length").
    /// It previously said "max_string_length" by copy-paste, so a bad value for this
    /// argument would be reported under the wrong name.
    max_array_length = checkAndGetLiteralArgument<UInt64>(args[arg_index], "max_array_length");
    ++arg_index;
}
}
ColumnsDescription TableFunctionGenerateRandom::getActualTableStructure(ContextPtr context) const
@ -78,11 +102,11 @@ ColumnsDescription TableFunctionGenerateRandom::getActualTableStructure(ContextP
if (structure == "auto")
{
if (structure_hint.empty())
throw Exception(
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
"Table function '{}' was used without structure argument but structure could not be determined automatically. Please, "
"provide structure manually",
getName());
{
auto random_structure = FunctionGenerateRandomStructure::generateRandomStructure(random_seed.value_or(randomSeed()), context);
return parseColumnsListFromString(random_structure, context);
}
return structure_hint;
}

View File

@ -71,11 +71,11 @@ This pull-request will be merged automatically as it reaches the mergeable state
### If the PR was closed and then reopened
If it stuck, check {pr_url} for `{backport_created_label}` and delete it if \
necessary. Manually merging will do nothing, since `{label_backports_created}` \
necessary. Manually merging will do nothing, since `{backport_created_label}` \
prevents the original PR {pr_url} from being processed.
If you want to recreate the PR: delete the `{label_cherrypick}` label and delete this branch.
You may also need to delete the `{label_backports_created}` label from the original PR.
You may also need to delete the `{backport_created_label}` label from the original PR.
"""
BACKPORT_DESCRIPTION = """This pull-request is a last step of an automated \
backporting.

View File

@ -0,0 +1,3 @@
<clickhouse>
</clickhouse>

View File

@ -0,0 +1,63 @@
import pytest
from helpers.client import CommandRequest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance(
"node",
main_configs=["configs/keeper_config.xml"],
with_zookeeper=True,
stay_alive=True,
)
@pytest.fixture(scope="module")
def started_cluster():
    """Module-scoped fixture: start the cluster, hand it to the tests, and shut it down afterwards."""
    try:
        cluster.start()
        yield cluster
    finally:
        # Runs whether startup or the tests failed, so the cluster is always shut down.
        cluster.shutdown()
def test_base_commands(started_cluster):
    """Send a `-q` batch (two `create`, one `get`) through keeper-client and check the value read back."""
    _ = started_cluster

    binary = started_cluster.server_bin_path
    keeper_host = str(cluster.get_instance_ip("zoo1"))
    keeper_port = str(cluster.zookeeper_port)
    query = "create test_create_zk_node1 testvalue1;create test_create_zk_node_2 testvalue2;get test_create_zk_node1;"

    argv = [binary, "keeper-client", "--host", keeper_host, "--port", keeper_port, "-q", query]
    request = CommandRequest(argv, stdin="")

    # Only the `get` is expected to produce output: the value stored in the first node.
    assert request.get_answer() == "testvalue1\n"
def test_four_letter_word_commands(started_cluster):
    """Send the `ruok` command through keeper-client and expect the `imok` reply."""
    _ = started_cluster

    binary = started_cluster.server_bin_path
    keeper_host = str(cluster.get_instance_ip("zoo1"))
    keeper_port = str(cluster.zookeeper_port)

    argv = [binary, "keeper-client"]
    argv += ["--host", keeper_host]
    argv += ["--port", keeper_port]
    argv += ["-q", "ruok"]

    request = CommandRequest(argv, stdin="")
    assert request.get_answer() == "imok\n"

View File

@ -123,7 +123,9 @@ def test_postgres_conversions(started_cluster):
g Text[][][][][] NOT NULL, -- String
h Integer[][][], -- Nullable(Int32)
i Char(2)[][][][], -- Nullable(String)
k Char(2)[] -- Nullable(String)
j Char(2)[], -- Nullable(String)
k UUID[], -- Nullable(UUID)
l UUID[][] -- Nullable(UUID)
)"""
)
@ -133,15 +135,18 @@ def test_postgres_conversions(started_cluster):
)
expected = (
"a\tArray(Date)\t\t\t\t\t\n"
+ "b\tArray(DateTime64(6))\t\t\t\t\t\n"
+ "c\tArray(Array(Float32))\t\t\t\t\t\n"
+ "d\tArray(Array(Float64))\t\t\t\t\t\n"
+ "e\tArray(Array(Array(Decimal(5, 5))))\t\t\t\t\t\n"
+ "f\tArray(Array(Array(Int32)))\t\t\t\t\t\n"
+ "g\tArray(Array(Array(Array(Array(String)))))\t\t\t\t\t\n"
+ "h\tArray(Array(Array(Nullable(Int32))))\t\t\t\t\t\n"
+ "i\tArray(Array(Array(Array(Nullable(String)))))\t\t\t\t\t\n"
+ "k\tArray(Nullable(String))"
"b\tArray(DateTime64(6))\t\t\t\t\t\n"
"c\tArray(Array(Float32))\t\t\t\t\t\n"
"d\tArray(Array(Float64))\t\t\t\t\t\n"
"e\tArray(Array(Array(Decimal(5, 5))))\t\t\t\t\t\n"
"f\tArray(Array(Array(Int32)))\t\t\t\t\t\n"
"g\tArray(Array(Array(Array(Array(String)))))\t\t\t\t\t\n"
"h\tArray(Array(Array(Nullable(Int32))))\t\t\t\t\t\n"
"i\tArray(Array(Array(Array(Nullable(String)))))\t\t\t\t\t\n"
"j\tArray(Nullable(String))\t\t\t\t\t\n"
"k\tArray(Nullable(UUID))\t\t\t\t\t\n"
"l\tArray(Array(Nullable(UUID)))"
""
)
assert result.rstrip() == expected
@ -157,7 +162,9 @@ def test_postgres_conversions(started_cluster):
"[[[[['winx', 'winx', 'winx']]]]], "
"[[[1, NULL], [NULL, 1]], [[NULL, NULL], [NULL, NULL]], [[4, 4], [5, 5]]], "
"[[[[NULL]]]], "
"[]"
"[], "
"['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a', '42209d53-d641-4d73-a8b6-c038db1e75d6', NULL], "
"[[NULL, '42209d53-d641-4d73-a8b6-c038db1e75d6'], ['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a', NULL], [NULL, NULL]]"
")"
)
@ -167,15 +174,17 @@ def test_postgres_conversions(started_cluster):
)
expected = (
"['2000-05-12','2000-05-12']\t"
+ "['2000-05-12 12:12:12.012345','2000-05-12 12:12:12.012345']\t"
+ "[[1.12345],[1.12345],[1.12345]]\t"
+ "[[1.1234567891],[1.1234567891],[1.1234567891]]\t"
+ "[[[0.11111,0.11111]],[[0.22222,0.22222]],[[0.33333,0.33333]]]\t"
"['2000-05-12 12:12:12.012345','2000-05-12 12:12:12.012345']\t"
"[[1.12345],[1.12345],[1.12345]]\t"
"[[1.1234567891],[1.1234567891],[1.1234567891]]\t"
"[[[0.11111,0.11111]],[[0.22222,0.22222]],[[0.33333,0.33333]]]\t"
"[[[1,1],[1,1]],[[3,3],[3,3]],[[4,4],[5,5]]]\t"
"[[[[['winx','winx','winx']]]]]\t"
"[[[1,NULL],[NULL,1]],[[NULL,NULL],[NULL,NULL]],[[4,4],[5,5]]]\t"
"[[[[NULL]]]]\t"
"[]\n"
"[]\t"
"['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a','42209d53-d641-4d73-a8b6-c038db1e75d6',NULL]\t"
"[[NULL,'42209d53-d641-4d73-a8b6-c038db1e75d6'],['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a',NULL],[NULL,NULL]]\n"
)
assert result == expected

View File

@ -1,5 +1,4 @@
SELECT * FROM generateRandom('i8', 1, 10, 10); -- { serverError 62 }
SELECT * FROM generateRandom; -- { serverError 60 }
SELECT * FROM generateRandom(); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE }
SELECT * FROM generateRandom('i8 UInt8', 1, 10, 10, 10, 10); -- { serverError 42 }
SELECT * FROM generateRandom('', 1, 10, 10); -- { serverError 62 }

View File

@ -1,2 +1,2 @@
134217728 1048576 104857600 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 0
134217728 1048576 8388608 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 0
134217728 1048576 104857600 0 0 0 0 /var/lib/clickhouse/caches/s3_cache_2/ 0

View File

@ -266,7 +266,6 @@ encodeURLComponent
encodeURLFormComponent
encodeXMLComponent
endsWith
entropyLearnedHash
equals
erf
erfc
@ -559,7 +558,6 @@ positionCaseInsensitive
positionCaseInsensitiveUTF8
positionUTF8
pow
prepareTrainEntropyLearnedHash
proportionsZTest
protocol
queryID
@ -865,7 +863,6 @@ toYear
toYearWeek
today
tokens
trainEntropyLearnedHash
transactionID
transactionLatestSnapshot
transactionOldestSnapshot

View File

@ -0,0 +1,19 @@
c1 String, c2 UInt256, c3 String, c4 Decimal128(8), c5 UInt128
String
Const(String)
` 90465455320735604871982424534384518837533904778028808627865442405232847164685 5& -303477100882544888461471906106.82821046 75820566154622566322847299106656624693
c1 FixedString(125)
c2 IPv4
c3.e1 Array(Enum16(\'e1V3\' = -24827, \'e1V14\' = -24479, \'e1V8\' = -22478, \'e1V10\' = -13735, \'e1V15\' = -12641, \'e1V11\' = -10191, \'e1V0\' = -8579, \'e1V7\' = -8104, \'e1V6\' = 712, \'e1V12\' = 5683, \'e1V13\' = 13678, \'e1V9\' = 19740, \'e1V5\' = 23066, \'e1V2\' = 23292, \'e1V4\' = 23736, \'e1V1\' = 31672))
c3.e2 Array(Map(Int8, Int32))
c3.e3 Array(Decimal(76, 64))
c3.e4 Array(Int32)
c3.e5 Array(Nullable(Int64))
c3.e6 Array(Int256)
c4 FixedString(183)
c5 IPv4
c6 UInt256
TbÔ#yV·Ü[>ôMá<4D>Á*ܨĘáľ( O‡R8V1n—ŽĎ)Ň˙ň­HË}C¨­¦Ęúţ™\'<27>“ŕóŞI7<49>íĚqnëV)䳆ĐšqL˘ďPÍoŮRŘg<Č{™Ľ®3Śiű¨đůH_m˙ú!qŁÔ\'G¨Ľ­ 127.48.9.45 ['e1V10','e1V0','e1V10','e1V14','e1V10','e1V14'] [{-13:777622572,102:-1122882357,62:1647813163,-94:2094022166},{-32:1448633509},{},{},{34:1536340393,19:-2049677851,74:65643868,-46:-1990799930,97:-531041081,46:-2634833,14:1581632600,89:-771229823,-105:1238603584},{47:1458809010,109:1640682510,86:1945730198,85:1505847247,35:-35189402}] [153363749503.3642648494826450951141750747382772821825909005880434540971999557,79828591186.7378041015337066268618633118713347614941338787453473118807106292,81672688565.9633830721322966111551266731935181670389237071708068971548883315,573768486971.1812413548839655834002608768736215115033958693122764224003897029,-393925092368.4893467278351090742501814120269109477445490969167853713051140487,46027399426.0865278566391382610843315130162915324295037009704113636499519839] [755855942,1804001770,-78103159,-866181765,731736602,-79599206] [5253556148991564114,4681434929596395351,-7302160004580855709,-3686747220178471318,6288582051009949273,646864891160092871] [17035203905051045016266537043565487029724162173062647021612805252288722534904,-42105881403933504641593145676742477006499618886131028341247993701618141933523,45346626822580305846120377917274679004279343244238782744860626882886217433843,-3660165069803677989574889324494857545543653453780976182221584349306428201647,-23316760935816288837287058499520670431785615691220162210524162590241529297823,6184785563808848524970564618169964412151721224362412457508264894603779018817] ýÚˇ|<7C>A"žx<C5BE>ŔÂ>ń˘®ŤóęŻr—wz­Záť:Ѹjú8tZľĚD"TußŇ2hÚ!ďüŹWěIšśyżütP÷ía|Ž\'†yîĄ<C3AE><6F>ň’ĎfČFÉO\0ŃýŐ6\fIrĺE Sac¶W<~´e ×l<C397>ŐT>P3})řá¬w%ú4@_2ýN"ІXp$^Ň<>ůŤ<C5AF>Í°\04±@áŕşn\bę®Ń\rü4ĹH 16.177.117.209 7882774382721411359365561736453116698030365959050344381263687375357052837130
TbÔ#yV·Ü[>ôMá<4D>Á*ܨĘáľ( O‡R8V1n—ŽĎ)Ň˙ň­HË}C¨­¦Ęúţ™\'<27>“ŕóŞI7<49>íĚqnëV)䳆ĐšqL˘ďPÍoŮRŘg<Č{™Ľ®3Śiű¨đůH_m˙ú!qŁÔ\'G¨Ľ­ 127.48.9.45 ['e1V10','e1V0','e1V10','e1V14','e1V10','e1V14'] [{-13:777622572,102:-1122882357,62:1647813163,-94:2094022166},{-32:1448633509},{},{},{34:1536340393,19:-2049677851,74:65643868,-46:-1990799930,97:-531041081,46:-2634833,14:1581632600,89:-771229823,-105:1238603584},{47:1458809010,109:1640682510,86:1945730198,85:1505847247,35:-35189402}] [153363749503.3642648494826450951141750747382772821825909005880434540971999557,79828591186.7378041015337066268618633118713347614941338787453473118807106292,81672688565.9633830721322966111551266731935181670389237071708068971548883315,573768486971.1812413548839655834002608768736215115033958693122764224003897029,-393925092368.4893467278351090742501814120269109477445490969167853713051140487,46027399426.0865278566391382610843315130162915324295037009704113636499519839] [755855942,1804001770,-78103159,-866181765,731736602,-79599206] [5253556148991564114,4681434929596395351,-7302160004580855709,-3686747220178471318,6288582051009949273,646864891160092871] [17035203905051045016266537043565487029724162173062647021612805252288722534904,-42105881403933504641593145676742477006499618886131028341247993701618141933523,45346626822580305846120377917274679004279343244238782744860626882886217433843,-3660165069803677989574889324494857545543653453780976182221584349306428201647,-23316760935816288837287058499520670431785615691220162210524162590241529297823,6184785563808848524970564618169964412151721224362412457508264894603779018817] ýÚˇ|<7C>A"žx<C5BE>ŔÂ>ń˘®ŤóęŻr—wz­Záť:Ѹjú8tZľĚD"TußŇ2hÚ!ďüŹWěIšśyżütP÷ía|Ž\'†yîĄ<C3AE><6F>ň’ĎfČFÉO\0ŃýŐ6\fIrĺE Sac¶W<~´e ×l<C397>ŐT>P3})řá¬w%ú4@_2ýN"ІXp$^Ň<>ůŤ<C5AF>Í°\04±@áŕşn\bę®Ń\rü4ĹH 16.177.117.209 7882774382721411359365561736453116698030365959050344381263687375357052837130
TbÔ#yV·Ü[>ôMá<4D>Á*ܨĘáľ( O‡R8V1n—ŽĎ)Ň˙ň­HË}C¨­¦Ęúţ™\'<27>“ŕóŞI7<49>íĚqnëV)䳆ĐšqL˘ďPÍoŮRŘg<Č{™Ľ®3Śiű¨đůH_m˙ú!qŁÔ\'G¨Ľ­ 127.48.9.45 ['e1V10'] [{}] [825002272867.1157788721157301271303736024856710948164507982705676578804195475] [1865150610] [7514464811443271056] [33504961604882608369857530219353040639899064613284394558131808339620328539033] ýÚˇ|<7C>A"žx<C5BE>ŔÂ>ń˘®ŤóęŻr—wz­Záť:Ѹjú8tZľĚD"TußŇ2hÚ!ďüŹWěIšśyżütP÷ía|Ž\'†yîĄ<C3AE><6F>ň’ĎfČFÉO\0ŃýŐ6\fIrĺE Sac¶W<~´e ×l<C397>ŐT>P3})řá¬w%ú4@_2ýN"ІXp$^Ň<>ůŤ<C5AF>Í°\04±@áŕşn\bę®Ń\rü4ĹH 16.177.117.209 7882774382721411359365561736453116698030365959050344381263687375357052837130
c1 LowCardinality(Nullable(UInt64)), c2 Date32, c3 LowCardinality(Nullable(Float64)), c4 Int256, c5 Date32

View File

@ -0,0 +1,20 @@
-- Basic call: the first argument is the number of generated columns; the second is presumably the seed.
select generateRandomStructure(5, 42);
-- The result is a String and is returned as a constant column.
select toTypeName(generateRandomStructure(5, 42));
select toColumnTypeName(generateRandomStructure(5, 42));
-- The generated structure can be used as the structure argument of generateRandom.
SELECT * FROM generateRandom(generateRandomStructure(5, 42), 42) LIMIT 1;
-- Negative cases: too many arguments, wrong argument types, non-constant arguments.
select generateRandomStructure(5, 42, 42); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
select generateRandomStructure('5'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT}
select generateRandomStructure(5, '42'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT}
select generateRandomStructure(materialize(5), 42); -- {serverError ILLEGAL_COLUMN}
select generateRandomStructure(5, materialize(42)); -- {serverError ILLEGAL_COLUMN}
-- generateRandom called without a structure string: the first argument is a number, not a structure.
-- In this form at most three arguments are accepted; the fourth one is rejected.
desc generateRandom(10000000);
select * from generateRandom(10000000) limit 1;
select * from generateRandom(10000000, 2) limit 1;
select * from generateRandom(10000000, 2, 2) limit 1;
select * from generateRandom(10000000, 2, 2, 2) limit 1; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
-- Presumably the setting matters because a generated structure may contain LowCardinality over numeric types.
set allow_suspicious_low_cardinality_types=1;
select generateRandomStructure(5, 4);

View File

@ -1,18 +0,0 @@
0
0
0
0
0
0
2603192927274642682
4947675599669400333
10783339242466472992
0
0
0
0
0
0
2603192927274642682
4947675599669400333
10783339242466472992

View File

@ -1,30 +0,0 @@
-- Tags: no-parallel
-- no-parallel because entropy-learned hash uses global state
SET allow_experimental_hash_functions = 1;
-- no commonalities between keys
DROP TABLE IF EXISTS tbl1;
CREATE TABLE tbl1 (x String) ENGINE=Memory;
INSERT INTO tbl1 VALUES ('a'), ('b'), ('c');
SELECT prepareTrainEntropyLearnedHash(x, 'id1') FROM tbl1;
SELECT trainEntropyLearnedHash('id1') FROM tbl1;
SELECT entropyLearnedHash(x, 'id1') FROM tbl1;
-- with commonalities between keys
DROP TABLE IF EXISTS tbl2;
CREATE TABLE tbl2 (x String) ENGINE=Memory;
INSERT INTO tbl2 VALUES ('aa'), ('ba'), ('ca');
SELECT prepareTrainEntropyLearnedHash(x, 'id2') FROM tbl2;
SELECT trainEntropyLearnedHash('id2') FROM tbl2;
SELECT entropyLearnedHash(x, 'id2') FROM tbl2;
-- negative tests
SELECT prepareTrainEntropyLearnedHash(x, 1) FROM tbl1; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT prepareTrainEntropyLearnedHash(1, 'id1') FROM tbl1; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT trainEntropyLearnedHash(1) FROM tbl1; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT entropyLearnedHash(1, 'id1') FROM tbl1; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT entropyLearnedHash(x, 'non-existing id') FROM tbl1; -- { serverError BAD_ARGUMENTS }
DROP TABLE tbl1;
DROP TABLE tbl2;

View File

@ -1453,6 +1453,7 @@ gRPC
gccMurmurHash
gcem
generateRandom
generateRandomStructure
generateULID
generateUUIDv
geoDistance