mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
Merge remote-tracking branch 'origin/master' into test-rabbitmq-with-secure-connection
This commit is contained in:
commit
34f8bf8da5
@ -238,19 +238,19 @@ Example:
|
||||
|
||||
## Virtual Columns {#virtual-columns}
|
||||
|
||||
- `_topic` — Kafka topic.
|
||||
- `_key` — Key of the message.
|
||||
- `_offset` — Offset of the message.
|
||||
- `_timestamp` — Timestamp of the message.
|
||||
- `_timestamp_ms` — Timestamp in milliseconds of the message.
|
||||
- `_partition` — Partition of Kafka topic.
|
||||
- `_headers.name` — Array of message's headers keys.
|
||||
- `_headers.value` — Array of message's headers values.
|
||||
- `_topic` — Kafka topic. Data type: `LowCardinality(String)`.
|
||||
- `_key` — Key of the message. Data type: `String`.
|
||||
- `_offset` — Offset of the message. Data type: `UInt64`.
|
||||
- `_timestamp` — Timestamp of the message. Data type: `Nullable(DateTime)`.
|
||||
- `_timestamp_ms` — Timestamp in milliseconds of the message. Data type: `Nullable(DateTime64(3))`.
|
||||
- `_partition` — Partition of Kafka topic. Data type: `UInt64`.
|
||||
- `_headers.name` — Array of message's headers keys. Data type: `Array(String)`.
|
||||
- `_headers.value` — Array of message's headers values. Data type: `Array(String)`.
|
||||
|
||||
Additional virtual columns when `kafka_handle_error_mode='stream'`:
|
||||
|
||||
- `_raw_message` - Raw message that couldn't be parsed successfully.
|
||||
- `_error` - Exception message happened during failed parsing.
|
||||
- `_raw_message` - Raw message that couldn't be parsed successfully. Data type: `String`.
|
||||
- `_error` - Exception message happened during failed parsing. Data type: `String`.
|
||||
|
||||
Note: `_raw_message` and `_error` virtual columns are filled only in case of exception during parsing, they are always empty when message was parsed successfully.
|
||||
|
||||
|
@ -163,14 +163,14 @@ If you want to change the target table by using `ALTER`, we recommend disabling
|
||||
|
||||
## Virtual Columns {#virtual-columns}
|
||||
|
||||
- `_subject` - NATS message subject.
|
||||
- `_subject` - NATS message subject. Data type: `String`.
|
||||
|
||||
Additional virtual columns when `nats_handle_error_mode='stream'`:
|
||||
|
||||
- `_raw_message` - Raw message that couldn't be parsed successfully.
|
||||
- `_error` - Exception message happened during failed parsing.
|
||||
- `_raw_message` - Raw message that couldn't be parsed successfully. Data type: `Nullable(String)`.
|
||||
- `_error` - Exception message happened during failed parsing. Data type: `Nullable(String)`.
|
||||
|
||||
Note: `_raw_message` and `_error` virtual columns are filled only in case of exception during parsing, they are always empty when message was parsed successfully.
|
||||
Note: `_raw_message` and `_error` virtual columns are filled only in case of exception during parsing, they are always `NULL` when message was parsed successfully.
|
||||
|
||||
|
||||
## Data formats support {#data-formats-support}
|
||||
|
@ -184,19 +184,19 @@ Example:
|
||||
|
||||
## Virtual Columns {#virtual-columns}
|
||||
|
||||
- `_exchange_name` - RabbitMQ exchange name.
|
||||
- `_channel_id` - ChannelID, on which consumer, who received the message, was declared.
|
||||
- `_delivery_tag` - DeliveryTag of the received message. Scoped per channel.
|
||||
- `_redelivered` - `redelivered` flag of the message.
|
||||
- `_message_id` - messageID of the received message; non-empty if was set, when message was published.
|
||||
- `_timestamp` - timestamp of the received message; non-empty if was set, when message was published.
|
||||
- `_exchange_name` - RabbitMQ exchange name. Data type: `String`.
|
||||
- `_channel_id` - ChannelID, on which consumer, who received the message, was declared. Data type: `String`.
|
||||
- `_delivery_tag` - DeliveryTag of the received message. Scoped per channel. Data type: `UInt64`.
|
||||
- `_redelivered` - `redelivered` flag of the message. Data type: `UInt8`.
|
||||
- `_message_id` - messageID of the received message; non-empty if was set, when message was published. Data type: `String`.
|
||||
- `_timestamp` - timestamp of the received message; non-empty if was set, when message was published. Data type: `UInt64`.
|
||||
|
||||
Additional virtual columns when `rabbitmq_handle_error_mode='stream'`:
|
||||
|
||||
- `_raw_message` - Raw message that couldn't be parsed successfully.
|
||||
- `_error` - Exception message happened during failed parsing.
|
||||
- `_raw_message` - Raw message that couldn't be parsed successfully. Data type: `Nullable(String)`.
|
||||
- `_error` - Exception message happened during failed parsing. Data type: `Nullable(String)`.
|
||||
|
||||
Note: `_raw_message` and `_error` virtual columns are filled only in case of exception during parsing, they are always empty when message was parsed successfully.
|
||||
Note: `_raw_message` and `_error` virtual columns are filled only in case of exception during parsing, they are always `NULL` when message was parsed successfully.
|
||||
|
||||
## Data formats support {#data-formats-support}
|
||||
|
||||
|
@ -94,12 +94,12 @@ If you want to change the target table by using `ALTER`, we recommend disabling
|
||||
|
||||
## Virtual Columns {#virtual-columns}
|
||||
|
||||
- `_filename` - Name of the log file.
|
||||
- `_offset` - Offset in the log file.
|
||||
- `_filename` - Name of the log file. Data type: `LowCardinality(String)`.
|
||||
- `_offset` - Offset in the log file. Data type: `UInt64`.
|
||||
|
||||
Additional virtual columns when `handle_error_mode='stream'`:
|
||||
|
||||
- `_raw_record` - Raw record that couldn't be parsed successfully.
|
||||
- `_error` - Exception message happened during failed parsing.
|
||||
- `_raw_record` - Raw record that couldn't be parsed successfully. Data type: `Nullable(String)`.
|
||||
- `_error` - Exception message happened during failed parsing. Data type: `Nullable(String)`.
|
||||
|
||||
Note: `_raw_record` and `_error` virtual columns are filled only in case of exception during parsing, they are always empty when message was parsed successfully.
|
||||
Note: `_raw_record` and `_error` virtual columns are filled only in case of exception during parsing, they are always `NULL` when message was parsed successfully.
|
||||
|
@ -2469,6 +2469,7 @@ This function is designed to load a NumPy array from a .npy file into ClickHouse
|
||||
| u2 | UInt16 |
|
||||
| u4 | UInt32 |
|
||||
| u8 | UInt64 |
|
||||
| f2 | Float32 |
|
||||
| f4 | Float32 |
|
||||
| f8 | Float64 |
|
||||
| S | String |
|
||||
|
@ -172,7 +172,27 @@ If you set `timeout_before_checking_execution_speed `to 0, ClickHouse will use c
|
||||
|
||||
## timeout_overflow_mode {#timeout-overflow-mode}
|
||||
|
||||
What to do if the query is run longer than ‘max_execution_time’: ‘throw’ or ‘break’. By default, throw.
|
||||
What to do if the query is run longer than `max_execution_time`: `throw` or `break`. By default, `throw`.
|
||||
|
||||
## max_execution_time_leaf
|
||||
|
||||
Similar semantics to `max_execution_time`, but it only applies to leaf nodes of distributed or remote queries.
|
||||
|
||||
For example, suppose we want to limit the execution time on leaf nodes to `10s` but have no limit on the initial node. Instead of setting `max_execution_time` in the nested subquery settings:
|
||||
|
||||
``` sql
|
||||
SELECT count() FROM cluster(cluster, view(SELECT * FROM t SETTINGS max_execution_time = 10));
|
||||
```
|
||||
|
||||
We can use `max_execution_time_leaf` as the query settings:
|
||||
|
||||
``` sql
|
||||
SELECT count() FROM cluster(cluster, view(SELECT * FROM t)) SETTINGS max_execution_time_leaf = 10;
|
||||
```
|
||||
|
||||
## timeout_overflow_mode_leaf
|
||||
|
||||
What to do when the query on a leaf node runs longer than `max_execution_time_leaf`: `throw` or `break`. By default, `throw`.
|
||||
|
||||
## min_execution_speed {#min-execution-speed}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
---
|
||||
---
|
||||
slug: /en/sql-reference/table-functions/file
|
||||
sidebar_position: 60
|
||||
sidebar_label: file
|
||||
@ -6,7 +6,7 @@ sidebar_label: file
|
||||
|
||||
# file
|
||||
|
||||
Provides a table-like interface to SELECT from and INSERT to files. This table function is similar to the [s3](/docs/en/sql-reference/table-functions/url.md) table function. Use file() when working with local files, and s3() when working with buckets in S3, GCS, or MinIO.
|
||||
A table function which provides a table-like interface to SELECT from and INSERT into files, similar to the [s3](/docs/en/sql-reference/table-functions/s3.md) table function. Use `file()` when working with local files, and `s3()` when working with buckets in object storage such as S3, GCS, or MinIO.
|
||||
|
||||
The `file` function can be used in `SELECT` and `INSERT` queries to read from or write to files.
|
||||
|
||||
@ -18,18 +18,18 @@ file([path_to_archive ::] path [,format] [,structure] [,compression])
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
|
||||
- `path_to_archive` - The relative path to zip/tar/7z archive. Path to archive support the same globs as `path`.
|
||||
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports the following [globs](#globs_in_path) in read-only mode: `*`, `?`, `{abc,def}` (with `'abc'` and `'def'` being strings) and `{N..M}` (with `N` and `M` being numbers).
|
||||
- `path_to_archive` - The relative path to a zip/tar/7z archive. Supports the same globs as `path`.
|
||||
- `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file.
|
||||
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
- `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.
|
||||
- `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. Supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.
|
||||
|
||||
|
||||
**Returned value**
|
||||
|
||||
A table with the specified structure for reading or writing data in the specified file.
|
||||
A table for reading or writing data in a file.
|
||||
|
||||
## File Write Examples
|
||||
## Examples for Writing to a File
|
||||
|
||||
### Write to a TSV file
|
||||
|
||||
@ -48,9 +48,9 @@ As a result, the data is written into the file `test.tsv`:
|
||||
1 3 2
|
||||
```
|
||||
|
||||
### Partitioned Write to multiple TSV files
|
||||
### Partitioned write to multiple TSV files
|
||||
|
||||
If you specify `PARTITION BY` expression when inserting data into a file() function, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency.
|
||||
If you specify a `PARTITION BY` expression when inserting data into a table function of type `file()`, then a separate file is created for each partition. Splitting the data into separate files helps to improve performance of read operations.
|
||||
|
||||
```sql
|
||||
INSERT INTO TABLE FUNCTION
|
||||
@ -72,11 +72,11 @@ As a result, the data is written into three files: `test_1.tsv`, `test_2.tsv`, a
|
||||
1 2 3
|
||||
```
|
||||
|
||||
## File Read Examples
|
||||
## Examples for Reading from a File
|
||||
|
||||
### SELECT from a CSV file
|
||||
|
||||
Setting `user_files_path` and the contents of the file `test.csv`:
|
||||
First, set `user_files_path` in the server configuration and prepare a file `test.csv`:
|
||||
|
||||
``` bash
|
||||
$ grep user_files_path /etc/clickhouse-server/config.xml
|
||||
@ -88,7 +88,7 @@ $ cat /var/lib/clickhouse/user_files/test.csv
|
||||
78,43,45
|
||||
```
|
||||
|
||||
Getting data from a table in `test.csv` and selecting the first two rows from it:
|
||||
Then, read data from `test.csv` into a table and select its first two rows:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM
|
||||
@ -103,14 +103,6 @@ LIMIT 2;
|
||||
└─────────┴─────────┴─────────┘
|
||||
```
|
||||
|
||||
Getting the first 10 lines of a table that contains 3 columns of [UInt32](/docs/en/sql-reference/data-types/int-uint.md) type from a CSV file:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM
|
||||
file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
|
||||
LIMIT 10;
|
||||
```
|
||||
|
||||
### Inserting data from a file into a table:
|
||||
|
||||
``` sql
|
||||
@ -130,41 +122,42 @@ file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32');
|
||||
└─────────┴─────────┴─────────┘
|
||||
```
|
||||
|
||||
Getting data from table in table.csv, located in archive1.zip or/and archive2.zip
|
||||
Reading data from `table.csv`, located in `archive1.zip` and/or `archive2.zip`:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv');
|
||||
```
|
||||
|
||||
## Globs in Path {#globs_in_path}
|
||||
## Globbing {#globs_in_path}
|
||||
|
||||
Multiple path components can have globs. For being processed file must exist and match to the whole path pattern (not only suffix or prefix).
|
||||
Paths may use globbing. Files must match the whole path pattern, not only the suffix or prefix.
|
||||
|
||||
- `*` — Substitutes any number of any characters except `/` including empty string.
|
||||
- `?` — Substitutes any single character.
|
||||
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. The strings can contain the `/` symbol.
|
||||
- `{N..M}` — Substitutes any number in range from N to M including both borders.
|
||||
- `**` - Fetches all files inside the folder recursively.
|
||||
- `*` — Represents arbitrarily many characters except `/` but including the empty string.
|
||||
- `?` — Represents an arbitrary single character.
|
||||
- `{some_string,another_string,yet_another_one}` — Represents any of alternative strings `'some_string', 'another_string', 'yet_another_one'`. The strings may contain `/`.
|
||||
- `{N..M}` — Represents any number `>= N` and `<= M`.
|
||||
- `**` - Represents all files inside a folder recursively.
|
||||
|
||||
Constructions with `{}` are similar to the [remote](remote.md) table function.
|
||||
|
||||
**Example**
|
||||
|
||||
Suppose we have several files with the following relative paths:
|
||||
Suppose there are these files with the following relative paths:
|
||||
|
||||
- 'some_dir/some_file_1'
|
||||
- 'some_dir/some_file_2'
|
||||
- 'some_dir/some_file_3'
|
||||
- 'another_dir/some_file_1'
|
||||
- 'another_dir/some_file_2'
|
||||
- 'another_dir/some_file_3'
|
||||
- `some_dir/some_file_1`
|
||||
- `some_dir/some_file_2`
|
||||
- `some_dir/some_file_3`
|
||||
- `another_dir/some_file_1`
|
||||
- `another_dir/some_file_2`
|
||||
- `another_dir/some_file_3`
|
||||
|
||||
Query the number of rows in these files:
|
||||
Query the total number of rows in all files:
|
||||
|
||||
``` sql
|
||||
SELECT count(*) FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32');
|
||||
```
|
||||
|
||||
Query the number of rows in all files of these two directories:
|
||||
An alternative path expression which achieves the same:
|
||||
|
||||
``` sql
|
||||
SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32');
|
||||
@ -176,7 +169,7 @@ If your listing of files contains number ranges with leading zeros, use the cons
|
||||
|
||||
**Example**
|
||||
|
||||
Query the data from files named `file000`, `file001`, … , `file999`:
|
||||
Query the total number of rows in files named `file000`, `file001`, … , `file999`:
|
||||
|
||||
``` sql
|
||||
SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32');
|
||||
@ -184,7 +177,7 @@ SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String,
|
||||
|
||||
**Example**
|
||||
|
||||
Query the data from all files inside `big_dir` directory recursively:
|
||||
Query the total number of rows from all files inside directory `big_dir/` recursively:
|
||||
|
||||
``` sql
|
||||
SELECT count(*) FROM file('big_dir/**', 'CSV', 'name String, value UInt32');
|
||||
@ -192,7 +185,7 @@ SELECT count(*) FROM file('big_dir/**', 'CSV', 'name String, value UInt32');
|
||||
|
||||
**Example**
|
||||
|
||||
Query the data from all `file002` files from any folder inside `big_dir` directory recursively:
|
||||
Query the total number of rows from all files `file002` inside any folder in directory `big_dir/` recursively:
|
||||
|
||||
``` sql
|
||||
SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt32');
|
||||
|
@ -6,7 +6,7 @@ sidebar_label: remote
|
||||
|
||||
# remote, remoteSecure
|
||||
|
||||
Allows accessing remote servers, including migration of data, without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. `remoteSecure` - same as `remote` but with a secured connection.
|
||||
Table function `remote` allows accessing remote servers on the fly, i.e. without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. Table function `remoteSecure` is the same as `remote` but uses a secure connection.
|
||||
|
||||
Both functions can be used in `SELECT` and `INSERT` queries.
|
||||
|
||||
@ -21,36 +21,36 @@ remoteSecure('addresses_expr', [db.table, 'user'[, 'password'], sharding_key])
|
||||
|
||||
## Parameters
|
||||
|
||||
- `addresses_expr` — An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`, or just `host`.
|
||||
- `addresses_expr` — A remote server address or an expression that generates multiple addresses of remote servers. Format: `host` or `host:port`.
|
||||
|
||||
The host can be specified as the server name, or as the IPv4 or IPv6 address. An IPv6 address is specified in square brackets.
|
||||
The `host` can be specified as a server name, or as an IPv4 or IPv6 address. An IPv6 address must be specified in square brackets.
|
||||
|
||||
The port is the TCP port on the remote server. If the port is omitted, it uses [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) from the server’s config file in `remote` (by default, 9000) and [tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) in `remoteSecure` (by default, 9440).
|
||||
The `port` is the TCP port on the remote server. If the port is omitted, it uses [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) from the server config file for table function `remote` (by default, 9000) and [tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) for table function `remoteSecure` (by default, 9440).
|
||||
|
||||
The port is required for an IPv6 address.
|
||||
For IPv6 addresses, a port is required.
|
||||
|
||||
If only specify this parameter, `db` and `table` will use `system.one` by default.
|
||||
If only parameter `addresses_expr` is specified, `db` and `table` will use `system.one` by default.
|
||||
|
||||
Type: [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
- `db` — Database name. Type: [String](../../sql-reference/data-types/string.md).
|
||||
- `table` — Table name. Type: [String](../../sql-reference/data-types/string.md).
|
||||
- `user` — User name. If the user is not specified, `default` is used. Type: [String](../../sql-reference/data-types/string.md).
|
||||
- `password` — User password. If the password is not specified, an empty password is used. Type: [String](../../sql-reference/data-types/string.md).
|
||||
- `user` — User name. If not specified, `default` is used. Type: [String](../../sql-reference/data-types/string.md).
|
||||
- `password` — User password. If not specified, an empty password is used. Type: [String](../../sql-reference/data-types/string.md).
|
||||
- `sharding_key` — Sharding key to support distributing data across nodes. For example: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`. Type: [UInt32](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
## Returned value
|
||||
|
||||
The dataset from remote servers.
|
||||
A table located on a remote server.
|
||||
|
||||
## Usage
|
||||
|
||||
Unless you are migrating data from one system to another, using the `remote` table function is less optimal than creating a `Distributed` table because in this case the server connection is re-established for every request. Also, if hostnames are set, the names are resolved, and errors are not counted when working with various replicas. When processing a large number of queries, always create the `Distributed` table ahead of time, and do not use the `remote` table function.
|
||||
As table functions `remote` and `remoteSecure` re-establish the connection for each request, it is recommended to use a `Distributed` table instead. Also, if hostnames are set, the names are resolved, and errors are not counted when working with various replicas. When processing a large number of queries, always create the `Distributed` table ahead of time, and do not use the `remote` table function.
|
||||
|
||||
The `remote` table function can be useful in the following cases:
|
||||
|
||||
- Migrating data from one system to another
|
||||
- Accessing a specific server for data comparison, debugging, and testing.
|
||||
- One-time data migration from one system to another.
|
||||
- Accessing a specific server for data comparison, debugging, and testing, i.e. ad-hoc connections.
|
||||
- Queries between various ClickHouse clusters for research purposes.
|
||||
- Infrequent distributed requests that are made manually.
|
||||
- Distributed requests where the set of servers is re-defined each time.
|
||||
@ -68,7 +68,7 @@ localhost
|
||||
[2a02:6b8:0:1111::11]:9000
|
||||
```
|
||||
|
||||
Multiple addresses can be comma-separated. In this case, ClickHouse will use distributed processing, so it will send the query to all specified addresses (like shards with different data). Example:
|
||||
Multiple addresses can be comma-separated. In this case, ClickHouse will use distributed processing and send the query to all specified addresses (like shards with different data). Example:
|
||||
|
||||
``` text
|
||||
example01-01-1,example01-02-1
|
||||
@ -91,10 +91,13 @@ SELECT * FROM remote_table;
|
||||
```
|
||||
|
||||
### Migration of tables from one system to another:
|
||||
|
||||
This example uses one table from a sample dataset. The database is `imdb`, and the table is `actors`.
|
||||
|
||||
#### On the source ClickHouse system (the system that currently hosts the data)
|
||||
|
||||
- Verify the source database and table name (`imdb.actors`)
|
||||
|
||||
```sql
|
||||
show databases
|
||||
```
|
||||
@ -104,6 +107,7 @@ This example uses one table from a sample dataset. The database is `imdb`, and
|
||||
```
|
||||
|
||||
- Get the CREATE TABLE statement from the source:
|
||||
|
||||
```
|
||||
select create_table_query
|
||||
from system.tables
|
||||
@ -111,6 +115,7 @@ This example uses one table from a sample dataset. The database is `imdb`, and
|
||||
```
|
||||
|
||||
Response
|
||||
|
||||
```sql
|
||||
CREATE TABLE imdb.actors (`id` UInt32,
|
||||
`first_name` String,
|
||||
@ -123,11 +128,13 @@ This example uses one table from a sample dataset. The database is `imdb`, and
|
||||
#### On the destination ClickHouse system:
|
||||
|
||||
- Create the destination database:
|
||||
|
||||
```sql
|
||||
CREATE DATABASE imdb
|
||||
```
|
||||
|
||||
- Using the CREATE TABLE statement from the source, create the destination:
|
||||
|
||||
```sql
|
||||
CREATE TABLE imdb.actors (`id` UInt32,
|
||||
`first_name` String,
|
||||
@ -140,21 +147,23 @@ This example uses one table from a sample dataset. The database is `imdb`, and
|
||||
#### Back on the source deployment:
|
||||
|
||||
Insert into the new database and table created on the remote system. You will need the host, port, username, password, destination database, and destination table.
|
||||
|
||||
```sql
|
||||
INSERT INTO FUNCTION
|
||||
remoteSecure('remote.clickhouse.cloud:9440', 'imdb.actors', 'USER', 'PASSWORD')
|
||||
SELECT * from imdb.actors
|
||||
```
|
||||
|
||||
## Globs in Addresses {#globs-in-addresses}
|
||||
## Globbing {#globs-in-addresses}
|
||||
|
||||
Patterns in curly brackets `{ }` are used to generate a set of shards and to specify replicas. If there are multiple pairs of curly brackets, then the direct product of the corresponding sets is generated.
|
||||
|
||||
The following pattern types are supported.
|
||||
|
||||
- {*a*,*b*} - Any number of variants separated by a comma. The pattern is replaced with *a* in the first shard address and it is replaced with *b* in the second shard address and so on. For instance, `example0{1,2}-1` generates addresses `example01-1` and `example02-1`.
|
||||
- {*n*..*m*} - A range of numbers. This pattern generates shard addresses with incrementing indices from *n* to *m*. `example0{1..2}-1` generates `example01-1` and `example02-1`.
|
||||
- {*0n*..*0m*} - A range of numbers with leading zeroes. This modification preserves leading zeroes in indices. The pattern `example{01..03}-1` generates `example01-1`, `example02-1` and `example03-1`.
|
||||
- {*a*|*b*} - Any number of variants separated by a `|`. The pattern specifies replicas. For instance, `example01-{1|2}` generates replicas `example01-1` and `example01-2`.
|
||||
- `{a,b,c}` - Represents any of alternative strings `a`, `b` or `c`. The pattern is replaced with `a` in the first shard address and replaced with `b` in the second shard address and so on. For instance, `example0{1,2}-1` generates addresses `example01-1` and `example02-1`.
|
||||
- `{N..M}` - A range of numbers. This pattern generates shard addresses with incrementing indices from `N` to (and including) `M`. For instance, `example0{1..2}-1` generates `example01-1` and `example02-1`.
|
||||
- `{0n..0m}` - A range of numbers with leading zeroes. This pattern preserves leading zeroes in indices. For instance, `example{01..03}-1` generates `example01-1`, `example02-1` and `example03-1`.
|
||||
- `{a|b}` - Any number of variants separated by a `|`. The pattern specifies replicas. For instance, `example01-{1|2}` generates replicas `example01-1` and `example01-2`.
|
||||
|
||||
The query will be sent to the first healthy replica. However, for `remote` the replicas are iterated in the order currently set in the [load_balancing](../../operations/settings/settings.md#settings-load_balancing) setting.
|
||||
The number of generated addresses is limited by the [table_function_remote_max_addresses](../../operations/settings/settings.md#table_function_remote_max_addresses) setting.
|
||||
|
@ -1797,7 +1797,12 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
|
||||
{
|
||||
const auto * logs_level_field = set_query->changes.tryGet(std::string_view{"send_logs_level"});
|
||||
if (logs_level_field)
|
||||
updateLoggerLevel(logs_level_field->safeGet<String>());
|
||||
{
|
||||
auto logs_level = logs_level_field->safeGet<String>();
|
||||
/// Check that setting value is correct before updating logger level.
|
||||
SettingFieldLogsLevelTraits::fromString(logs_level);
|
||||
updateLoggerLevel(logs_level);
|
||||
}
|
||||
}
|
||||
|
||||
if (const auto * create_user_query = parsed_query->as<ASTCreateUserQuery>())
|
||||
|
@ -251,10 +251,12 @@ void LocalConnection::finishQuery()
|
||||
else if (state->pushing_async_executor)
|
||||
{
|
||||
state->pushing_async_executor->finish();
|
||||
state->pushing_async_executor.reset();
|
||||
}
|
||||
else if (state->pushing_executor)
|
||||
{
|
||||
state->pushing_executor->finish();
|
||||
state->pushing_executor.reset();
|
||||
}
|
||||
|
||||
state->io.onFinish();
|
||||
|
@ -330,6 +330,12 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root,
|
||||
{
|
||||
Element & config_element = dynamic_cast<Element &>(*config_node);
|
||||
|
||||
/// Remove substitution attributes from the merge target node if source node already has a value
|
||||
bool source_has_value = with_element.hasChildNodes();
|
||||
if (source_has_value)
|
||||
for (const auto & attr_name: SUBSTITUTION_ATTRS)
|
||||
config_element.removeAttribute(attr_name);
|
||||
|
||||
mergeAttributes(config_element, with_element);
|
||||
mergeRecursive(config, config_node, with_node);
|
||||
}
|
||||
@ -513,6 +519,9 @@ void ConfigProcessor::doIncludesRecursive(
|
||||
|
||||
if (attr_nodes["from_zk"]) /// we have zookeeper subst
|
||||
{
|
||||
if (node->hasChildNodes()) /// only allow substitution for nodes with no value
|
||||
throw Poco::Exception("Element <" + node->nodeName() + "> has value, can't process from_zk substitution");
|
||||
|
||||
contributing_zk_paths.insert(attr_nodes["from_zk"]->getNodeValue());
|
||||
|
||||
if (zk_node_cache)
|
||||
@ -535,6 +544,9 @@ void ConfigProcessor::doIncludesRecursive(
|
||||
|
||||
if (attr_nodes["from_env"]) /// we have env subst
|
||||
{
|
||||
if (node->hasChildNodes()) /// only allow substitution for nodes with no value
|
||||
throw Poco::Exception("Element <" + node->nodeName() + "> has value, can't process from_env substitution");
|
||||
|
||||
XMLDocumentPtr env_document;
|
||||
auto get_env_node = [&](const std::string & name) -> const Node *
|
||||
{
|
||||
|
@ -17,6 +17,11 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
/* Transforms string from grep-wildcard-syntax ("{N..M}", "{a,b,c}" as in remote table function and "*", "?") to perl-regexp for using re2 library for matching
|
||||
* with such steps:
|
||||
* 1) search intervals like {0..9} and enums like {abc,xyz,qwe} in {}, replace them by regexp with pipe (expr1|expr2|expr3),
|
||||
@ -116,4 +121,79 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob
|
||||
}
|
||||
return buf_final_processing.str();
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
void expandSelectorGlobImpl(const std::string & path, std::vector<std::string> & for_match_paths_expanded)
|
||||
{
|
||||
/// regexp for {expr1,expr2,....} (a selector glob);
|
||||
/// expr1, expr2,... cannot contain any of these: '{', '}', ','
|
||||
static const re2::RE2 selector_regex(R"({([^{}*,]+,[^{}*]*[^{}*,])})");
|
||||
|
||||
std::string_view path_view(path);
|
||||
std::string_view matched;
|
||||
|
||||
// No (more) selector globs found, quit
|
||||
if (!RE2::FindAndConsume(&path_view, selector_regex, &matched))
|
||||
{
|
||||
for_match_paths_expanded.push_back(path);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<size_t> anchor_positions;
|
||||
bool opened = false;
|
||||
bool closed = false;
|
||||
|
||||
// Looking for first occurrence of {} selector: write down positions of {, } and all intermediate commas
|
||||
for (auto it = path.begin(); it != path.end(); ++it)
|
||||
{
|
||||
if (*it == '{')
|
||||
{
|
||||
if (opened)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Unexpected '{{' found in path '{}' at position {}.", path, it - path.begin());
|
||||
anchor_positions.push_back(std::distance(path.begin(), it));
|
||||
opened = true;
|
||||
}
|
||||
else if (*it == '}')
|
||||
{
|
||||
if (!opened)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Unexpected '}}' found in path '{}' at position {}.", path, it - path.begin());
|
||||
anchor_positions.push_back(std::distance(path.begin(), it));
|
||||
closed = true;
|
||||
break;
|
||||
}
|
||||
else if (*it == ',')
|
||||
{
|
||||
if (!opened)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Unexpected ',' found in path '{}' at position {}.", path, std::distance(path.begin(), it));
|
||||
anchor_positions.push_back(std::distance(path.begin(), it));
|
||||
}
|
||||
}
|
||||
if (!opened || !closed)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Invalid {{}} glob in path {}.", path);
|
||||
|
||||
// generate result: prefix/{a,b,c}/suffix -> [prefix/a/suffix, prefix/b/suffix, prefix/c/suffix]
|
||||
std::string common_prefix = path.substr(0, anchor_positions.front());
|
||||
std::string common_suffix = path.substr(anchor_positions.back() + 1);
|
||||
for (size_t i = 1; i < anchor_positions.size(); ++i)
|
||||
{
|
||||
std::string current_selection =
|
||||
path.substr(anchor_positions[i-1] + 1, (anchor_positions[i] - anchor_positions[i-1] - 1));
|
||||
|
||||
std::string expanded_matcher = common_prefix + current_selection + common_suffix;
|
||||
expandSelectorGlobImpl(expanded_matcher, for_match_paths_expanded);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::string> expandSelectionGlob(const std::string & path)
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
expandSelectorGlobImpl(path, result);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
@ -4,7 +4,11 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/* Parse globs in string and make a regexp for it.
|
||||
*/
|
||||
std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_globs);
|
||||
/// Parse globs in string and make a regexp for it.
|
||||
std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_globs);
|
||||
|
||||
/// Process {a,b,c...} globs:
|
||||
/// Don't match it against regex, but generate a,b,c strings instead and process each of them separately.
|
||||
/// E.g. for a string like `file{1,2,3}.csv` return vector of strings: {`file1.csv`,`file2.csv`,`file3.csv`}
|
||||
std::vector<std::string> expandSelectionGlob(const std::string & path);
|
||||
}
|
||||
|
@ -365,16 +365,16 @@ class IColumn;
|
||||
M(UInt64, max_bytes_to_read, 0, "Limit on read bytes (after decompression) from the most 'deep' sources. That is, only in the deepest subquery. When reading from a remote server, it is only checked on a remote server.", 0) \
|
||||
M(OverflowMode, read_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
|
||||
\
|
||||
M(UInt64, max_rows_to_read_leaf, 0, "Limit on read rows on the leaf nodes for distributed queries. Limit is applied for local reads only excluding the final merge stage on the root node. Note, the setting is unstable with prefer_localhost_replica=1.", 0) \
|
||||
M(UInt64, max_bytes_to_read_leaf, 0, "Limit on read bytes (after decompression) on the leaf nodes for distributed queries. Limit is applied for local reads only excluding the final merge stage on the root node. Note, the setting is unstable with prefer_localhost_replica=1.", 0) \
|
||||
M(UInt64, max_rows_to_read_leaf, 0, "Limit on read rows on the leaf nodes for distributed queries. Limit is applied for local reads only, excluding the final merge stage on the root node. Note, the setting is unstable with prefer_localhost_replica=1.", 0) \
|
||||
M(UInt64, max_bytes_to_read_leaf, 0, "Limit on read bytes (after decompression) on the leaf nodes for distributed queries. Limit is applied for local reads only, excluding the final merge stage on the root node. Note, the setting is unstable with prefer_localhost_replica=1.", 0) \
|
||||
M(OverflowMode, read_overflow_mode_leaf, OverflowMode::THROW, "What to do when the leaf limit is exceeded.", 0) \
|
||||
\
|
||||
M(UInt64, max_rows_to_group_by, 0, "If aggregation during GROUP BY is generating more than specified number of rows (unique GROUP BY keys), the behavior will be determined by the 'group_by_overflow_mode' which by default is - throw an exception, but can be also switched to an approximate GROUP BY mode.", 0) \
|
||||
M(UInt64, max_rows_to_group_by, 0, "If aggregation during GROUP BY is generating more than the specified number of rows (unique GROUP BY keys), the behavior will be determined by the 'group_by_overflow_mode' which by default is - throw an exception, but can be also switched to an approximate GROUP BY mode.", 0) \
|
||||
M(OverflowModeGroupBy, group_by_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
|
||||
M(UInt64, max_bytes_before_external_group_by, 0, "If memory usage during GROUP BY operation is exceeding this threshold in bytes, activate the 'external aggregation' mode (spill data to disk). Recommended value is half of available system memory.", 0) \
|
||||
\
|
||||
M(UInt64, max_rows_to_sort, 0, "If more than specified amount of records have to be processed for ORDER BY operation, the behavior will be determined by the 'sort_overflow_mode' which by default is - throw an exception", 0) \
|
||||
M(UInt64, max_bytes_to_sort, 0, "If more than specified amount of (uncompressed) bytes have to be processed for ORDER BY operation, the behavior will be determined by the 'sort_overflow_mode' which by default is - throw an exception", 0) \
|
||||
M(UInt64, max_rows_to_sort, 0, "If more than the specified amount of records have to be processed for ORDER BY operation, the behavior will be determined by the 'sort_overflow_mode' which by default is - throw an exception", 0) \
|
||||
M(UInt64, max_bytes_to_sort, 0, "If more than the specified amount of (uncompressed) bytes have to be processed for ORDER BY operation, the behavior will be determined by the 'sort_overflow_mode' which by default is - throw an exception", 0) \
|
||||
M(OverflowMode, sort_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
|
||||
M(UInt64, max_bytes_before_external_sort, 0, "If memory usage during ORDER BY operation is exceeding this threshold in bytes, activate the 'external sorting' mode (spill data to disk). Recommended value is half of available system memory.", 0) \
|
||||
M(UInt64, max_bytes_before_remerge_sort, 1000000000, "In case of ORDER BY with LIMIT, when memory usage is higher than specified threshold, perform additional steps of merging blocks before final merge to keep just top LIMIT rows.", 0) \
|
||||
@ -385,8 +385,10 @@ class IColumn;
|
||||
M(OverflowMode, result_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
|
||||
\
|
||||
/* TODO: Check also when merging and finalizing aggregate functions. */ \
|
||||
M(Seconds, max_execution_time, 0, "If query run time exceeded the specified number of seconds, the behavior will be determined by the 'timeout_overflow_mode' which by default is - throw an exception. Note that the timeout is checked and query can stop only in designated places during data processing. It currently cannot stop during merging of aggregation states or during query analysis, and the actual run time will be higher than the value of this setting.", 0) \
|
||||
M(Seconds, max_execution_time, 0, "If query runtime exceeds the specified number of seconds, the behavior will be determined by the 'timeout_overflow_mode', which by default is - throw an exception. Note that the timeout is checked and query can stop only in designated places during data processing. It currently cannot stop during merging of aggregation states or during query analysis, and the actual run time will be higher than the value of this setting.", 0) \
|
||||
M(OverflowMode, timeout_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
|
||||
M(Seconds, max_execution_time_leaf, 0, "Similar semantic to max_execution_time but only apply on leaf node for distributed queries, the time out behavior will be determined by 'timeout_overflow_mode_leaf' which by default is - throw an exception", 0) \
|
||||
M(OverflowMode, timeout_overflow_mode_leaf, OverflowMode::THROW, "What to do when the leaf limit is exceeded.", 0) \
|
||||
\
|
||||
M(UInt64, min_execution_speed, 0, "Minimum number of execution rows per second.", 0) \
|
||||
M(UInt64, max_execution_speed, 0, "Maximum number of execution rows per second.", 0) \
|
||||
@ -400,7 +402,7 @@ class IColumn;
|
||||
\
|
||||
M(UInt64, max_sessions_for_user, 0, "Maximum number of simultaneous sessions for a user.", 0) \
|
||||
\
|
||||
M(UInt64, max_subquery_depth, 100, "If a query has more than specified number of nested subqueries, throw an exception. This allows you to have a sanity check to protect the users of your cluster from going insane with their queries.", 0) \
|
||||
M(UInt64, max_subquery_depth, 100, "If a query has more than the specified number of nested subqueries, throw an exception. This allows you to have a sanity check to protect the users of your cluster from going insane with their queries.", 0) \
|
||||
M(UInt64, max_analyze_depth, 5000, "Maximum number of analyses performed by interpreter.", 0) \
|
||||
M(UInt64, max_ast_depth, 1000, "Maximum depth of query syntax tree. Checked after parsing.", 0) \
|
||||
M(UInt64, max_ast_elements, 50000, "Maximum size of query syntax tree in number of nodes. Checked after parsing.", 0) \
|
||||
|
@ -17,6 +17,7 @@ enum class NumpyDataTypeIndex
|
||||
UInt16,
|
||||
UInt32,
|
||||
UInt64,
|
||||
Float16,
|
||||
Float32,
|
||||
Float64,
|
||||
String,
|
||||
@ -79,6 +80,7 @@ public:
|
||||
{
|
||||
switch (size)
|
||||
{
|
||||
case 2: type_index = NumpyDataTypeIndex::Float16; break;
|
||||
case 4: type_index = NumpyDataTypeIndex::Float32; break;
|
||||
case 8: type_index = NumpyDataTypeIndex::Float64; break;
|
||||
default:
|
||||
|
@ -664,11 +664,20 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf);
|
||||
template <typename ReturnType = void>
|
||||
inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf)
|
||||
{
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
/// Optimistic path, when whole value is in buffer.
|
||||
if (!buf.eof() && buf.position() + 10 <= buf.buffer().end())
|
||||
{
|
||||
char * pos = buf.position();
|
||||
|
||||
auto error = [&]
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Cannot parse date here: {}", String(buf.position(), 10));
|
||||
return ReturnType(false);
|
||||
};
|
||||
|
||||
/// YYYY-MM-DD
|
||||
/// YYYY-MM-D
|
||||
/// YYYY-M-DD
|
||||
@ -677,6 +686,9 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf)
|
||||
|
||||
/// The delimiters can be arbitrary characters, like YYYY/MM!DD, but obviously not digits.
|
||||
|
||||
if (!isNumericASCII(pos[0]) || !isNumericASCII(pos[1]) || !isNumericASCII(pos[2]) || !isNumericASCII(pos[3]))
|
||||
return error();
|
||||
|
||||
UInt16 year = (pos[0] - '0') * 1000 + (pos[1] - '0') * 100 + (pos[2] - '0') * 10 + (pos[3] - '0');
|
||||
UInt8 month;
|
||||
UInt8 day;
|
||||
@ -685,12 +697,18 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf)
|
||||
if (isNumericASCII(pos[-1]))
|
||||
{
|
||||
/// YYYYMMDD
|
||||
if (!isNumericASCII(pos[0]) || !isNumericASCII(pos[1]) || !isNumericASCII(pos[2]))
|
||||
return error();
|
||||
|
||||
month = (pos[-1] - '0') * 10 + (pos[0] - '0');
|
||||
day = (pos[1] - '0') * 10 + (pos[2] - '0');
|
||||
pos += 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!isNumericASCII(pos[0]))
|
||||
return error();
|
||||
|
||||
month = pos[0] - '0';
|
||||
if (isNumericASCII(pos[1]))
|
||||
{
|
||||
@ -700,8 +718,8 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf)
|
||||
else
|
||||
pos += 2;
|
||||
|
||||
if (isNumericASCII(pos[-1]))
|
||||
return ReturnType(false);
|
||||
if (isNumericASCII(pos[-1]) || !isNumericASCII(pos[0]))
|
||||
return error();
|
||||
|
||||
day = pos[0] - '0';
|
||||
if (isNumericASCII(pos[1]))
|
||||
|
@ -1,20 +1,21 @@
|
||||
#include <Interpreters/ClusterProxy/SelectStreamFactory.h>
|
||||
#include <Interpreters/Cluster.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Storages/VirtualColumnUtils.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Common/checkStackSize.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/FailPoint.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
#include <IO/ConnectionTimeouts.h>
|
||||
#include <Interpreters/ClusterProxy/SelectStreamFactory.h>
|
||||
#include <Interpreters/Cluster.h>
|
||||
#include <Interpreters/AddDefaultDatabaseVisitor.h>
|
||||
#include <Interpreters/RequiredSourceColumnsVisitor.h>
|
||||
#include <Interpreters/TranslateQualifiedNamesVisitor.h>
|
||||
#include <DataTypes/ObjectUtils.h>
|
||||
|
||||
#include <Client/IConnections.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/FailPoint.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTSetQuery.h>
|
||||
#include <Processors/QueryPlan/QueryPlan.h>
|
||||
#include <Processors/QueryPlan/ReadFromRemote.h>
|
||||
#include <Processors/QueryPlan/ExpressionStep.h>
|
||||
@ -22,6 +23,7 @@
|
||||
#include <Processors/QueryPlan/DistributedCreateLocalPlan.h>
|
||||
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event DistributedConnectionMissingTable;
|
||||
@ -121,6 +123,7 @@ void SelectStreamFactory::createForShard(
|
||||
if (it != objects_by_shard.end())
|
||||
replaceMissedSubcolumnsByConstants(storage_snapshot->object_columns, it->second, query_ast);
|
||||
|
||||
|
||||
auto emplace_local_stream = [&]()
|
||||
{
|
||||
local_plans.emplace_back(createLocalPlan(
|
||||
|
@ -141,6 +141,14 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster,
|
||||
new_settings.allow_experimental_parallel_reading_from_replicas = false;
|
||||
}
|
||||
|
||||
if (settings.max_execution_time_leaf.value > 0)
|
||||
{
|
||||
/// Replace 'max_execution_time' of this sub-query with 'max_execution_time_leaf' and 'timeout_overflow_mode'
|
||||
/// with 'timeout_overflow_mode_leaf'
|
||||
new_settings.max_execution_time = settings.max_execution_time_leaf;
|
||||
new_settings.timeout_overflow_mode = settings.timeout_overflow_mode_leaf;
|
||||
}
|
||||
|
||||
auto new_context = Context::createCopy(context);
|
||||
new_context->setSettings(new_settings);
|
||||
return new_context;
|
||||
|
@ -1,20 +1,16 @@
|
||||
#include <cmath>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <Processors/Formats/Impl/NpyRowInputFormat.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Formats/NumpyDataTypes.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <Processors/Formats/IRowInputFormat.h>
|
||||
#include <boost/algorithm/string/split.hpp>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
|
||||
@ -34,6 +30,46 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
float convertFloat16ToFloat32(uint16_t float16_value)
|
||||
{
|
||||
uint16_t sign = (float16_value >> 15) & 0x1;
|
||||
uint16_t exponent = (float16_value >> 10) & 0x1F;
|
||||
uint16_t fraction = float16_value & 0x3FF;
|
||||
|
||||
if (exponent == 0 && fraction == 0)
|
||||
{
|
||||
uint32_t float32_value = sign << 31;
|
||||
return std::bit_cast<float>(float32_value);
|
||||
}
|
||||
|
||||
// Handling special cases for exponent
|
||||
if (exponent == 0x1F)
|
||||
{
|
||||
// NaN or Infinity in float16
|
||||
return (fraction == 0) ? std::numeric_limits<float>::infinity() : std::numeric_limits<float>::quiet_NaN();
|
||||
}
|
||||
|
||||
// Convert exponent from float16 to float32 format
|
||||
int32_t new_exponent = static_cast<int32_t>(exponent) - 15 + 127;
|
||||
|
||||
// Constructing the float32 representation
|
||||
uint32_t float32_value = (static_cast<uint32_t>(sign) << 31) |
|
||||
(static_cast<uint32_t>(new_exponent) << 23) |
|
||||
(static_cast<uint32_t>(fraction) << 13);
|
||||
|
||||
// Interpret the binary representation as a float
|
||||
float result;
|
||||
std::memcpy(&result, &float32_value, sizeof(float));
|
||||
|
||||
// Determine decimal places dynamically based on the magnitude of the number
|
||||
int decimal_places = std::max(0, 6 - static_cast<int>(std::log10(std::abs(result))));
|
||||
// Truncate the decimal part to the determined number of decimal places
|
||||
float multiplier = static_cast<float>(std::pow(10.0f, decimal_places));
|
||||
result = std::round(result * multiplier) / multiplier;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
DataTypePtr getDataTypeFromNumpyType(const std::shared_ptr<NumpyDataType> & numpy_type)
|
||||
{
|
||||
switch (numpy_type->getTypeIndex())
|
||||
@ -54,6 +90,8 @@ DataTypePtr getDataTypeFromNumpyType(const std::shared_ptr<NumpyDataType> & nump
|
||||
return std::make_shared<DataTypeUInt32>();
|
||||
case NumpyDataTypeIndex::UInt64:
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
case NumpyDataTypeIndex::Float16:
|
||||
return std::make_shared<DataTypeFloat32>();
|
||||
case NumpyDataTypeIndex::Float32:
|
||||
return std::make_shared<DataTypeFloat32>();
|
||||
case NumpyDataTypeIndex::Float64:
|
||||
@ -265,6 +303,17 @@ NpyRowInputFormat::NpyRowInputFormat(ReadBuffer & in_, Block header_, Params par
|
||||
nested_type = getNestedType(types[0]);
|
||||
}
|
||||
|
||||
size_t NpyRowInputFormat::countRows(size_t max_block_size)
|
||||
{
|
||||
size_t count;
|
||||
if (counted_rows + max_block_size <= size_t(header.shape[0]))
|
||||
count = max_block_size;
|
||||
else
|
||||
count = header.shape[0] - counted_rows;
|
||||
counted_rows += count;
|
||||
return count;
|
||||
}
|
||||
|
||||
template <typename ColumnValue, typename DataValue>
|
||||
void NpyRowInputFormat::readBinaryValueAndInsert(MutableColumnPtr column, NumpyDataType::Endianness endianness)
|
||||
{
|
||||
@ -273,7 +322,18 @@ void NpyRowInputFormat::readBinaryValueAndInsert(MutableColumnPtr column, NumpyD
|
||||
readBinaryBigEndian(value, *in);
|
||||
else
|
||||
readBinaryLittleEndian(value, *in);
|
||||
assert_cast<ColumnVector<ColumnValue> &>(*column).insertValue(static_cast<ColumnValue>(value));
|
||||
assert_cast<ColumnVector<ColumnValue> &>(*column).insertValue((static_cast<ColumnValue>(value)));
|
||||
}
|
||||
|
||||
template <typename ColumnValue>
|
||||
void NpyRowInputFormat::readBinaryValueAndInsertFloat16(MutableColumnPtr column, NumpyDataType::Endianness endianness)
|
||||
{
|
||||
uint16_t value;
|
||||
if (endianness == NumpyDataType::Endianness::BIG)
|
||||
readBinaryBigEndian(value, *in);
|
||||
else
|
||||
readBinaryLittleEndian(value, *in);
|
||||
assert_cast<ColumnVector<ColumnValue> &>(*column).insertValue(static_cast<ColumnValue>(convertFloat16ToFloat32(value)));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -300,6 +360,7 @@ void NpyRowInputFormat::readAndInsertFloat(IColumn * column, const DataTypePtr &
|
||||
{
|
||||
switch (npy_type.getTypeIndex())
|
||||
{
|
||||
case NumpyDataTypeIndex::Float16: readBinaryValueAndInsertFloat16<T>(column->getPtr(), npy_type.getEndianness()); break;
|
||||
case NumpyDataTypeIndex::Float32: readBinaryValueAndInsert<T, Float32>(column->getPtr(), npy_type.getEndianness()); break;
|
||||
case NumpyDataTypeIndex::Float64: readBinaryValueAndInsert<T, Float64>(column->getPtr(), npy_type.getEndianness()); break;
|
||||
default:
|
||||
@ -395,13 +456,18 @@ NpySchemaReader::NpySchemaReader(ReadBuffer & in_)
|
||||
|
||||
/// Parses the Npy header from the input buffer and returns the schema: a single column named
/// "array" whose type is the element type nested into as many arrays as the shape has dimensions.
NamesAndTypesList NpySchemaReader::readSchema()
{
    /// Parse exactly once and store the result in the member (not in a shadowing local):
    /// readNumberOrRows() reads `header.shape` from it.
    header = parseHeader(in);

    DataTypePtr nested_type = getDataTypeFromNumpyType(header.numpy_type);
    DataTypePtr result_type = createNestedArrayType(nested_type, header.shape.size());

    return {{"array", result_type}};
}
|
||||
|
||||
std::optional<size_t> NpySchemaReader::readNumberOrRows()
|
||||
{
|
||||
return header.shape[0];
|
||||
}
|
||||
|
||||
void registerInputFormatNpy(FormatFactory & factory)
|
||||
{
|
||||
factory.registerInputFormat("Npy", [](
|
||||
|
@ -29,6 +29,9 @@ public:
|
||||
String getName() const override { return "NpyRowInputFormat"; }
|
||||
|
||||
private:
|
||||
bool supportsCountRows() const override { return true; }
|
||||
size_t countRows(size_t max_block_size) override;
|
||||
|
||||
void readPrefix() override;
|
||||
bool readRow(MutableColumns & columns, RowReadExtension &) override;
|
||||
void readData(MutableColumns & columns);
|
||||
@ -45,12 +48,16 @@ private:
|
||||
template <typename ColumnValue, typename DataValue>
|
||||
void readBinaryValueAndInsert(MutableColumnPtr column, NumpyDataType::Endianness endianness);
|
||||
|
||||
template <typename ColumnValue>
|
||||
void readBinaryValueAndInsertFloat16(MutableColumnPtr column, NumpyDataType::Endianness endianness);
|
||||
|
||||
void readRows(MutableColumns & columns);
|
||||
|
||||
void readValue(IColumn * column);
|
||||
|
||||
DataTypePtr nested_type;
|
||||
NumpyHeader header;
|
||||
size_t counted_rows = 0;
|
||||
};
|
||||
|
||||
class NpySchemaReader : public ISchemaReader
|
||||
@ -59,7 +66,9 @@ public:
|
||||
explicit NpySchemaReader(ReadBuffer & in_);
|
||||
|
||||
private:
|
||||
std::optional<size_t> readNumberOrRows() override;
|
||||
NamesAndTypesList readSchema() override;
|
||||
NumpyHeader header;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -75,59 +75,6 @@ namespace ErrorCodes
|
||||
}
|
||||
namespace
|
||||
{
|
||||
/// Forward-declare to use in expandSelector()
|
||||
std::vector<StorageHDFS::PathWithInfo> LSWithRegexpMatching(const String & path_for_ls,
|
||||
const HDFSFSPtr & fs,
|
||||
const String & for_match);
|
||||
|
||||
/// Process {a,b,c...} globs separately: don't match it against regex, but generate a,b,c strings instead.
|
||||
std::vector<StorageHDFS::PathWithInfo> expandSelector(const String & path_for_ls,
|
||||
const HDFSFSPtr & fs,
|
||||
const String & for_match)
|
||||
{
|
||||
std::vector<size_t> anchor_positions = {};
|
||||
bool opened = false, closed = false;
|
||||
|
||||
for (std::string::const_iterator it = for_match.begin(); it != for_match.end(); it++)
|
||||
{
|
||||
if (*it == '{')
|
||||
{
|
||||
anchor_positions.push_back(std::distance(for_match.begin(), it));
|
||||
opened = true;
|
||||
}
|
||||
else if (*it == '}')
|
||||
{
|
||||
anchor_positions.push_back(std::distance(for_match.begin(), it));
|
||||
closed = true;
|
||||
break;
|
||||
}
|
||||
else if (*it == ',')
|
||||
{
|
||||
if (!opened)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Unexpected ''' found in path '{}' at position {}.", for_match, std::distance(for_match.begin(), it));
|
||||
anchor_positions.push_back(std::distance(for_match.begin(), it));
|
||||
}
|
||||
}
|
||||
if (!opened || !closed)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Invalid {{}} glob in path {}.", for_match);
|
||||
|
||||
std::vector<StorageHDFS::PathWithInfo> ret = {};
|
||||
|
||||
std::string common_prefix = for_match.substr(0, anchor_positions[0]);
|
||||
std::string common_suffix = for_match.substr(anchor_positions[anchor_positions.size()-1] + 1);
|
||||
for (size_t i = 1; i < anchor_positions.size(); ++i)
|
||||
{
|
||||
std::string expanded_matcher = common_prefix
|
||||
+ for_match.substr(anchor_positions[i-1] + 1, (anchor_positions[i] - anchor_positions[i-1] - 1))
|
||||
+ common_suffix;
|
||||
std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(path_for_ls, fs, expanded_matcher);
|
||||
ret.insert(ret.end(), result_part.begin(), result_part.end());
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Recursive directory listing with matched paths as a result.
|
||||
* Have the same method in StorageFile.
|
||||
*/
|
||||
@ -136,20 +83,24 @@ namespace
|
||||
const HDFSFSPtr & fs,
|
||||
const String & for_match)
|
||||
{
|
||||
/// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without "{", "}", "*" and ","
|
||||
static const re2::RE2 enum_or_range(R"({([\d]+\.\.[\d]+|[^{}*,]+,[^{}*]*[^{}*,])})");
|
||||
|
||||
std::string_view for_match_view(for_match);
|
||||
std::string_view matched;
|
||||
if (RE2::FindAndConsume(&for_match_view, enum_or_range, &matched))
|
||||
{
|
||||
std::string buffer(matched);
|
||||
if (buffer.find(',') != std::string::npos)
|
||||
return expandSelector(path_for_ls, fs, for_match);
|
||||
}
|
||||
std::vector<StorageHDFS::PathWithInfo> result;
|
||||
|
||||
const size_t first_glob_pos = for_match.find_first_of("*?{");
|
||||
|
||||
if (first_glob_pos == std::string::npos)
|
||||
{
|
||||
const String path = fs::path(path_for_ls + for_match.substr(1)).lexically_normal();
|
||||
HDFSFileInfo ls;
|
||||
ls.file_info = hdfsGetPathInfo(fs.get(), path.c_str());
|
||||
if (ls.file_info != nullptr) // NOLINT
|
||||
{
|
||||
result.push_back(StorageHDFS::PathWithInfo{
|
||||
String(path),
|
||||
StorageHDFS::PathInfo{ls.file_info->mLastMod, static_cast<size_t>(ls.file_info->mSize)}});
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/');
|
||||
const String suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/'
|
||||
const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/'
|
||||
@ -171,7 +122,7 @@ namespace
|
||||
throw Exception(
|
||||
ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", prefix_without_globs, String(hdfsGetLastError()));
|
||||
}
|
||||
std::vector<StorageHDFS::PathWithInfo> result;
|
||||
|
||||
if (!ls.file_info && ls.length > 0)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null");
|
||||
for (int i = 0; i < ls.length; ++i)
|
||||
@ -222,7 +173,15 @@ namespace
|
||||
HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef());
|
||||
HDFSFSPtr fs = createHDFSFS(builder.get());
|
||||
|
||||
auto res = LSWithRegexpMatching("/", fs, path_from_uri);
|
||||
Strings paths = expandSelectionGlob(path_from_uri);
|
||||
|
||||
std::vector<StorageHDFS::PathWithInfo> res;
|
||||
|
||||
for (const auto & path : paths)
|
||||
{
|
||||
auto part_of_res = LSWithRegexpMatching("/", fs, path);
|
||||
res.insert(res.end(), part_of_res.begin(), part_of_res.end());
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -106,60 +106,6 @@ namespace ErrorCodes
|
||||
|
||||
namespace
|
||||
{
|
||||
/// Forward-declare to use in expandSelector()
|
||||
void listFilesWithRegexpMatchingImpl(
|
||||
const std::string & path_for_ls,
|
||||
const std::string & for_match,
|
||||
size_t & total_bytes_to_read,
|
||||
std::vector<std::string> & result,
|
||||
bool recursive = false);
|
||||
|
||||
/// Process {a,b,c...} globs separately: don't match it against regex, but generate a,b,c strings instead.
|
||||
void expandSelector(const std::string & path_for_ls,
|
||||
const std::string & for_match,
|
||||
size_t & total_bytes_to_read,
|
||||
std::vector<std::string> & result,
|
||||
bool recursive)
|
||||
{
|
||||
std::vector<size_t> anchor_positions = {};
|
||||
bool opened = false, closed = false;
|
||||
|
||||
for (std::string::const_iterator it = for_match.begin(); it != for_match.end(); it++)
|
||||
{
|
||||
if (*it == '{')
|
||||
{
|
||||
anchor_positions.push_back(std::distance(for_match.begin(), it));
|
||||
opened = true;
|
||||
}
|
||||
else if (*it == '}')
|
||||
{
|
||||
anchor_positions.push_back(std::distance(for_match.begin(), it));
|
||||
closed = true;
|
||||
break;
|
||||
}
|
||||
else if (*it == ',')
|
||||
{
|
||||
if (!opened)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Unexpected ''' found in path '{}' at position {}.", for_match, std::distance(for_match.begin(), it));
|
||||
anchor_positions.push_back(std::distance(for_match.begin(), it));
|
||||
}
|
||||
}
|
||||
if (!opened || !closed)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Invalid {{}} glob in path {}.", for_match);
|
||||
|
||||
std::string common_prefix = for_match.substr(0, anchor_positions[0]);
|
||||
std::string common_suffix = for_match.substr(anchor_positions[anchor_positions.size()-1] + 1);
|
||||
for (size_t i = 1; i < anchor_positions.size(); ++i)
|
||||
{
|
||||
std::string expanded_matcher = common_prefix
|
||||
+ for_match.substr(anchor_positions[i-1] + 1, (anchor_positions[i] - anchor_positions[i-1] - 1))
|
||||
+ common_suffix;
|
||||
listFilesWithRegexpMatchingImpl(path_for_ls, expanded_matcher, total_bytes_to_read, result, recursive);
|
||||
}
|
||||
}
|
||||
|
||||
/* Recursive directory listing with matched paths as a result.
|
||||
* Have the same method in StorageHDFS.
|
||||
*/
|
||||
@ -170,23 +116,23 @@ void listFilesWithRegexpMatchingImpl(
|
||||
std::vector<std::string> & result,
|
||||
bool recursive)
|
||||
{
|
||||
/// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without "{", "}", "*" and ","
|
||||
static const re2::RE2 enum_or_range(R"({([\d]+\.\.[\d]+|[^{}*,]+,[^{}*]*[^{}*,])})");
|
||||
|
||||
std::string_view for_match_view(for_match);
|
||||
std::string_view matched;
|
||||
if (RE2::FindAndConsume(&for_match_view, enum_or_range, &matched))
|
||||
{
|
||||
std::string buffer(matched);
|
||||
if (buffer.find(',') != std::string::npos)
|
||||
{
|
||||
expandSelector(path_for_ls, for_match, total_bytes_to_read, result, recursive);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const size_t first_glob_pos = for_match.find_first_of("*?{");
|
||||
|
||||
if (first_glob_pos == std::string::npos)
|
||||
{
|
||||
try
|
||||
{
|
||||
fs::path path = fs::canonical(path_for_ls + for_match);
|
||||
result.push_back(path.string());
|
||||
}
|
||||
catch (const std::exception &) // NOLINT
|
||||
{
|
||||
/// There is no such file, but we just ignore this.
|
||||
/// throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", for_match);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/');
|
||||
const std::string suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/'
|
||||
|
||||
@ -201,7 +147,7 @@ void listFilesWithRegexpMatchingImpl(
|
||||
throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
|
||||
"Cannot compile regex from glob ({}): {}", for_match, matcher.error());
|
||||
|
||||
bool skip_regex = current_glob == "/*" ? true : false;
|
||||
bool skip_regex = current_glob == "/*";
|
||||
if (!recursive)
|
||||
recursive = current_glob == "/**" ;
|
||||
|
||||
@ -239,18 +185,22 @@ void listFilesWithRegexpMatchingImpl(
|
||||
else if (looking_for_directory && re2::RE2::FullMatch(file_name, matcher))
|
||||
/// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check.
|
||||
listFilesWithRegexpMatchingImpl(fs::path(full_path) / "", suffix_with_globs.substr(next_slash_after_glob_pos),
|
||||
total_bytes_to_read, result);
|
||||
total_bytes_to_read, result, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::string> listFilesWithRegexpMatching(
|
||||
const std::string & path_for_ls,
|
||||
const std::string & for_match,
|
||||
size_t & total_bytes_to_read)
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
listFilesWithRegexpMatchingImpl(path_for_ls, for_match, total_bytes_to_read, result);
|
||||
|
||||
Strings for_match_paths_expanded = expandSelectionGlob(for_match);
|
||||
|
||||
for (const auto & for_match_expanded : for_match_paths_expanded)
|
||||
listFilesWithRegexpMatchingImpl("/", for_match_expanded, total_bytes_to_read, result, false);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -415,7 +365,7 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user
|
||||
else
|
||||
{
|
||||
/// We list only non-directory files.
|
||||
paths = listFilesWithRegexpMatching("/", path, total_bytes_to_read);
|
||||
paths = listFilesWithRegexpMatching(path, total_bytes_to_read);
|
||||
can_be_directory = false;
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,17 @@
|
||||
<clickhouse>
    <!-- The default profile takes max_query_size from the MAX_QUERY_SIZE environment variable. -->
    <profiles>
        <default>
            <max_query_size from_env="MAX_QUERY_SIZE" />
        </default>
    </profiles>

    <!-- Single passwordless `default` user bound to the default profile and quota. -->
    <users>
        <default>
            <password></password>
            <profile>default</profile>
            <quota>default</quota>
        </default>

        <include incl="users_1" />
        <include incl="users_2" />
    </users>
</clickhouse>
|
@ -0,0 +1,17 @@
|
||||
<clickhouse>
    <!-- The default profile pins max_query_size to a literal value (424242). -->
    <profiles>
        <default>
            <max_query_size>424242</max_query_size>
        </default>
    </profiles>

    <!-- Single passwordless `default` user bound to the default profile and quota. -->
    <users>
        <default>
            <password></password>
            <profile>default</profile>
            <quota>default</quota>
        </default>

        <include incl="users_1" />
        <include incl="users_2" />
    </users>
</clickhouse>
|
@ -30,6 +30,15 @@ node6 = cluster.add_instance(
|
||||
},
|
||||
main_configs=["configs/include_from_source.xml"],
|
||||
)
|
||||
node7 = cluster.add_instance(
|
||||
"node7",
|
||||
user_configs=[
|
||||
"configs/000-config_with_env_subst.xml",
|
||||
"configs/010-env_subst_override.xml",
|
||||
],
|
||||
env_variables={"MAX_QUERY_SIZE": "121212"},
|
||||
instance_env_variables=True,
|
||||
) # overridden with 424242
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
@ -78,6 +87,10 @@ def test_config(start_cluster):
|
||||
node6.query("select value from system.settings where name = 'max_query_size'")
|
||||
== "99999\n"
|
||||
)
|
||||
assert (
|
||||
node7.query("select value from system.settings where name = 'max_query_size'")
|
||||
== "424242\n"
|
||||
)
|
||||
|
||||
|
||||
def test_include_config(start_cluster):
|
||||
|
@ -1,4 +1,4 @@
|
||||
SELECT toDate('07-08-2019'); -- { serverError 6 }
|
||||
SELECT toDate('07-08-2019'); -- { serverError 38 }
|
||||
SELECT toDate('2019-0708'); -- { serverError 38 }
|
||||
SELECT toDate('201907-08'); -- { serverError 38 }
|
||||
SELECT toDate('2019^7^8');
|
||||
@ -6,5 +6,5 @@ SELECT toDate('2019^7^8');
|
||||
CREATE TEMPORARY TABLE test (d Date);
|
||||
INSERT INTO test VALUES ('2018-01-01');
|
||||
|
||||
SELECT * FROM test WHERE d >= '07-08-2019'; -- { serverError 53 }
|
||||
SELECT * FROM test WHERE d >= '07-08-2019'; -- { serverError 38 }
|
||||
SELECT * FROM test WHERE d >= '2019-07-08';
|
||||
|
@ -0,0 +1,4 @@
|
||||
-- Tags: no-fasttest

-- With max_execution_time_leaf = 1 the distributed count over a huge numbers() view
-- is expected to be aborted with TIMEOUT_EXCEEDED (error code 159).
SELECT count() FROM cluster('test_cluster_two_shards', view( SELECT * FROM numbers(100000000000) )) SETTINGS max_execution_time_leaf = 1; -- { serverError TIMEOUT_EXCEEDED }

-- Can return partial result: with timeout_overflow_mode_leaf = 'break' the same query
-- is expected to finish without an error (output is discarded via FORMAT Null).
SELECT count() FROM cluster('test_cluster_two_shards', view( SELECT * FROM numbers(100000000000) )) FORMAT Null SETTINGS max_execution_time_leaf = 1, timeout_overflow_mode_leaf = 'break';
|
@ -84,3 +84,8 @@ c
|
||||
0
|
||||
0
|
||||
1
|
||||
[2.199219,1.099609,3.300781]
|
||||
[4.25,3.34961,6.628906]
|
||||
inf
|
||||
nan
|
||||
0
|
||||
|
@ -56,3 +56,7 @@ $CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_str.npy', Npy
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_unicode.npy', Npy, 'value Float32')" 2>&1 | grep -c "BAD_ARGUMENTS"
|
||||
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/complex.npy')" 2>&1 | grep -c "BAD_ARGUMENTS"
|
||||
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/float_16.npy')"
|
||||
|
||||
$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/npy_inf_nan_null.npy')"
|
||||
|
@ -0,0 +1,9 @@
|
||||
3
|
||||
3
|
||||
3
|
||||
array Int64
|
||||
3
|
||||
1000000
|
||||
1000000
|
||||
array Int64
|
||||
1000000
|
19
tests/queries/0_stateless/02908_Npy_files_caching.sh
Executable file
19
tests/queries/0_stateless/02908_Npy_files_caching.sh
Executable file
@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env bash

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Print count() for the Npy file $1 with optimize_count_from_files set to $2.
function count_rows()
{
    local npy_file=$1
    local optimize=$2
    $CLICKHOUSE_LOCAL -q "select count() from file('$npy_file') settings optimize_count_from_files=$optimize"
}

# Describe the Npy file $1, then print number_of_rows recorded in
# system.schema_inference_cache for the Npy format (same clickhouse-local session).
function describe_and_show_cached_rows()
{
    local npy_file=$1
    $CLICKHOUSE_LOCAL -nm -q "
desc file('$npy_file');
select number_of_rows from system.schema_inference_cache where format='Npy';
"
}

small_file="$CURDIR/data_npy/one_dim.npy"
big_file="$CURDIR/data_npy/npy_big.npy"

# Small file: count with and without the optimization, then with an explicit structure.
count_rows "$small_file" 0
count_rows "$small_file" 1
$CLICKHOUSE_LOCAL -q "select count() from file('$small_file', auto, 'array Int64') settings optimize_count_from_files=1"
describe_and_show_cached_rows "$small_file"

# Big file: same checks, minus the explicit-structure variant.
count_rows "$big_file" 0
count_rows "$big_file" 1
describe_and_show_cached_rows "$big_file"
|
15
tests/queries/0_stateless/02910_bad_logs_level_in_local.sh
Executable file
15
tests/queries/0_stateless/02910_bad_logs_level_in_local.sh
Executable file
@ -0,0 +1,15 @@
|
||||
#!/usr/bin/expect -f

# Checks that an invalid `send_logs_level` value typed into interactive
# clickhouse-local is reported as a client-side exception and that the prompt
# comes back afterwards.

log_user 0
set timeout 60
match_max 100000

# By default `expect` silently carries on after a timeout, and an unexpected EOF
# is likewise not an error. Without this block the script would exit successfully
# even if none of the patterns below ever matched. Turn both into hard failures.
# The explicit `expect eof` at the end is matched before these fallback patterns.
expect_after {
    -i $any_spawn_id eof { exit 1 }
    -i $any_spawn_id timeout { exit 1 }
}

spawn bash -c "clickhouse-local"

expect ":) "
send -- "SET send_logs_level = 't'\r"
expect "Exception on client:"
# The prompt must reappear after the error, i.e. the client is still usable.
expect ":) "
send -- "exit\r"
expect eof
|
||||
|
@ -0,0 +1,5 @@
|
||||
2020-01-02 SomeString
|
||||
2020-01-02 SomeString
|
||||
2020-01-02 SomeString
|
||||
2020-01-02 SomeString
|
||||
2020-01-02 SomeString
|
25
tests/queries/0_stateless/02916_date_text_parsing.sql
Normal file
25
tests/queries/0_stateless/02916_date_text_parsing.sql
Normal file
@ -0,0 +1,25 @@
|
||||
-- Date parsing from text in CSV: a malformed value in the Date column must raise
-- CANNOT_PARSE_DATE, a well-formed one must yield the row.

-- Compact form (YYYYMMDD): the non-digit tail gets one character shorter on each line.
SELECT * FROM format(CSV, 'd Date, s String', 'abcdefgh,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '2bcdefgh,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '20cdefgh,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '202defgh,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '2020efgh,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '20200fgh,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '202001gh,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '2020010h,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '20200102,SomeString');

-- Delimited form (YYYY-MM-DD) with non-digits in the year, month or day part,
-- including one-character month/day parts.
SELECT * FROM format(CSV, 'd Date, s String', 'abcd-ef-gh,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '2bcd-ef-gh,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '20cd-ef-gh,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '202d-ef-gh,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '2020-ef-gh,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '2020-f-gh,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '2020-f-g,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '2020-0f-gh,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '2020-01-gh,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '2020-01-h,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '2020-1-gh,SomeString'); -- {serverError CANNOT_PARSE_DATE}
SELECT * FROM format(CSV, 'd Date, s String', '2020-1-h,SomeString'); -- {serverError CANNOT_PARSE_DATE}

-- Valid delimited dates: one- and two-digit month/day in every combination.
SELECT * FROM format(CSV, 'd Date, s String', '2020-01-02,SomeString');
SELECT * FROM format(CSV, 'd Date, s String', '2020-01-2,SomeString');
SELECT * FROM format(CSV, 'd Date, s String', '2020-1-2,SomeString');
SELECT * FROM format(CSV, 'd Date, s String', '2020-1-02,SomeString');
|
8
tests/queries/0_stateless/02916_local_insert_into_function.sh
Executable file
8
tests/queries/0_stateless/02916_local_insert_into_function.sh
Executable file
@ -0,0 +1,8 @@
|
||||
#!/usr/bin/env bash

# Smoke test: run an INSERT INTO FUNCTION file(...) through clickhouse-local,
# writing the single row to /dev/null.

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
source "${CUR_DIR}/../shell_config.sh"

insert_query="insert into function file('/dev/null', CSV, 'c1 UInt32') values (42)"

$CLICKHOUSE_LOCAL -q "${insert_query}"
|
||||
|
BIN
tests/queries/0_stateless/data_npy/float_16.npy
Normal file
BIN
tests/queries/0_stateless/data_npy/float_16.npy
Normal file
Binary file not shown.
BIN
tests/queries/0_stateless/data_npy/npy_big.npy
Normal file
BIN
tests/queries/0_stateless/data_npy/npy_big.npy
Normal file
Binary file not shown.
BIN
tests/queries/0_stateless/data_npy/npy_inf_nan_null.npy
Normal file
BIN
tests/queries/0_stateless/data_npy/npy_inf_nan_null.npy
Normal file
Binary file not shown.
@ -1543,6 +1543,7 @@ github
|
||||
glibc
|
||||
globalIn
|
||||
globalNotIn
|
||||
globbing
|
||||
glushkovds
|
||||
golang
|
||||
googletest
|
||||
|
Loading…
Reference in New Issue
Block a user