Merge branch 'master' into ADQM-1011

This commit is contained in:
Alexey Gerasimchuck 2023-08-05 20:35:45 +10:00 committed by GitHub
commit c2f94101e9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
315 changed files with 3033 additions and 2381 deletions

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
esac
ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
ARG VERSION="23.7.1.2470"
ARG VERSION="23.7.2.25"
ARG PACKAGES="clickhouse-keeper"
# user/group precreated explicitly with fixed uid/gid on purpose.

View File

@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.7.1.2470"
ARG VERSION="23.7.2.25"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.

View File

@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.7.1.2470"
ARG VERSION="23.7.2.25"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -96,5 +96,4 @@ rg -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||:
zstd < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst &
# Compressed (FIXME: remove once only github actions will be left)
rm /var/log/clickhouse-server/clickhouse-server.log
mv /var/log/clickhouse-server/stderr.log /test_output/ ||:

View File

@ -200,8 +200,8 @@ Templates:
- [Server Setting](_description_templates/template-server-setting.md)
- [Database or Table engine](_description_templates/template-engine.md)
- [System table](_description_templates/template-system-table.md)
- [Data type](_description_templates/data-type.md)
- [Statement](_description_templates/statement.md)
- [Data type](_description_templates/template-data-type.md)
- [Statement](_description_templates/template-statement.md)
<a name="how-to-build-docs"/>

View File

@ -0,0 +1,31 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.7.2.25-stable (8dd1107b032) FIXME as compared to v23.7.1.2470-stable (a70127baecc)
#### Backward Incompatible Change
* Backported in [#52850](https://github.com/ClickHouse/ClickHouse/issues/52850): If a dynamic disk contains a name, it should be specified as `disk = disk(name = 'disk_name'`, ...) in disk function arguments. In previous version it could be specified as `disk = disk_<disk_name>(...)`, which is no longer supported. [#52820](https://github.com/ClickHouse/ClickHouse/pull/52820) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### Build/Testing/Packaging Improvement
* Backported in [#52913](https://github.com/ClickHouse/ClickHouse/issues/52913): Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)).
* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)).
* Fix data race in Keeper reconfiguration [#52804](https://github.com/ClickHouse/ClickHouse/pull/52804) ([Antonio Andelic](https://github.com/antonio2368)).
* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Rename setting disable_url_encoding to enable_url_encoding and add a test [#52656](https://github.com/ClickHouse/ClickHouse/pull/52656) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix bugs and better test for SYSTEM STOP LISTEN [#52680](https://github.com/ClickHouse/ClickHouse/pull/52680) ([Nikolay Degterinsky](https://github.com/evillique)).
* Increase min protocol version for sparse serialization [#52835](https://github.com/ClickHouse/ClickHouse/pull/52835) ([Anton Popov](https://github.com/CurtizJ)).
* Docker improvements [#52869](https://github.com/ClickHouse/ClickHouse/pull/52869) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -141,6 +141,10 @@ Runs [stateful functional tests](tests.md#functional-tests). Treat them in the s
Runs [integration tests](tests.md#integration-tests).
## Bugfix validate check
Checks that either a new test (functional or integration) or there some changed tests that fail with the binary built on master branch. This check is triggered when pull request has "pr-bugfix" label.
## Stress Test
Runs stateless functional tests concurrently from several clients to detect
concurrency-related errors. If it fails:

View File

@ -22,7 +22,7 @@ CREATE TABLE deltalake
- `url` — Bucket url with path to the existing Delta Lake table.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file.
Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md)
Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md).
**Example**

View File

@ -22,7 +22,7 @@ CREATE TABLE hudi_table
- `url` — Bucket url with the path to an existing Hudi table.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file.
Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md)
Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md).
**Example**

View File

@ -237,7 +237,7 @@ The following settings can be set before query execution or placed into configur
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
- `s3_upload_part_size_multiply_factor` - Multiply `s3_min_upload_part_size` by this factor each time `s3_multiply_parts_count_threshold` parts were uploaded from a single write to S3. Default values is `2`.
- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3 `s3_min_upload_part_size multiplied` by `s3_upload_part_size_multiply_factor`. Default value us `500`.
- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3, `s3_min_upload_part_size` is multiplied by `s3_upload_part_size_multiply_factor`. Default value is `500`.
- `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. Its number should be limited. The value `0` means unlimited. Default value is `20`. Each in-flight part has a buffer with size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts and more when file is big enough, see `upload_part_size_multiply_factor`. With default settings one uploaded file consumes not more than `320Mb` for a file which is less than `8G`. The consumption is greater for a larger file.
Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.

View File

@ -2131,7 +2131,6 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [output_format_parquet_row_group_size](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`.
- [output_format_parquet_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`.
- [input_format_parquet_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) table in Parquet input format. Default value - `false`.
- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`.
- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`.
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`.
@ -2336,7 +2335,6 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
- [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`.
- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`.
- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`.
- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`.
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`.
@ -2402,7 +2400,6 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.
- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`.
- [output_format_orc_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_orc_compression_method) - compression method used in output ORC format. Default value - `none`.
- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`.
- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`.
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`.

View File

@ -1112,17 +1112,6 @@ Default value: 1.
## Arrow format settings {#arrow-format-settings}
### input_format_arrow_import_nested {#input_format_arrow_import_nested}
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format.
Possible values:
- 0 — Data can not be inserted into `Nested` columns as an array of structs.
- 1 — Data can be inserted into `Nested` columns as an array of structs.
Default value: `0`.
### input_format_arrow_case_insensitive_column_matching {#input_format_arrow_case_insensitive_column_matching}
Ignore case when matching Arrow column names with ClickHouse column names.
@ -1172,17 +1161,6 @@ Default value: `lz4_frame`.
## ORC format settings {#orc-format-settings}
### input_format_orc_import_nested {#input_format_orc_import_nested}
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format.
Possible values:
- 0 — Data can not be inserted into `Nested` columns as an array of structs.
- 1 — Data can be inserted into `Nested` columns as an array of structs.
Default value: `0`.
### input_format_orc_row_batch_size {#input_format_orc_row_batch_size}
Batch size when reading ORC stripes.
@ -1221,17 +1199,6 @@ Default value: `none`.
## Parquet format settings {#parquet-format-settings}
### input_format_parquet_import_nested {#input_format_parquet_import_nested}
Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format.
Possible values:
- 0 — Data can not be inserted into `Nested` columns as an array of structs.
- 1 — Data can be inserted into `Nested` columns as an array of structs.
Default value: `0`.
### input_format_parquet_case_insensitive_column_matching {#input_format_parquet_case_insensitive_column_matching}
Ignore case when matching Parquet column names with ClickHouse column names.

View File

@ -51,7 +51,3 @@ keeper foo bar
- `rmr <path>` -- Recursively deletes path. Confirmation required
- `flwc <command>` -- Executes four-letter-word command
- `help` -- Prints this message
- `get_stat [path]` -- Returns the node's stat (default `.`)
- `find_super_nodes <threshold> [path]` -- Finds nodes with number of children larger than some threshold for the given path (default `.`)
- `delete_stable_backups` -- Deletes ClickHouse nodes used for backups that are now inactive
- `find_big_family [path] [n]` -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)

View File

@ -34,7 +34,13 @@ The binary you just downloaded can run all sorts of ClickHouse tools and utiliti
A common use of `clickhouse-local` is to run ad-hoc queries on files: where you don't have to insert the data into a table. `clickhouse-local` can stream the data from a file into a temporary table and execute your SQL.
If the file is sitting on the same machine as `clickhouse-local`, use the `file` table engine. The following `reviews.tsv` file contains a sampling of Amazon product reviews:
If the file is sitting on the same machine as `clickhouse-local`, you can simple specify the file to load. The following `reviews.tsv` file contains a sampling of Amazon product reviews:
```bash
./clickhouse local -q "SELECT * FROM 'reviews.tsv'"
```
This command is a shortcut of:
```bash
./clickhouse local -q "SELECT * FROM file('reviews.tsv')"

View File

@ -36,6 +36,8 @@ These `ALTER` statements modify entities related to role-based access control:
[ALTER TABLE ... MODIFY COMMENT](/docs/en/sql-reference/statements/alter/comment.md) statement adds, modifies, or removes comments to the table, regardless if it was set before or not.
[ALTER NAMED COLLECTION](/docs/en/sql-reference/statements/alter/named-collection.md) statement modifies [Named Collections](/docs/en/operations/named-collections.md).
## Mutations
`ALTER` queries that are intended to manipulate table data are implemented with a mechanism called “mutations”, most notably [ALTER TABLE … DELETE](/docs/en/sql-reference/statements/alter/delete.md) and [ALTER TABLE … UPDATE](/docs/en/sql-reference/statements/alter/update.md). They are asynchronous background processes similar to merges in [MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables that to produce new “mutated” versions of parts.

View File

@ -0,0 +1,30 @@
---
slug: /en/sql-reference/statements/alter/named-collection
sidebar_label: NAMED COLLECTION
---
# ALTER NAMED COLLECTION
This query intends to modify already existing named collections.
**Syntax**
```sql
ALTER NAMED COLLECTION [IF EXISTS] name [ON CLUSTER cluster]
[ SET
key_name1 = 'some value',
key_name2 = 'some value',
key_name3 = 'some value',
... ] |
[ DELETE key_name4, key_name5, ... ]
```
**Example**
```sql
CREATE NAMED COLLECTION foobar AS a = '1', b = '2';
ALTER NAMED COLLECTION foobar SET a = '2', c = '3';
ALTER NAMED COLLECTION foobar DELETE b;
```

View File

@ -8,13 +8,14 @@ sidebar_label: CREATE
Create queries make a new entity of one of the following kinds:
- [DATABASE](../../../sql-reference/statements/create/database.md)
- [TABLE](../../../sql-reference/statements/create/table.md)
- [VIEW](../../../sql-reference/statements/create/view.md)
- [DICTIONARY](../../../sql-reference/statements/create/dictionary.md)
- [FUNCTION](../../../sql-reference/statements/create/function.md)
- [USER](../../../sql-reference/statements/create/user.md)
- [ROLE](../../../sql-reference/statements/create/role.md)
- [ROW POLICY](../../../sql-reference/statements/create/row-policy.md)
- [QUOTA](../../../sql-reference/statements/create/quota.md)
- [SETTINGS PROFILE](../../../sql-reference/statements/create/settings-profile.md)
- [DATABASE](/docs/en/sql-reference/statements/create/database.md)
- [TABLE](/docs/en/sql-reference/statements/create/table.md)
- [VIEW](/docs/en/sql-reference/statements/create/view.md)
- [DICTIONARY](/docs/en/sql-reference/statements/create/dictionary.md)
- [FUNCTION](/docs/en/sql-reference/statements/create/function.md)
- [USER](/docs/en/sql-reference/statements/create/user.md)
- [ROLE](/docs/en/sql-reference/statements/create/role.md)
- [ROW POLICY](/docs/en/sql-reference/statements/create/row-policy.md)
- [QUOTA](/docs/en/sql-reference/statements/create/quota.md)
- [SETTINGS PROFILE](/docs/en/sql-reference/statements/create/settings-profile.md)
- [NAMED COLLECTION](/docs/en/sql-reference/statements/create/named-collection.md)

View File

@ -0,0 +1,34 @@
---
slug: /en/sql-reference/statements/create/named-collection
sidebar_label: NAMED COLLECTION
---
# CREATE NAMED COLLECTION
Creates a new named collection.
**Syntax**
```sql
CREATE NAMED COLLECTION [IF NOT EXISTS] name [ON CLUSTER cluster] AS
key_name1 = 'some value',
key_name2 = 'some value',
key_name3 = 'some value',
...
```
**Example**
```sql
CREATE NAMED COLLECTION foobar AS a = '1', b = '2';
```
**Related statements**
- [CREATE NAMED COLLECTION](https://clickhouse.com/docs/en/sql-reference/statements/alter/named-collection)
- [DROP NAMED COLLECTION](https://clickhouse.com/docs/en/sql-reference/statements/drop#drop-function)
**See Also**
- [Named collections guide](/docs/en/operations/named-collections.md)

View File

@ -119,3 +119,20 @@ DROP FUNCTION [IF EXISTS] function_name [on CLUSTER cluster]
CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b;
DROP FUNCTION linear_equation;
```
## DROP NAMED COLLECTION
Deletes a named collection.
**Syntax**
``` sql
DROP NAMED COLLECTION [IF EXISTS] name [on CLUSTER cluster]
```
**Example**
``` sql
CREATE NAMED COLLECTION foobar AS a = '1', b = '2';
DROP NAMED COLLECTION foobar;
```

View File

@ -16,14 +16,14 @@ All available clusters are listed in the [system.clusters](../../operations/syst
**Syntax**
``` sql
cluster('cluster_name', db.table[, sharding_key])
cluster('cluster_name', db, table[, sharding_key])
clusterAllReplicas('cluster_name', db.table[, sharding_key])
clusterAllReplicas('cluster_name', db, table[, sharding_key])
cluster(['cluster_name', db.table, sharding_key])
cluster(['cluster_name', db, table, sharding_key])
clusterAllReplicas(['cluster_name', db.table, sharding_key])
clusterAllReplicas(['cluster_name', db, table, sharding_key])
```
**Arguments**
- `cluster_name` Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
- `cluster_name` Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers, set `default` if not specified.
- `db.table` or `db`, `table` - Name of a database and a table.
- `sharding_key` - A sharding key. Optional. Needs to be specified if the cluster has more than one shard.

View File

@ -21,7 +21,7 @@ iceberg(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure])
- `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. By default `Parquet` is used.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
Engine parameters can be specified using [Named Collections](../../operations/named-collections.md)
Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md).
**Returned value**

View File

@ -13,10 +13,10 @@ Both functions can be used in `SELECT` and `INSERT` queries.
## Syntax
``` sql
remote('addresses_expr', db, table[, 'user'[, 'password'], sharding_key])
remote('addresses_expr', db.table[, 'user'[, 'password'], sharding_key])
remoteSecure('addresses_expr', db, table[, 'user'[, 'password'], sharding_key])
remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key])
remote('addresses_expr', [db, table, 'user'[, 'password'], sharding_key])
remote('addresses_expr', [db.table, 'user'[, 'password'], sharding_key])
remoteSecure('addresses_expr', [db, table, 'user'[, 'password'], sharding_key])
remoteSecure('addresses_expr', [db.table, 'user'[, 'password'], sharding_key])
```
## Parameters
@ -29,6 +29,8 @@ remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key])
The port is required for an IPv6 address.
If only specify this parameter, `db` and `table` will use `system.one` by default.
Type: [String](../../sql-reference/data-types/string.md).
- `db` — Database name. Type: [String](../../sql-reference/data-types/string.md).

View File

@ -1353,8 +1353,6 @@ ClickHouse поддерживает настраиваемую точность
$ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet"
```
Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested).
Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Parquet, используйте команду следующего вида:
``` bash
@ -1413,8 +1411,6 @@ ClickHouse поддерживает настраиваемую точность
$ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow"
```
Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested).
### Вывод данных {#selecting-data-arrow}
Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Arrow, используйте команду следующего вида:
@ -1471,8 +1467,6 @@ ClickHouse поддерживает настраиваемую точность
$ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC"
```
Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested).
### Вывод данных {#selecting-data-2}
Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата ORC, используйте команду следующего вида:

View File

@ -238,39 +238,6 @@ ClickHouse применяет настройку в тех случаях, ко
В случае превышения `input_format_allow_errors_ratio` ClickHouse генерирует исключение.
## input_format_parquet_import_nested {#input_format_parquet_import_nested}
Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Parquet](../../interfaces/formats.md#data-format-parquet).
Возможные значения:
- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур.
- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур.
Значение по умолчанию: `0`.
## input_format_arrow_import_nested {#input_format_arrow_import_nested}
Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Arrow](../../interfaces/formats.md#data_types-matching-arrow).
Возможные значения:
- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур.
- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур.
Значение по умолчанию: `0`.
## input_format_orc_import_nested {#input_format_orc_import_nested}
Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [ORC](../../interfaces/formats.md#data-format-orc).
Возможные значения:
- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур.
- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур.
Значение по умолчанию: `0`.
## input_format_values_interpret_expressions {#settings-input_format_values_interpret_expressions}
Включает или отключает парсер SQL, если потоковый парсер не может проанализировать данные. Этот параметр используется только для формата [Values](../../interfaces/formats.md#data-format-values) при вставке данных. Дополнительные сведения о парсерах читайте в разделе [Синтаксис](../../sql-reference/syntax.md).

View File

@ -1,6 +1,5 @@
#include "Commands.h"
#include <queue>
#include "KeeperClient.h"
@ -25,18 +24,8 @@ void LSCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con
else
path = client->cwd;
auto children = client->zookeeper->getChildren(path);
std::sort(children.begin(), children.end());
bool need_space = false;
for (const auto & child : children)
{
if (std::exchange(need_space, true))
std::cout << " ";
std::cout << child;
}
for (const auto & child : client->zookeeper->getChildren(path))
std::cout << child << " ";
std::cout << "\n";
}
@ -88,7 +77,7 @@ void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
client->zookeeper->set(
client->getAbsolutePath(query->args[0].safeGet<String>()),
query->args[1].safeGet<String>(),
static_cast<Int32>(query->args[2].safeGet<Int64>()));
static_cast<Int32>(query->args[2].get<Int32>()));
}
bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
@ -141,173 +130,6 @@ void GetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
std::cout << client->zookeeper->get(client->getAbsolutePath(query->args[0].safeGet<String>())) << "\n";
}
bool GetStatCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
return true;
node->args.push_back(std::move(arg));
return true;
}
void GetStatCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
Coordination::Stat stat;
String path;
if (!query->args.empty())
path = client->getAbsolutePath(query->args[0].safeGet<String>());
else
path = client->cwd;
client->zookeeper->get(path, &stat);
std::cout << "cZxid = " << stat.czxid << "\n";
std::cout << "mZxid = " << stat.mzxid << "\n";
std::cout << "pZxid = " << stat.pzxid << "\n";
std::cout << "ctime = " << stat.ctime << "\n";
std::cout << "mtime = " << stat.mtime << "\n";
std::cout << "version = " << stat.version << "\n";
std::cout << "cversion = " << stat.cversion << "\n";
std::cout << "aversion = " << stat.aversion << "\n";
std::cout << "ephemeralOwner = " << stat.ephemeralOwner << "\n";
std::cout << "dataLength = " << stat.dataLength << "\n";
std::cout << "numChildren = " << stat.numChildren << "\n";
}
bool FindSuperNodes::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
ASTPtr threshold;
if (!ParserUnsignedInteger{}.parse(pos, threshold, expected))
return false;
node->args.push_back(threshold->as<ASTLiteral &>().value);
String path;
if (!parseKeeperPath(pos, expected, path))
path = ".";
node->args.push_back(std::move(path));
return true;
}
void FindSuperNodes::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
auto threshold = query->args[0].safeGet<UInt64>();
auto path = client->getAbsolutePath(query->args[1].safeGet<String>());
Coordination::Stat stat;
client->zookeeper->get(path, &stat);
if (stat.numChildren >= static_cast<Int32>(threshold))
{
std::cout << static_cast<String>(path) << "\t" << stat.numChildren << "\n";
return;
}
auto children = client->zookeeper->getChildren(path);
std::sort(children.begin(), children.end());
for (const auto & child : children)
{
auto next_query = *query;
next_query.args[1] = DB::Field(path / child);
execute(&next_query, client);
}
}
bool DeleteStableBackups::parse(IParser::Pos & /* pos */, std::shared_ptr<ASTKeeperQuery> & /* node */, Expected & /* expected */) const
{
return true;
}
void DeleteStableBackups::execute(const ASTKeeperQuery * /* query */, KeeperClient * client) const
{
client->askConfirmation(
"You are going to delete all inactive backups in /clickhouse/backups.",
[client]
{
fs::path backup_root = "/clickhouse/backups";
auto backups = client->zookeeper->getChildren(backup_root);
std::sort(backups.begin(), backups.end());
for (const auto & child : backups)
{
auto backup_path = backup_root / child;
std::cout << "Found backup " << backup_path << ", checking if it's active\n";
String stage_path = backup_path / "stage";
auto stages = client->zookeeper->getChildren(stage_path);
bool is_active = false;
for (const auto & stage : stages)
{
if (startsWith(stage, "alive"))
{
is_active = true;
break;
}
}
if (is_active)
{
std::cout << "Backup " << backup_path << " is active, not going to delete\n";
continue;
}
std::cout << "Backup " << backup_path << " is not active, deleting it\n";
client->zookeeper->removeRecursive(backup_path);
}
});
}
bool FindBigFamily::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String path;
if (!parseKeeperPath(pos, expected, path))
path = ".";
node->args.push_back(std::move(path));
ASTPtr count;
if (ParserUnsignedInteger{}.parse(pos, count, expected))
node->args.push_back(count->as<ASTLiteral &>().value);
else
node->args.push_back(UInt64(10));
return true;
}
void FindBigFamily::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
auto path = client->getAbsolutePath(query->args[0].safeGet<String>());
auto n = query->args[1].safeGet<UInt64>();
std::vector<std::tuple<Int32, String>> result;
std::queue<fs::path> queue;
queue.push(path);
while (!queue.empty())
{
auto next_path = queue.front();
queue.pop();
auto children = client->zookeeper->getChildren(next_path);
std::transform(children.cbegin(), children.cend(), children.begin(), [&](const String & child) { return next_path / child; });
auto response = client->zookeeper->get(children);
for (size_t i = 0; i < response.size(); ++i)
{
result.emplace_back(response[i].stat.numChildren, children[i]);
queue.push(children[i]);
}
}
std::sort(result.begin(), result.end(), std::greater());
for (UInt64 i = 0; i < std::min(result.size(), static_cast<size_t>(n)); ++i)
std::cout << std::get<1>(result[i]) << "\t" << std::get<0>(result[i]) << "\n";
}
bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
@ -348,7 +170,7 @@ bool HelpCommand::parse(IParser::Pos & /* pos */, std::shared_ptr<ASTKeeperQuery
void HelpCommand::execute(const ASTKeeperQuery * /* query */, KeeperClient * /* client */) const
{
for (const auto & pair : KeeperClient::commands)
std::cout << pair.second->generateHelpString() << "\n";
std::cout << pair.second->getHelpMessage() << "\n";
}
bool FourLetterWordCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const

View File

@ -21,12 +21,6 @@ public:
virtual String getName() const = 0;
virtual ~IKeeperClientCommand() = default;
String generateHelpString() const
{
return fmt::vformat(getHelpMessage(), fmt::make_format_args(getName()));
}
};
using Command = std::shared_ptr<IKeeperClientCommand>;
@ -40,7 +34,7 @@ class LSCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} [path] -- Lists the nodes for the given path (default: cwd)"; }
String getHelpMessage() const override { return "ls [path] -- Lists the nodes for the given path (default: cwd)"; }
};
class CDCommand : public IKeeperClientCommand
@ -51,7 +45,7 @@ class CDCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} [path] -- Change the working path (default `.`)"; }
String getHelpMessage() const override { return "cd [path] -- Change the working path (default `.`)"; }
};
class SetCommand : public IKeeperClientCommand
@ -64,7 +58,7 @@ class SetCommand : public IKeeperClientCommand
String getHelpMessage() const override
{
return "{} <path> <value> [version] -- Updates the node's value. Only update if version matches (default: -1)";
return "set <path> <value> [version] -- Updates the node's value. Only update if version matches (default: -1)";
}
};
@ -76,7 +70,7 @@ class CreateCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} <path> <value> -- Creates new node"; }
String getHelpMessage() const override { return "create <path> <value> -- Creates new node"; }
};
class GetCommand : public IKeeperClientCommand
@ -87,63 +81,9 @@ class GetCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} <path> -- Returns the node's value"; }
String getHelpMessage() const override { return "get <path> -- Returns the node's value"; }
};
class GetStatCommand : public IKeeperClientCommand
{
String getName() const override { return "get_stat"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} [path] -- Returns the node's stat (default `.`)"; }
};
class FindSuperNodes : public IKeeperClientCommand
{
String getName() const override { return "find_super_nodes"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override
{
return "{} <threshold> [path] -- Finds nodes with number of children larger than some threshold for the given path (default `.`)";
}
};
class DeleteStableBackups : public IKeeperClientCommand
{
String getName() const override { return "delete_stable_backups"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override
{
return "{} -- Deletes ClickHouse nodes used for backups that are now inactive";
}
};
class FindBigFamily : public IKeeperClientCommand
{
String getName() const override { return "find_big_family"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override
{
return "{} [path] [n] -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)";
}
};
class RMCommand : public IKeeperClientCommand
{
String getName() const override { return "rm"; }
@ -152,7 +92,7 @@ class RMCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} <path> -- Remove the node"; }
String getHelpMessage() const override { return "remove <path> -- Remove the node"; }
};
class RMRCommand : public IKeeperClientCommand
@ -163,7 +103,7 @@ class RMRCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} <path> -- Recursively deletes path. Confirmation required"; }
String getHelpMessage() const override { return "rmr <path> -- Recursively deletes path. Confirmation required"; }
};
class HelpCommand : public IKeeperClientCommand
@ -174,7 +114,7 @@ class HelpCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} -- Prints this message"; }
String getHelpMessage() const override { return "help -- Prints this message"; }
};
class FourLetterWordCommand : public IKeeperClientCommand
@ -185,7 +125,7 @@ class FourLetterWordCommand : public IKeeperClientCommand
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} <command> -- Executes four-letter-word command"; }
String getHelpMessage() const override { return "flwc <command> -- Executes four-letter-word command"; }
};
}

View File

@ -177,10 +177,6 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */)
std::make_shared<SetCommand>(),
std::make_shared<CreateCommand>(),
std::make_shared<GetCommand>(),
std::make_shared<GetStatCommand>(),
std::make_shared<FindSuperNodes>(),
std::make_shared<DeleteStableBackups>(),
std::make_shared<FindBigFamily>(),
std::make_shared<RMCommand>(),
std::make_shared<RMRCommand>(),
std::make_shared<HelpCommand>(),
@ -270,8 +266,16 @@ void KeeperClient::runInteractive()
LineReader::Patterns query_extenders = {"\\"};
LineReader::Patterns query_delimiters = {};
char word_break_characters[] = " \t\v\f\a\b\r\n/";
ReplxxLineReader lr(suggest, history_file, false, query_extenders, query_delimiters, {});
ReplxxLineReader lr(
suggest,
history_file,
/* multiline= */ false,
query_extenders,
query_delimiters,
word_break_characters,
/* highlighter_= */ {});
lr.enableBracketedPaste();
while (true)

View File

@ -58,7 +58,6 @@ bool KeeperParser::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
return false;
String command_name(pos->begin, pos->end);
std::transform(command_name.begin(), command_name.end(), command_name.begin(), [](unsigned char c) { return std::tolower(c); });
Command command;
auto iter = KeeperClient::commands.find(command_name);

View File

@ -466,6 +466,11 @@ int main(int argc_, char ** argv_)
checkHarmfulEnvironmentVariables(argv_);
#endif
/// This is used for testing. For example,
/// clickhouse-local should be able to run a simple query without throw/catch.
if (getenv("CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION")) // NOLINT(concurrency-mt-unsafe)
DB::terminate_on_any_exception = true;
/// Reset new handler to default (that throws std::bad_alloc)
/// It is needed because LLVM library clobbers it.
std::set_new_handler(nullptr);

View File

@ -1650,6 +1650,7 @@ try
database_catalog.initializeAndLoadTemporaryDatabase();
loadMetadataSystem(global_context);
maybeConvertSystemDatabase(global_context);
startupSystemTables();
/// After attaching system databases we can initialize system log.
global_context->initializeSystemLogs();
global_context->setSystemZooKeeperLogAfterInitializationIfNeeded();
@ -1668,7 +1669,6 @@ try
/// Then, load remaining databases
loadMetadata(global_context, default_database);
convertDatabasesEnginesIfNeed(global_context);
startupSystemTables();
database_catalog.startupBackgroundCleanup();
/// After loading validate that default database exists
database_catalog.assertDatabaseExists(default_database);

231
rust/Cargo.lock generated
View File

@ -78,6 +78,55 @@ dependencies = [
"libc",
]
[[package]]
name = "anstream"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is-terminal",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd"
[[package]]
name = "anstyle-parse"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188"
dependencies = [
"anstyle",
"windows-sys",
]
[[package]]
name = "anyhow"
version = "1.0.72"
@ -89,9 +138,9 @@ dependencies = [
[[package]]
name = "ariadne"
version = "0.2.0"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "367fd0ad87307588d087544707bc5fbf4805ded96c7db922b70d368fa1cb5702"
checksum = "72fe02fc62033df9ba41cba57ee19acf5e742511a140c7dbc3a873e19a19a1bd"
dependencies = [
"unicode-width",
"yansi",
@ -142,6 +191,12 @@ version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42"
[[package]]
name = "blake3"
version = "1.4.1"
@ -204,7 +259,7 @@ version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d"
dependencies = [
"hashbrown 0.12.3",
"hashbrown",
"stacker",
]
@ -218,6 +273,12 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "colorchoice"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "constant_time_eq"
version = "0.3.0"
@ -488,21 +549,36 @@ checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "enum-as-inner"
version = "0.5.1"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116"
checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 1.0.109",
"syn 2.0.27",
]
[[package]]
name = "equivalent"
version = "1.0.1"
name = "errno"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f"
dependencies = [
"errno-dragonfly",
"libc",
"windows-sys",
]
[[package]]
name = "errno-dragonfly"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "fnv"
@ -555,12 +631,6 @@ dependencies = [
"ahash",
]
[[package]]
name = "hashbrown"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
[[package]]
name = "heck"
version = "0.4.1"
@ -603,13 +673,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "indexmap"
version = "2.0.0"
name = "is-terminal"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d"
checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
dependencies = [
"equivalent",
"hashbrown 0.14.0",
"hermit-abi",
"rustix",
"windows-sys",
]
[[package]]
@ -621,6 +692,15 @@ dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.9"
@ -657,6 +737,12 @@ dependencies = [
"cc",
]
[[package]]
name = "linux-raw-sys"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503"
[[package]]
name = "log"
version = "0.4.19"
@ -708,7 +794,7 @@ version = "0.24.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa52e972a9a719cecb6864fb88568781eb706bac2cd1d4f04a648542dbf78069"
dependencies = [
"bitflags",
"bitflags 1.3.2",
"cfg-if",
"libc",
]
@ -720,7 +806,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4"
dependencies = [
"autocfg",
"bitflags",
"bitflags 1.3.2",
"cfg-if",
"libc",
"memoffset 0.6.5",
@ -787,31 +873,55 @@ dependencies = [
]
[[package]]
name = "prql-compiler"
version = "0.8.1"
name = "prql-ast"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c99b52154002ac7f286dd2293c2f8d4e30526c1d396b14deef5ada1deef3c9ff"
checksum = "71194e75f14dbe7debdf2b5eca0812c978021a1bd23d6fe1da98b58e407e035a"
dependencies = [
"enum-as-inner",
"semver",
"serde",
"strum",
]
[[package]]
name = "prql-compiler"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ff28e838b1be4227cc567a75c11caa3be25c5015f0e5fd21279c06e944ba44f"
dependencies = [
"anstream",
"anyhow",
"ariadne",
"chumsky",
"csv",
"enum-as-inner",
"itertools",
"lazy_static",
"itertools 0.11.0",
"log",
"once_cell",
"prql-ast",
"prql-parser",
"regex",
"semver",
"serde",
"serde_json",
"serde_yaml",
"sqlformat",
"sqlparser",
"strum",
"strum_macros",
]
[[package]]
name = "prql-parser"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3182e2ef0465a960eb02519b18768e39123d3c3a0037a2d2934055a3ef901870"
dependencies = [
"chumsky",
"itertools 0.11.0",
"prql-ast",
"semver",
]
[[package]]
name = "psm"
version = "0.1.21"
@ -858,7 +968,7 @@ version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
dependencies = [
"bitflags",
"bitflags 1.3.2",
]
[[package]]
@ -907,6 +1017,19 @@ version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustix"
version = "0.38.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ee020b1716f0a80e2ace9b03441a749e402e86712f15f16fe8a8f75afac732f"
dependencies = [
"bitflags 2.3.3",
"errno",
"libc",
"linux-raw-sys",
"windows-sys",
]
[[package]]
name = "rustversion"
version = "1.0.14"
@ -971,19 +1094,6 @@ dependencies = [
"serde",
]
[[package]]
name = "serde_yaml"
version = "0.9.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a49e178e4452f45cb61d0cd8cebc1b0fafd3e41929e996cef79aa3aca91f574"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",
"unsafe-libyaml",
]
[[package]]
name = "skim"
version = "0.10.4"
@ -991,7 +1101,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5d28de0a6cb2cdd83a076f1de9d965b973ae08b244df1aa70b432946dda0f32"
dependencies = [
"beef",
"bitflags",
"bitflags 1.3.2",
"chrono",
"crossbeam",
"defer-drop",
@ -1015,16 +1125,16 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e"
dependencies = [
"itertools",
"itertools 0.10.5",
"nom",
"unicode_categories",
]
[[package]]
name = "sqlparser"
version = "0.33.0"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a"
checksum = "2eaa1e88e78d2c2460d78b7dc3f0c08dbb606ab4222f9aff36f420d36e307d87"
dependencies = [
"log",
"serde",
@ -1051,24 +1161,24 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "strum"
version = "0.24.1"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125"
dependencies = [
"strum_macros",
]
[[package]]
name = "strum_macros"
version = "0.24.3"
version = "0.25.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
checksum = "6069ca09d878a33f883cc06aaa9718ede171841d3832450354410b718b097232"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn 1.0.109",
"syn 2.0.27",
]
[[package]]
@ -1191,7 +1301,7 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e19c6ab038babee3d50c8c12ff8b910bdb2196f62278776422f50390d8e53d8"
dependencies = [
"bitflags",
"bitflags 1.3.2",
"lazy_static",
"log",
"nix 0.24.3",
@ -1223,12 +1333,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
[[package]]
name = "unsafe-libyaml"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f28467d3e1d3c6586d8f25fa243f544f5800fec42d97032474e17222c2b75cfa"
[[package]]
name = "utf8parse"
version = "0.2.1"
@ -1368,6 +1472,15 @@ dependencies = [
"windows-targets",
]
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.48.1"

View File

@ -1,12 +1,12 @@
[package]
edition = "2021"
name = "_ch_rust_prql"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
prql-compiler = "0.8.1"
prql-compiler = "0.9.3"
serde_json = "1.0"
[lib]

View File

@ -6,6 +6,7 @@
#include <Access/DiskAccessStorage.h>
#include <Access/LDAPAccessStorage.h>
#include <Access/ContextAccess.h>
#include <Access/EnabledSettings.h>
#include <Access/EnabledRolesInfo.h>
#include <Access/RoleCache.h>
#include <Access/RowPolicyCache.h>
@ -729,6 +730,14 @@ std::shared_ptr<const EnabledRoles> AccessControl::getEnabledRoles(
}
std::shared_ptr<const EnabledRolesInfo> AccessControl::getEnabledRolesInfo(
const std::vector<UUID> & current_roles,
const std::vector<UUID> & current_roles_with_admin_option) const
{
return getEnabledRoles(current_roles, current_roles_with_admin_option)->getRolesInfo();
}
std::shared_ptr<const EnabledRowPolicies> AccessControl::getEnabledRowPolicies(const UUID & user_id, const boost::container::flat_set<UUID> & enabled_roles) const
{
return row_policy_cache->getEnabledRowPolicies(user_id, enabled_roles);
@ -772,6 +781,15 @@ std::shared_ptr<const EnabledSettings> AccessControl::getEnabledSettings(
return settings_profiles_cache->getEnabledSettings(user_id, settings_from_user, enabled_roles, settings_from_enabled_roles);
}
std::shared_ptr<const SettingsProfilesInfo> AccessControl::getEnabledSettingsInfo(
const UUID & user_id,
const SettingsProfileElements & settings_from_user,
const boost::container::flat_set<UUID> & enabled_roles,
const SettingsProfileElements & settings_from_enabled_roles) const
{
return getEnabledSettings(user_id, settings_from_user, enabled_roles, settings_from_enabled_roles)->getInfo();
}
std::shared_ptr<const SettingsProfilesInfo> AccessControl::getSettingsProfileInfo(const UUID & profile_id)
{
return settings_profiles_cache->getSettingsProfileInfo(profile_id);

View File

@ -29,6 +29,7 @@ class ContextAccessParams;
struct User;
using UserPtr = std::shared_ptr<const User>;
class EnabledRoles;
struct EnabledRolesInfo;
class RoleCache;
class EnabledRowPolicies;
class RowPolicyCache;
@ -187,6 +188,10 @@ public:
const std::vector<UUID> & current_roles,
const std::vector<UUID> & current_roles_with_admin_option) const;
std::shared_ptr<const EnabledRolesInfo> getEnabledRolesInfo(
const std::vector<UUID> & current_roles,
const std::vector<UUID> & current_roles_with_admin_option) const;
std::shared_ptr<const EnabledRowPolicies> getEnabledRowPolicies(
const UUID & user_id,
const boost::container::flat_set<UUID> & enabled_roles) const;
@ -209,6 +214,12 @@ public:
const boost::container::flat_set<UUID> & enabled_roles,
const SettingsProfileElements & settings_from_enabled_roles) const;
std::shared_ptr<const SettingsProfilesInfo> getEnabledSettingsInfo(
const UUID & user_id,
const SettingsProfileElements & settings_from_user,
const boost::container::flat_set<UUID> & enabled_roles,
const SettingsProfileElements & settings_from_enabled_roles) const;
std::shared_ptr<const SettingsProfilesInfo> getSettingsProfileInfo(const UUID & profile_id);
const ExternalAuthenticators & getExternalAuthenticators() const;

View File

@ -6887,13 +6887,12 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
scope.scope_node->formatASTForErrorMessage());
}
std::erase_if(with_nodes, [](const QueryTreeNodePtr & node)
{
auto * subquery_node = node->as<QueryNode>();
auto * union_node = node->as<UnionNode>();
return (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE());
});
/** WITH section can be safely removed, because WITH section only can provide aliases to query expressions
* and CTE for other sections to use.
*
* Example: WITH 1 AS constant, (x -> x + 1) AS lambda, a AS (SELECT * FROM test_table);
*/
query_node_typed.getWith().getNodes().clear();
for (auto & window_node : query_node_typed.getWindow().getNodes())
{
@ -6952,9 +6951,6 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
scope.scope_node->formatASTForErrorMessage());
}
if (query_node_typed.hasWith())
resolveExpressionNodeList(query_node_typed.getWithNode(), scope, true /*allow_lambda_expression*/, false /*allow_table_expression*/);
if (query_node_typed.getPrewhere())
resolveExpressionNode(query_node_typed.getPrewhere(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
@ -7123,13 +7119,6 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
scope.scope_node->formatASTForErrorMessage());
}
/** WITH section can be safely removed, because WITH section only can provide aliases to query expressions
* and CTE for other sections to use.
*
* Example: WITH 1 AS constant, (x -> x + 1) AS lambda, a AS (SELECT * FROM test_table);
*/
query_node_typed.getWith().getNodes().clear();
/** WINDOW section can be safely removed, because WINDOW section can only provide window definition to window functions.
*
* Example: SELECT count(*) OVER w FROM test_table WINDOW w AS (PARTITION BY id);

View File

@ -77,10 +77,12 @@ BackupEntriesCollector::BackupEntriesCollector(
const ASTBackupQuery::Elements & backup_query_elements_,
const BackupSettings & backup_settings_,
std::shared_ptr<IBackupCoordination> backup_coordination_,
const ReadSettings & read_settings_,
const ContextPtr & context_)
: backup_query_elements(backup_query_elements_)
, backup_settings(backup_settings_)
, backup_coordination(backup_coordination_)
, read_settings(read_settings_)
, context(context_)
, on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000))
, consistent_metadata_snapshot_timeout(context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 600000))

View File

@ -30,6 +30,7 @@ public:
BackupEntriesCollector(const ASTBackupQuery::Elements & backup_query_elements_,
const BackupSettings & backup_settings_,
std::shared_ptr<IBackupCoordination> backup_coordination_,
const ReadSettings & read_settings_,
const ContextPtr & context_);
~BackupEntriesCollector();
@ -40,6 +41,7 @@ public:
const BackupSettings & getBackupSettings() const { return backup_settings; }
std::shared_ptr<IBackupCoordination> getBackupCoordination() const { return backup_coordination; }
const ReadSettings & getReadSettings() const { return read_settings; }
ContextPtr getContext() const { return context; }
/// Adds a backup entry which will be later returned by run().
@ -93,6 +95,7 @@ private:
const ASTBackupQuery::Elements backup_query_elements;
const BackupSettings backup_settings;
std::shared_ptr<IBackupCoordination> backup_coordination;
const ReadSettings read_settings;
ContextPtr context;
std::chrono::milliseconds on_cluster_first_sync_timeout;
std::chrono::milliseconds consistent_metadata_snapshot_timeout;

View File

@ -57,7 +57,7 @@ UInt64 BackupEntryFromImmutableFile::getSize() const
return *file_size;
}
UInt128 BackupEntryFromImmutableFile::getChecksum() const
UInt128 BackupEntryFromImmutableFile::getChecksum(const ReadSettings & read_settings) const
{
{
std::lock_guard lock{size_and_checksum_mutex};
@ -73,7 +73,7 @@ UInt128 BackupEntryFromImmutableFile::getChecksum() const
}
}
auto calculated_checksum = BackupEntryWithChecksumCalculation<IBackupEntry>::getChecksum();
auto calculated_checksum = BackupEntryWithChecksumCalculation<IBackupEntry>::getChecksum(read_settings);
{
std::lock_guard lock{size_and_checksum_mutex};
@ -86,13 +86,13 @@ UInt128 BackupEntryFromImmutableFile::getChecksum() const
}
}
std::optional<UInt128> BackupEntryFromImmutableFile::getPartialChecksum(size_t prefix_length) const
std::optional<UInt128> BackupEntryFromImmutableFile::getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const
{
if (prefix_length == 0)
return 0;
if (prefix_length >= getSize())
return getChecksum();
return getChecksum(read_settings);
/// For immutable files we don't use partial checksums.
return std::nullopt;

View File

@ -27,8 +27,8 @@ public:
std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const override;
UInt64 getSize() const override;
UInt128 getChecksum() const override;
std::optional<UInt128> getPartialChecksum(size_t prefix_length) const override;
UInt128 getChecksum(const ReadSettings & read_settings) const override;
std::optional<UInt128> getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const override;
DataSourceDescription getDataSourceDescription() const override { return data_source_description; }
bool isEncryptedByDisk() const override { return copy_encrypted; }

View File

@ -11,17 +11,17 @@ namespace DB
{
namespace
{
String readFile(const String & file_path)
String readFile(const String & file_path, const ReadSettings & read_settings)
{
auto buf = createReadBufferFromFileBase(file_path, /* settings= */ {});
auto buf = createReadBufferFromFileBase(file_path, read_settings);
String s;
readStringUntilEOF(s, *buf);
return s;
}
String readFile(const DiskPtr & disk, const String & file_path, bool copy_encrypted)
String readFile(const DiskPtr & disk, const String & file_path, const ReadSettings & read_settings, bool copy_encrypted)
{
auto buf = copy_encrypted ? disk->readEncryptedFile(file_path, {}) : disk->readFile(file_path);
auto buf = copy_encrypted ? disk->readEncryptedFile(file_path, read_settings) : disk->readFile(file_path, read_settings);
String s;
readStringUntilEOF(s, *buf);
return s;
@ -29,19 +29,19 @@ namespace
}
BackupEntryFromSmallFile::BackupEntryFromSmallFile(const String & file_path_)
BackupEntryFromSmallFile::BackupEntryFromSmallFile(const String & file_path_, const ReadSettings & read_settings_)
: file_path(file_path_)
, data_source_description(DiskLocal::getLocalDataSourceDescription(file_path_))
, data(readFile(file_path_))
, data(readFile(file_path_, read_settings_))
{
}
BackupEntryFromSmallFile::BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, bool copy_encrypted_)
BackupEntryFromSmallFile::BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, const ReadSettings & read_settings_, bool copy_encrypted_)
: disk(disk_)
, file_path(file_path_)
, data_source_description(disk_->getDataSourceDescription())
, copy_encrypted(copy_encrypted_ && data_source_description.is_encrypted)
, data(readFile(disk_, file_path, copy_encrypted))
, data(readFile(disk_, file_path, read_settings_, copy_encrypted))
{
}

View File

@ -13,8 +13,8 @@ using DiskPtr = std::shared_ptr<IDisk>;
class BackupEntryFromSmallFile : public BackupEntryWithChecksumCalculation<IBackupEntry>
{
public:
explicit BackupEntryFromSmallFile(const String & file_path_);
BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, bool copy_encrypted_ = false);
explicit BackupEntryFromSmallFile(const String & file_path_, const ReadSettings & read_settings_);
BackupEntryFromSmallFile(const DiskPtr & disk_, const String & file_path_, const ReadSettings & read_settings_, bool copy_encrypted_ = false);
std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings &) const override;
UInt64 getSize() const override { return data.size(); }

View File

@ -6,7 +6,7 @@ namespace DB
{
template <typename Base>
UInt128 BackupEntryWithChecksumCalculation<Base>::getChecksum() const
UInt128 BackupEntryWithChecksumCalculation<Base>::getChecksum(const ReadSettings & read_settings) const
{
{
std::lock_guard lock{checksum_calculation_mutex};
@ -26,7 +26,7 @@ UInt128 BackupEntryWithChecksumCalculation<Base>::getChecksum() const
}
else
{
auto read_buffer = this->getReadBuffer(ReadSettings{}.adjustBufferSize(size));
auto read_buffer = this->getReadBuffer(read_settings.adjustBufferSize(size));
HashingReadBuffer hashing_read_buffer(*read_buffer);
hashing_read_buffer.ignoreAll();
calculated_checksum = hashing_read_buffer.getHash();
@ -37,23 +37,20 @@ UInt128 BackupEntryWithChecksumCalculation<Base>::getChecksum() const
}
template <typename Base>
std::optional<UInt128> BackupEntryWithChecksumCalculation<Base>::getPartialChecksum(size_t prefix_length) const
std::optional<UInt128> BackupEntryWithChecksumCalculation<Base>::getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const
{
if (prefix_length == 0)
return 0;
size_t size = this->getSize();
if (prefix_length >= size)
return this->getChecksum();
return this->getChecksum(read_settings);
std::lock_guard lock{checksum_calculation_mutex};
ReadSettings read_settings;
if (calculated_checksum)
read_settings.adjustBufferSize(calculated_checksum ? prefix_length : size);
auto read_buffer = this->getReadBuffer(read_settings);
auto read_buffer = this->getReadBuffer(read_settings.adjustBufferSize(calculated_checksum ? prefix_length : size));
HashingReadBuffer hashing_read_buffer(*read_buffer);
hashing_read_buffer.ignore(prefix_length);
auto partial_checksum = hashing_read_buffer.getHash();

View File

@ -11,8 +11,8 @@ template <typename Base>
class BackupEntryWithChecksumCalculation : public Base
{
public:
UInt128 getChecksum() const override;
std::optional<UInt128> getPartialChecksum(size_t prefix_length) const override;
UInt128 getChecksum(const ReadSettings & read_settings) const override;
std::optional<UInt128> getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const override;
private:
mutable std::optional<UInt128> calculated_checksum;

View File

@ -17,8 +17,8 @@ public:
std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const override { return entry->getReadBuffer(read_settings); }
UInt64 getSize() const override { return entry->getSize(); }
UInt128 getChecksum() const override { return entry->getChecksum(); }
std::optional<UInt128> getPartialChecksum(size_t prefix_length) const override { return entry->getPartialChecksum(prefix_length); }
UInt128 getChecksum(const ReadSettings & read_settings) const override { return entry->getChecksum(read_settings); }
std::optional<UInt128> getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const override { return entry->getPartialChecksum(prefix_length, read_settings); }
DataSourceDescription getDataSourceDescription() const override { return entry->getDataSourceDescription(); }
bool isEncryptedByDisk() const override { return entry->isEncryptedByDisk(); }
bool isFromFile() const override { return entry->isFromFile(); }

View File

@ -3,6 +3,8 @@
#include <Backups/IBackup.h>
#include <Backups/BackupInfo.h>
#include <Core/Types.h>
#include <IO/ReadSettings.h>
#include <IO/WriteSettings.h>
#include <Parsers/IAST_fwd.h>
#include <boost/noncopyable.hpp>
#include <memory>
@ -37,6 +39,8 @@ public:
std::optional<UUID> backup_uuid;
bool deduplicate_files = true;
bool allow_s3_native_copy = true;
ReadSettings read_settings;
WriteSettings write_settings;
};
static BackupFactory & instance();

View File

@ -57,12 +57,12 @@ namespace
/// Calculate checksum for backup entry if it's empty.
/// Also able to calculate additional checksum of some prefix.
ChecksumsForNewEntry calculateNewEntryChecksumsIfNeeded(const BackupEntryPtr & entry, size_t prefix_size)
ChecksumsForNewEntry calculateNewEntryChecksumsIfNeeded(const BackupEntryPtr & entry, size_t prefix_size, const ReadSettings & read_settings)
{
ChecksumsForNewEntry res;
/// The partial checksum should be calculated before the full checksum to enable optimization in BackupEntryWithChecksumCalculation.
res.prefix_checksum = entry->getPartialChecksum(prefix_size);
res.full_checksum = entry->getChecksum();
res.prefix_checksum = entry->getPartialChecksum(prefix_size, read_settings);
res.full_checksum = entry->getChecksum(read_settings);
return res;
}
@ -93,7 +93,12 @@ String BackupFileInfo::describe() const
}
BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const BackupEntryPtr & backup_entry, const BackupPtr & base_backup, Poco::Logger * log)
BackupFileInfo buildFileInfoForBackupEntry(
const String & file_name,
const BackupEntryPtr & backup_entry,
const BackupPtr & base_backup,
const ReadSettings & read_settings,
Poco::Logger * log)
{
auto adjusted_path = removeLeadingSlash(file_name);
@ -126,7 +131,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu
/// File with the same name but smaller size exist in previous backup
if (check_base == CheckBackupResult::HasPrefix)
{
auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, base_backup_file_info->first);
auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, base_backup_file_info->first, read_settings);
info.checksum = checksums.full_checksum;
/// We have prefix of this file in backup with the same checksum.
@ -146,7 +151,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu
{
/// We have full file or have nothing, first of all let's get checksum
/// of current file
auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, 0);
auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, 0, read_settings);
info.checksum = checksums.full_checksum;
if (info.checksum == base_backup_file_info->second)
@ -169,7 +174,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu
}
else
{
auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, 0);
auto checksums = calculateNewEntryChecksumsIfNeeded(backup_entry, 0, read_settings);
info.checksum = checksums.full_checksum;
}
@ -188,7 +193,7 @@ BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const Backu
return info;
}
BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, ThreadPool & thread_pool)
BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, const ReadSettings & read_settings, ThreadPool & thread_pool)
{
BackupFileInfos infos;
infos.resize(backup_entries.size());
@ -210,7 +215,7 @@ BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entr
++num_active_jobs;
}
auto job = [&mutex, &num_active_jobs, &event, &exception, &infos, &backup_entries, &base_backup, &thread_group, i, log](bool async)
auto job = [&mutex, &num_active_jobs, &event, &exception, &infos, &backup_entries, &read_settings, &base_backup, &thread_group, i, log](bool async)
{
SCOPE_EXIT_SAFE({
std::lock_guard lock{mutex};
@ -237,7 +242,7 @@ BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entr
return;
}
infos[i] = buildFileInfoForBackupEntry(name, entry, base_backup, log);
infos[i] = buildFileInfoForBackupEntry(name, entry, base_backup, read_settings, log);
}
catch (...)
{

View File

@ -13,6 +13,7 @@ class IBackupEntry;
using BackupPtr = std::shared_ptr<const IBackup>;
using BackupEntryPtr = std::shared_ptr<const IBackupEntry>;
using BackupEntries = std::vector<std::pair<String, BackupEntryPtr>>;
struct ReadSettings;
/// Information about a file stored in a backup.
@ -66,9 +67,9 @@ struct BackupFileInfo
using BackupFileInfos = std::vector<BackupFileInfo>;
/// Builds a BackupFileInfo for a specified backup entry.
BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const BackupEntryPtr & backup_entry, const BackupPtr & base_backup, Poco::Logger * log);
BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const BackupEntryPtr & backup_entry, const BackupPtr & base_backup, const ReadSettings & read_settings, Poco::Logger * log);
/// Builds a vector of BackupFileInfos for specified backup entries.
BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, ThreadPool & thread_pool);
BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, const ReadSettings & read_settings, ThreadPool & thread_pool);
}

View File

@ -4,17 +4,16 @@
#include <IO/copyData.h>
#include <IO/WriteBufferFromFileBase.h>
#include <IO/ReadBufferFromFileBase.h>
#include <Interpreters/Context.h>
#include <Common/logger_useful.h>
namespace DB
{
BackupReaderDefault::BackupReaderDefault(Poco::Logger * log_, const ContextPtr & context_)
BackupReaderDefault::BackupReaderDefault(const ReadSettings & read_settings_, const WriteSettings & write_settings_, Poco::Logger * log_)
: log(log_)
, read_settings(context_->getBackupReadSettings())
, write_settings(context_->getWriteSettings())
, read_settings(read_settings_)
, write_settings(write_settings_)
, write_buffer_size(DBMS_DEFAULT_BUFFER_SIZE)
{
}
@ -37,10 +36,10 @@ void BackupReaderDefault::copyFileToDisk(const String & path_in_backup, size_t f
write_buffer->finalize();
}
BackupWriterDefault::BackupWriterDefault(Poco::Logger * log_, const ContextPtr & context_)
BackupWriterDefault::BackupWriterDefault(const ReadSettings & read_settings_, const WriteSettings & write_settings_, Poco::Logger * log_)
: log(log_)
, read_settings(context_->getBackupReadSettings())
, write_settings(context_->getWriteSettings())
, read_settings(read_settings_)
, write_settings(write_settings_)
, write_buffer_size(DBMS_DEFAULT_BUFFER_SIZE)
{
}

View File

@ -3,7 +3,6 @@
#include <Backups/BackupIO.h>
#include <IO/ReadSettings.h>
#include <IO/WriteSettings.h>
#include <Interpreters/Context_fwd.h>
namespace DB
@ -19,7 +18,7 @@ enum class WriteMode;
class BackupReaderDefault : public IBackupReader
{
public:
BackupReaderDefault(Poco::Logger * log_, const ContextPtr & context_);
BackupReaderDefault(const ReadSettings & read_settings_, const WriteSettings & write_settings_, Poco::Logger * log_);
~BackupReaderDefault() override = default;
/// The function copyFileToDisk() can be much faster than reading the file with readFile() and then writing it to some disk.
@ -46,7 +45,7 @@ protected:
class BackupWriterDefault : public IBackupWriter
{
public:
BackupWriterDefault(Poco::Logger * log_, const ContextPtr & context_);
BackupWriterDefault(const ReadSettings & read_settings_, const WriteSettings & write_settings_, Poco::Logger * log_);
~BackupWriterDefault() override = default;
bool fileContentsEqual(const String & file_name, const String & expected_file_contents) override;

View File

@ -8,8 +8,8 @@
namespace DB
{
BackupReaderDisk::BackupReaderDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_)
: BackupReaderDefault(&Poco::Logger::get("BackupReaderDisk"), context_)
BackupReaderDisk::BackupReaderDisk(const DiskPtr & disk_, const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_)
: BackupReaderDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupReaderDisk"))
, disk(disk_)
, root_path(root_path_)
, data_source_description(disk->getDataSourceDescription())
@ -56,8 +56,8 @@ void BackupReaderDisk::copyFileToDisk(const String & path_in_backup, size_t file
}
BackupWriterDisk::BackupWriterDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_)
: BackupWriterDefault(&Poco::Logger::get("BackupWriterDisk"), context_)
BackupWriterDisk::BackupWriterDisk(const DiskPtr & disk_, const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_)
: BackupWriterDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupWriterDisk"))
, disk(disk_)
, root_path(root_path_)
, data_source_description(disk->getDataSourceDescription())

View File

@ -13,7 +13,7 @@ using DiskPtr = std::shared_ptr<IDisk>;
class BackupReaderDisk : public BackupReaderDefault
{
public:
BackupReaderDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_);
BackupReaderDisk(const DiskPtr & disk_, const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_);
~BackupReaderDisk() override;
bool fileExists(const String & file_name) override;
@ -33,7 +33,7 @@ private:
class BackupWriterDisk : public BackupWriterDefault
{
public:
BackupWriterDisk(const DiskPtr & disk_, const String & root_path_, const ContextPtr & context_);
BackupWriterDisk(const DiskPtr & disk_, const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_);
~BackupWriterDisk() override;
bool fileExists(const String & file_name) override;

View File

@ -16,8 +16,8 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
BackupReaderFile::BackupReaderFile(const String & root_path_, const ContextPtr & context_)
: BackupReaderDefault(&Poco::Logger::get("BackupReaderFile"), context_)
BackupReaderFile::BackupReaderFile(const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_)
: BackupReaderDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupReaderFile"))
, root_path(root_path_)
, data_source_description(DiskLocal::getLocalDataSourceDescription(root_path))
{
@ -74,8 +74,8 @@ void BackupReaderFile::copyFileToDisk(const String & path_in_backup, size_t file
}
BackupWriterFile::BackupWriterFile(const String & root_path_, const ContextPtr & context_)
: BackupWriterDefault(&Poco::Logger::get("BackupWriterFile"), context_)
BackupWriterFile::BackupWriterFile(const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_)
: BackupWriterDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupWriterFile"))
, root_path(root_path_)
, data_source_description(DiskLocal::getLocalDataSourceDescription(root_path))
{

View File

@ -11,7 +11,7 @@ namespace DB
class BackupReaderFile : public BackupReaderDefault
{
public:
explicit BackupReaderFile(const String & root_path_, const ContextPtr & context_);
explicit BackupReaderFile(const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_);
bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
@ -29,7 +29,7 @@ private:
class BackupWriterFile : public BackupWriterDefault
{
public:
BackupWriterFile(const String & root_path_, const ContextPtr & context_);
BackupWriterFile(const String & root_path_, const ReadSettings & read_settings_, const WriteSettings & write_settings_);
bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;

View File

@ -101,8 +101,14 @@ namespace
BackupReaderS3::BackupReaderS3(
const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_)
: BackupReaderDefault(&Poco::Logger::get("BackupReaderS3"), context_)
const S3::URI & s3_uri_,
const String & access_key_id_,
const String & secret_access_key_,
bool allow_s3_native_copy,
const ReadSettings & read_settings_,
const WriteSettings & write_settings_,
const ContextPtr & context_)
: BackupReaderDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupReaderS3"))
, s3_uri(s3_uri_)
, client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_))
, request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings)
@ -178,8 +184,15 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s
BackupWriterS3::BackupWriterS3(
const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const String & storage_class_name, const ContextPtr & context_)
: BackupWriterDefault(&Poco::Logger::get("BackupWriterS3"), context_)
const S3::URI & s3_uri_,
const String & access_key_id_,
const String & secret_access_key_,
bool allow_s3_native_copy,
const String & storage_class_name,
const ReadSettings & read_settings_,
const WriteSettings & write_settings_,
const ContextPtr & context_)
: BackupWriterDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupWriterS3"))
, s3_uri(s3_uri_)
, client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_))
, request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings)

View File

@ -17,7 +17,7 @@ namespace DB
class BackupReaderS3 : public BackupReaderDefault
{
public:
BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_);
BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_);
~BackupReaderS3() override;
bool fileExists(const String & file_name) override;
@ -38,7 +38,7 @@ private:
class BackupWriterS3 : public BackupWriterDefault
{
public:
BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const String & storage_class_name, const ContextPtr & context_);
BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const String & storage_class_name, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_);
~BackupWriterS3() override;
bool fileExists(const String & file_name) override;

View File

@ -27,6 +27,7 @@ namespace ErrorCodes
M(Bool, decrypt_files_from_encrypted_disks) \
M(Bool, deduplicate_files) \
M(Bool, allow_s3_native_copy) \
M(Bool, read_from_filesystem_cache) \
M(UInt64, shard_num) \
M(UInt64, replica_num) \
M(Bool, internal) \

View File

@ -44,6 +44,10 @@ struct BackupSettings
/// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs)
bool allow_s3_native_copy = true;
/// Allow to use the filesystem cache in passive mode - benefit from the existing cache entries,
/// but don't put more entries into the cache.
bool read_from_filesystem_cache = true;
/// 1-based shard index to store in the backup. 0 means all shards.
/// Can only be used with BACKUP ON CLUSTER.
size_t shard_num = 0;

View File

@ -178,6 +178,42 @@ namespace
{
return status == BackupStatus::RESTORING;
}
/// We use slightly different read and write settings for backup/restore
/// with a separate throttler and limited usage of filesystem cache.
ReadSettings getReadSettingsForBackup(const ContextPtr & context, const BackupSettings & backup_settings)
{
auto read_settings = context->getReadSettings();
read_settings.remote_throttler = context->getBackupsThrottler();
read_settings.local_throttler = context->getBackupsThrottler();
read_settings.enable_filesystem_cache = backup_settings.read_from_filesystem_cache;
read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = backup_settings.read_from_filesystem_cache;
return read_settings;
}
WriteSettings getWriteSettingsForBackup(const ContextPtr & context)
{
auto write_settings = context->getWriteSettings();
write_settings.enable_filesystem_cache_on_write_operations = false;
return write_settings;
}
ReadSettings getReadSettingsForRestore(const ContextPtr & context)
{
auto read_settings = context->getReadSettings();
read_settings.remote_throttler = context->getBackupsThrottler();
read_settings.local_throttler = context->getBackupsThrottler();
read_settings.enable_filesystem_cache = false;
read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false;
return read_settings;
}
WriteSettings getWriteSettingsForRestore(const ContextPtr & context)
{
auto write_settings = context->getWriteSettings();
write_settings.enable_filesystem_cache_on_write_operations = false;
return write_settings;
}
}
@ -350,6 +386,8 @@ void BackupsWorker::doBackup(
backup_create_params.backup_uuid = backup_settings.backup_uuid;
backup_create_params.deduplicate_files = backup_settings.deduplicate_files;
backup_create_params.allow_s3_native_copy = backup_settings.allow_s3_native_copy;
backup_create_params.read_settings = getReadSettingsForBackup(context, backup_settings);
backup_create_params.write_settings = getWriteSettingsForBackup(context);
BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params);
/// Write the backup.
@ -378,12 +416,12 @@ void BackupsWorker::doBackup(
/// Prepare backup entries.
BackupEntries backup_entries;
{
BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, context};
BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, backup_create_params.read_settings, context};
backup_entries = backup_entries_collector.run();
}
/// Write the backup entries to the backup.
buildFileInfosForBackupEntries(backup, backup_entries, backup_coordination);
buildFileInfosForBackupEntries(backup, backup_entries, backup_create_params.read_settings, backup_coordination);
writeBackupEntries(backup, std::move(backup_entries), backup_id, backup_coordination, backup_settings.internal);
/// We have written our backup entries, we need to tell other hosts (they could be waiting for it).
@ -433,12 +471,12 @@ void BackupsWorker::doBackup(
}
void BackupsWorker::buildFileInfosForBackupEntries(const BackupPtr & backup, const BackupEntries & backup_entries, std::shared_ptr<IBackupCoordination> backup_coordination)
void BackupsWorker::buildFileInfosForBackupEntries(const BackupPtr & backup, const BackupEntries & backup_entries, const ReadSettings & read_settings, std::shared_ptr<IBackupCoordination> backup_coordination)
{
LOG_TRACE(log, "{}", Stage::BUILDING_FILE_INFOS);
backup_coordination->setStage(Stage::BUILDING_FILE_INFOS, "");
backup_coordination->waitForStage(Stage::BUILDING_FILE_INFOS);
backup_coordination->addFileInfos(::DB::buildFileInfosForBackupEntries(backup_entries, backup->getBaseBackup(), *backups_thread_pool));
backup_coordination->addFileInfos(::DB::buildFileInfosForBackupEntries(backup_entries, backup->getBaseBackup(), read_settings, *backups_thread_pool));
}
@ -650,6 +688,8 @@ void BackupsWorker::doRestore(
backup_open_params.base_backup_info = restore_settings.base_backup_info;
backup_open_params.password = restore_settings.password;
backup_open_params.allow_s3_native_copy = restore_settings.allow_s3_native_copy;
backup_open_params.read_settings = getReadSettingsForRestore(context);
backup_open_params.write_settings = getWriteSettingsForRestore(context);
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);
String current_database = context->getCurrentDatabase();

View File

@ -24,6 +24,7 @@ using BackupPtr = std::shared_ptr<const IBackup>;
class IBackupEntry;
using BackupEntries = std::vector<std::pair<String, std::shared_ptr<const IBackupEntry>>>;
using DataRestoreTasks = std::vector<std::function<void()>>;
struct ReadSettings;
/// Manager of backups and restores: executes backups and restores' threads in the background.
/// Keeps information about backups and restores started in this session.
@ -107,7 +108,7 @@ private:
bool called_async);
/// Builds file infos for specified backup entries.
void buildFileInfosForBackupEntries(const BackupPtr & backup, const BackupEntries & backup_entries, std::shared_ptr<IBackupCoordination> backup_coordination);
void buildFileInfosForBackupEntries(const BackupPtr & backup, const BackupEntries & backup_entries, const ReadSettings & read_settings, std::shared_ptr<IBackupCoordination> backup_coordination);
/// Write backup entries to an opened backup.
void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, const OperationID & backup_id, std::shared_ptr<IBackupCoordination> backup_coordination, bool internal);

View File

@ -19,8 +19,8 @@ public:
std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const override { return getInternalBackupEntry()->getReadBuffer(read_settings); }
UInt64 getSize() const override { return getInternalBackupEntry()->getSize(); }
UInt128 getChecksum() const override { return getInternalBackupEntry()->getChecksum(); }
std::optional<UInt128> getPartialChecksum(size_t prefix_length) const override { return getInternalBackupEntry()->getPartialChecksum(prefix_length); }
UInt128 getChecksum(const ReadSettings & read_settings) const override { return getInternalBackupEntry()->getChecksum(read_settings); }
std::optional<UInt128> getPartialChecksum(size_t prefix_length, const ReadSettings & read_settings) const override { return getInternalBackupEntry()->getPartialChecksum(prefix_length, read_settings); }
DataSourceDescription getDataSourceDescription() const override { return getInternalBackupEntry()->getDataSourceDescription(); }
bool isEncryptedByDisk() const override { return getInternalBackupEntry()->isEncryptedByDisk(); }
bool isFromFile() const override { return getInternalBackupEntry()->isFromFile(); }

View File

@ -21,11 +21,11 @@ public:
virtual UInt64 getSize() const = 0;
/// Returns the checksum of the data.
virtual UInt128 getChecksum() const = 0;
virtual UInt128 getChecksum(const ReadSettings & read_settings) const = 0;
/// Returns a partial checksum, i.e. the checksum calculated for a prefix part of the data.
/// Can return nullopt if the partial checksum is too difficult to calculate.
virtual std::optional<UInt128> getPartialChecksum(size_t /* prefix_length */) const { return {}; }
virtual std::optional<UInt128> getPartialChecksum(size_t /* prefix_length */, const ReadSettings &) const { return {}; }
/// Returns a read buffer for reading the data.
virtual std::unique_ptr<SeekableReadBuffer> getReadBuffer(const ReadSettings & read_settings) const = 0;

View File

@ -107,12 +107,27 @@ void registerBackupEngineS3(BackupFactory & factory)
if (params.open_mode == IBackup::OpenMode::READ)
{
auto reader = std::make_shared<BackupReaderS3>(S3::URI{s3_uri}, access_key_id, secret_access_key, params.allow_s3_native_copy, params.context);
auto reader = std::make_shared<BackupReaderS3>(S3::URI{s3_uri},
access_key_id,
secret_access_key,
params.allow_s3_native_copy,
params.read_settings,
params.write_settings,
params.context);
return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context);
}
else
{
auto writer = std::make_shared<BackupWriterS3>(S3::URI{s3_uri}, access_key_id, secret_access_key, params.allow_s3_native_copy, params.s3_storage_class, params.context);
auto writer = std::make_shared<BackupWriterS3>(S3::URI{s3_uri},
access_key_id,
secret_access_key,
params.allow_s3_native_copy,
params.s3_storage_class,
params.read_settings,
params.write_settings,
params.context);
return std::make_unique<BackupImpl>(
backup_name_for_logging,
archive_params,

View File

@ -169,18 +169,18 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
{
std::shared_ptr<IBackupReader> reader;
if (engine_name == "File")
reader = std::make_shared<BackupReaderFile>(path, params.context);
reader = std::make_shared<BackupReaderFile>(path, params.read_settings, params.write_settings);
else
reader = std::make_shared<BackupReaderDisk>(disk, path, params.context);
reader = std::make_shared<BackupReaderDisk>(disk, path, params.read_settings, params.write_settings);
return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context);
}
else
{
std::shared_ptr<IBackupWriter> writer;
if (engine_name == "File")
writer = std::make_shared<BackupWriterFile>(path, params.context);
writer = std::make_shared<BackupWriterFile>(path, params.read_settings, params.write_settings);
else
writer = std::make_shared<BackupWriterDisk>(disk, path, params.context);
writer = std::make_shared<BackupWriterDisk>(disk, path, params.read_settings, params.write_settings);
return std::make_unique<BackupImpl>(
backup_name_for_logging,
archive_params,

View File

@ -69,14 +69,14 @@ protected:
static String getChecksum(const BackupEntryPtr & backup_entry)
{
return getHexUIntUppercase(backup_entry->getChecksum());
return getHexUIntUppercase(backup_entry->getChecksum({}));
}
static const constexpr std::string_view NO_CHECKSUM = "no checksum";
static String getPartialChecksum(const BackupEntryPtr & backup_entry, size_t prefix_length)
{
auto partial_checksum = backup_entry->getPartialChecksum(prefix_length);
auto partial_checksum = backup_entry->getPartialChecksum(prefix_length, {});
if (!partial_checksum)
return String{NO_CHECKSUM};
return getHexUIntUppercase(*partial_checksum);
@ -218,7 +218,7 @@ TEST_F(BackupEntriesTest, PartialChecksumBeforeFullChecksum)
TEST_F(BackupEntriesTest, BackupEntryFromSmallFile)
{
writeFile(local_disk, "a.txt");
auto entry = std::make_shared<BackupEntryFromSmallFile>(local_disk, "a.txt");
auto entry = std::make_shared<BackupEntryFromSmallFile>(local_disk, "a.txt", ReadSettings{});
local_disk->removeFile("a.txt");
@ -239,7 +239,7 @@ TEST_F(BackupEntriesTest, DecryptedEntriesFromEncryptedDisk)
std::pair<BackupEntryPtr, bool /* partial_checksum_allowed */> test_cases[]
= {{std::make_shared<BackupEntryFromImmutableFile>(encrypted_disk, "a.txt"), false},
{std::make_shared<BackupEntryFromAppendOnlyFile>(encrypted_disk, "a.txt"), true},
{std::make_shared<BackupEntryFromSmallFile>(encrypted_disk, "a.txt"), true}};
{std::make_shared<BackupEntryFromSmallFile>(encrypted_disk, "a.txt", ReadSettings{}), true}};
for (const auto & [entry, partial_checksum_allowed] : test_cases)
{
EXPECT_EQ(entry->getSize(), 9);
@ -258,7 +258,7 @@ TEST_F(BackupEntriesTest, DecryptedEntriesFromEncryptedDisk)
BackupEntryPtr entries[]
= {std::make_shared<BackupEntryFromImmutableFile>(encrypted_disk, "empty.txt"),
std::make_shared<BackupEntryFromAppendOnlyFile>(encrypted_disk, "empty.txt"),
std::make_shared<BackupEntryFromSmallFile>(encrypted_disk, "empty.txt")};
std::make_shared<BackupEntryFromSmallFile>(encrypted_disk, "empty.txt", ReadSettings{})};
for (const auto & entry : entries)
{
EXPECT_EQ(entry->getSize(), 0);
@ -288,7 +288,7 @@ TEST_F(BackupEntriesTest, EncryptedEntriesFromEncryptedDisk)
BackupEntryPtr entries[]
= {std::make_shared<BackupEntryFromImmutableFile>(encrypted_disk, "a.txt", /* copy_encrypted= */ true),
std::make_shared<BackupEntryFromAppendOnlyFile>(encrypted_disk, "a.txt", /* copy_encrypted= */ true),
std::make_shared<BackupEntryFromSmallFile>(encrypted_disk, "a.txt", /* copy_encrypted= */ true)};
std::make_shared<BackupEntryFromSmallFile>(encrypted_disk, "a.txt", ReadSettings{}, /* copy_encrypted= */ true)};
auto encrypted_checksum = getChecksum(entries[0]);
EXPECT_NE(encrypted_checksum, NO_CHECKSUM);
@ -322,7 +322,7 @@ TEST_F(BackupEntriesTest, EncryptedEntriesFromEncryptedDisk)
BackupEntryPtr entries[]
= {std::make_shared<BackupEntryFromImmutableFile>(encrypted_disk, "empty.txt", /* copy_encrypted= */ true),
std::make_shared<BackupEntryFromAppendOnlyFile>(encrypted_disk, "empty.txt", /* copy_encrypted= */ true),
std::make_shared<BackupEntryFromSmallFile>(encrypted_disk, "empty.txt", /* copy_encrypted= */ true)};
std::make_shared<BackupEntryFromSmallFile>(encrypted_disk, "empty.txt", ReadSettings{}, /* copy_encrypted= */ true)};
for (const auto & entry : entries)
{
EXPECT_EQ(entry->getSize(), 0);

View File

@ -2313,15 +2313,28 @@ void ClientBase::runInteractive()
LineReader::Patterns query_extenders = {"\\"};
LineReader::Patterns query_delimiters = {";", "\\G", "\\G;"};
char word_break_characters[] = " \t\v\f\a\b\r\n`~!@#$%^&*()-=+[{]}\\|;:'\",<.>/?";
#if USE_REPLXX
replxx::Replxx::highlighter_callback_t highlight_callback{};
if (config().getBool("highlight", true))
highlight_callback = highlight;
ReplxxLineReader lr(*suggest, history_file, config().has("multiline"), query_extenders, query_delimiters, highlight_callback);
ReplxxLineReader lr(
*suggest,
history_file,
config().has("multiline"),
query_extenders,
query_delimiters,
word_break_characters,
highlight_callback);
#else
LineReader lr(history_file, config().has("multiline"), query_extenders, query_delimiters);
LineReader lr(
history_file,
config().has("multiline"),
query_extenders,
query_delimiters,
word_break_characters);
#endif
static const std::initializer_list<std::pair<String, String>> backslash_aliases =

View File

@ -66,7 +66,7 @@ void addNewWords(Words & to, const Words & from, Compare comp)
namespace DB
{
replxx::Replxx::completions_t LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length)
replxx::Replxx::completions_t LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length, const char * word_break_characters)
{
std::string_view last_word;
@ -135,7 +135,10 @@ void LineReader::Suggest::addWords(Words && new_words)
}
LineReader::LineReader(const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_)
: history_file_path(history_file_path_), multiline(multiline_), extenders(std::move(extenders_)), delimiters(std::move(delimiters_))
: history_file_path(history_file_path_)
, multiline(multiline_)
, extenders(std::move(extenders_))
, delimiters(std::move(delimiters_))
{
/// FIXME: check extender != delimiter
}

View File

@ -21,7 +21,7 @@ public:
using Callback = std::function<Words(const String & prefix, size_t prefix_length)>;
/// Get vector for the matched range of words if any.
replxx::Replxx::completions_t getCompletions(const String & prefix, size_t prefix_length);
replxx::Replxx::completions_t getCompletions(const String & prefix, size_t prefix_length, const char * word_break_characters);
void addWords(Words && new_words);
void setCompletionsCallback(Callback && callback) { custom_completions_callback = callback; }
@ -65,7 +65,6 @@ protected:
};
const String history_file_path;
static constexpr char word_break_characters[] = " \t\v\f\a\b\r\n`~!@#$%^&*()-=+[{]}\\|;:'\",<.>/?";
String input;

View File

@ -35,6 +35,18 @@ LocalConnection::LocalConnection(ContextPtr context_, bool send_progress_, bool
LocalConnection::~LocalConnection()
{
/// Last query may not have been finished or cancelled due to exception on client side.
if (state && !state->is_finished && !state->is_cancelled)
{
try
{
LocalConnection::sendCancel();
}
catch (...)
{
/// Just ignore any exception.
}
}
state.reset();
}
@ -73,6 +85,10 @@ void LocalConnection::sendQuery(
bool,
std::function<void(const Progress &)> process_progress_callback)
{
/// Last query may not have been finished or cancelled due to exception on client side.
if (state && !state->is_finished && !state->is_cancelled)
sendCancel();
/// Suggestion comes without client_info.
if (client_info)
query_context = session.makeQueryContext(*client_info);
@ -204,6 +220,10 @@ void LocalConnection::sendCancel()
state->is_cancelled = true;
if (state->executor)
state->executor->cancel();
if (state->pushing_executor)
state->pushing_executor->cancel();
if (state->pushing_async_executor)
state->pushing_async_executor->cancel();
}
bool LocalConnection::pullBlock(Block & block)

View File

@ -287,8 +287,10 @@ ReplxxLineReader::ReplxxLineReader(
bool multiline_,
Patterns extenders_,
Patterns delimiters_,
const char word_break_characters_[],
replxx::Replxx::highlighter_callback_t highlighter_)
: LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)), highlighter(std::move(highlighter_))
, word_break_characters(word_break_characters_)
, editor(getEditor())
{
using namespace std::placeholders;
@ -326,9 +328,9 @@ ReplxxLineReader::ReplxxLineReader(
rx.install_window_change_handler();
auto callback = [&suggest] (const String & context, size_t context_size)
auto callback = [&suggest, this] (const String & context, size_t context_size)
{
return suggest.getCompletions(context, context_size);
return suggest.getCompletions(context, context_size, word_break_characters);
};
rx.set_completion_callback(callback);

View File

@ -15,6 +15,7 @@ public:
bool multiline,
Patterns extenders_,
Patterns delimiters_,
const char word_break_characters_[],
replxx::Replxx::highlighter_callback_t highlighter_);
~ReplxxLineReader() override;
@ -33,6 +34,8 @@ private:
replxx::Replxx rx;
replxx::Replxx::highlighter_callback_t highlighter;
const char * word_break_characters;
// used to call flock() to synchronize multiple clients using same history file
int history_file_fd = -1;
bool bracketed_paste_enabled = false;

View File

@ -582,7 +582,8 @@
M(697, CANNOT_RESTORE_TO_NONENCRYPTED_DISK) \
M(698, INVALID_REDIS_STORAGE_TYPE) \
M(699, INVALID_REDIS_TABLE_STRUCTURE) \
M(700, USER_SESSION_LIMIT_EXCEEDED) \
M(700, USER_SESSION_LIMIT_EXCEEDED) \
M(701, CLUSTER_DOESNT_EXIST) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

View File

@ -50,6 +50,8 @@ void abortOnFailedAssertion(const String & description)
abort();
}
bool terminate_on_any_exception = false;
/// - Aborts the process if error code is LOGICAL_ERROR.
/// - Increments error codes statistics.
void handle_error_code([[maybe_unused]] const std::string & msg, int code, bool remote, const Exception::FramePointers & trace)
@ -84,6 +86,8 @@ Exception::Exception(const MessageMasked & msg_masked, int code, bool remote_)
: Poco::Exception(msg_masked.msg, code)
, remote(remote_)
{
if (terminate_on_any_exception)
std::terminate();
capture_thread_frame_pointers = thread_frame_pointers;
handle_error_code(msg_masked.msg, code, remote, getStackFramePointers());
}
@ -92,6 +96,8 @@ Exception::Exception(MessageMasked && msg_masked, int code, bool remote_)
: Poco::Exception(msg_masked.msg, code)
, remote(remote_)
{
if (terminate_on_any_exception)
std::terminate();
capture_thread_frame_pointers = thread_frame_pointers;
handle_error_code(message(), code, remote, getStackFramePointers());
}
@ -99,6 +105,8 @@ Exception::Exception(MessageMasked && msg_masked, int code, bool remote_)
Exception::Exception(CreateFromPocoTag, const Poco::Exception & exc)
: Poco::Exception(exc.displayText(), ErrorCodes::POCO_EXCEPTION)
{
if (terminate_on_any_exception)
std::terminate();
capture_thread_frame_pointers = thread_frame_pointers;
#ifdef STD_EXCEPTION_HAS_STACK_TRACE
auto * stack_trace_frames = exc.get_stack_trace_frames();
@ -111,6 +119,8 @@ Exception::Exception(CreateFromPocoTag, const Poco::Exception & exc)
Exception::Exception(CreateFromSTDTag, const std::exception & exc)
: Poco::Exception(demangle(typeid(exc).name()) + ": " + String(exc.what()), ErrorCodes::STD_EXCEPTION)
{
if (terminate_on_any_exception)
std::terminate();
capture_thread_frame_pointers = thread_frame_pointers;
#ifdef STD_EXCEPTION_HAS_STACK_TRACE
auto * stack_trace_frames = exc.get_stack_trace_frames();

View File

@ -20,6 +20,10 @@ namespace DB
void abortOnFailedAssertion(const String & description);
/// This flag can be set for testing purposes - to check that no exceptions are thrown.
extern bool terminate_on_any_exception;
class Exception : public Poco::Exception
{
public:
@ -27,17 +31,23 @@ public:
Exception()
{
if (terminate_on_any_exception)
std::terminate();
capture_thread_frame_pointers = thread_frame_pointers;
}
Exception(const PreformattedMessage & msg, int code): Exception(msg.text, code)
{
if (terminate_on_any_exception)
std::terminate();
capture_thread_frame_pointers = thread_frame_pointers;
message_format_string = msg.format_string;
}
Exception(PreformattedMessage && msg, int code): Exception(std::move(msg.text), code)
{
if (terminate_on_any_exception)
std::terminate();
capture_thread_frame_pointers = thread_frame_pointers;
message_format_string = msg.format_string;
}

View File

@ -33,6 +33,9 @@ void HTTPHeaderFilter::setValuesFromConfig(const Poco::Util::AbstractConfigurati
{
std::lock_guard guard(mutex);
forbidden_headers.clear();
forbidden_headers_regexp.clear();
if (config.has("http_forbid_headers"))
{
std::vector<std::string> keys;
@ -46,11 +49,6 @@ void HTTPHeaderFilter::setValuesFromConfig(const Poco::Util::AbstractConfigurati
forbidden_headers.insert(config.getString("http_forbid_headers." + key));
}
}
else
{
forbidden_headers.clear();
forbidden_headers_regexp.clear();
}
}
}

View File

@ -8,6 +8,7 @@
#include <Parsers/formatAST.h>
#include <Parsers/ASTCreateNamedCollectionQuery.h>
#include <Parsers/ASTAlterNamedCollectionQuery.h>
#include <Parsers/ASTDropNamedCollectionQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/parseQuery.h>
@ -225,24 +226,15 @@ public:
void remove(const std::string & collection_name)
{
if (!removeIfExists(collection_name))
auto collection_path = getMetadataPath(collection_name);
if (!fs::exists(collection_path))
{
throw Exception(
ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST,
"Cannot remove collection `{}`, because it doesn't exist",
collection_name);
}
}
bool removeIfExists(const std::string & collection_name)
{
auto collection_path = getMetadataPath(collection_name);
if (fs::exists(collection_path))
{
fs::remove(collection_path);
return true;
}
return false;
fs::remove(collection_path);
}
private:
@ -393,36 +385,64 @@ void loadIfNot()
return loadIfNotUnlocked(lock);
}
void removeFromSQL(const std::string & collection_name, ContextPtr context)
void removeFromSQL(const ASTDropNamedCollectionQuery & query, ContextPtr context)
{
auto lock = lockNamedCollectionsTransaction();
loadIfNotUnlocked(lock);
LoadFromSQL(context).remove(collection_name);
NamedCollectionFactory::instance().remove(collection_name);
}
void removeIfExistsFromSQL(const std::string & collection_name, ContextPtr context)
{
auto lock = lockNamedCollectionsTransaction();
loadIfNotUnlocked(lock);
LoadFromSQL(context).removeIfExists(collection_name);
NamedCollectionFactory::instance().removeIfExists(collection_name);
auto & instance = NamedCollectionFactory::instance();
if (!instance.exists(query.collection_name))
{
if (!query.if_exists)
{
throw Exception(
ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST,
"Cannot remove collection `{}`, because it doesn't exist",
query.collection_name);
}
return;
}
LoadFromSQL(context).remove(query.collection_name);
instance.remove(query.collection_name);
}
void createFromSQL(const ASTCreateNamedCollectionQuery & query, ContextPtr context)
{
auto lock = lockNamedCollectionsTransaction();
loadIfNotUnlocked(lock);
NamedCollectionFactory::instance().add(query.collection_name, LoadFromSQL(context).create(query));
auto & instance = NamedCollectionFactory::instance();
if (instance.exists(query.collection_name))
{
if (!query.if_not_exists)
{
throw Exception(
ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
"A named collection `{}` already exists",
query.collection_name);
}
return;
}
instance.add(query.collection_name, LoadFromSQL(context).create(query));
}
void updateFromSQL(const ASTAlterNamedCollectionQuery & query, ContextPtr context)
{
auto lock = lockNamedCollectionsTransaction();
loadIfNotUnlocked(lock);
auto & instance = NamedCollectionFactory::instance();
if (!instance.exists(query.collection_name))
{
if (!query.if_exists)
{
throw Exception(
ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST,
"Cannot remove collection `{}`, because it doesn't exist",
query.collection_name);
}
return;
}
LoadFromSQL(context).update(query);
auto collection = NamedCollectionFactory::instance().getMutable(query.collection_name);
auto collection = instance.getMutable(query.collection_name);
auto collection_lock = collection->lock();
for (const auto & [name, value] : query.changes)

View File

@ -8,6 +8,7 @@ namespace DB
class ASTCreateNamedCollectionQuery;
class ASTAlterNamedCollectionQuery;
class ASTDropNamedCollectionQuery;
namespace NamedCollectionUtils
{
@ -26,8 +27,7 @@ void reloadFromConfig(const Poco::Util::AbstractConfiguration & config);
void loadFromSQL(ContextPtr context);
/// Remove collection as well as its metadata from `context->getPath() / named_collections /`.
void removeFromSQL(const std::string & collection_name, ContextPtr context);
void removeIfExistsFromSQL(const std::string & collection_name, ContextPtr context);
void removeFromSQL(const ASTDropNamedCollectionQuery & query, ContextPtr context);
/// Create a new collection from AST and put it to `context->getPath() / named_collections /`.
void createFromSQL(const ASTCreateNamedCollectionQuery & query, ContextPtr context);

View File

@ -101,6 +101,10 @@ void ProgressIndication::writeFinalProgress()
<< formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.)";
else
std::cout << ". ";
auto peak_memory_usage = getMemoryUsage().peak;
if (peak_memory_usage >= 0)
std::cout << "\nPeak memory usage: " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << ".";
}
void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message)

View File

@ -70,6 +70,8 @@ ThreadGroup::ThreadGroup()
ThreadStatus::ThreadStatus(bool check_current_thread_on_destruction_)
: thread_id{getThreadId()}, check_current_thread_on_destruction(check_current_thread_on_destruction_)
{
chassert(!current_thread);
last_rusage = std::make_unique<RUsageCounters>();
memory_tracker.setDescription("(for thread)");
@ -123,6 +125,7 @@ ThreadStatus::ThreadStatus(bool check_current_thread_on_destruction_)
ThreadGroupPtr ThreadStatus::getThreadGroup() const
{
chassert(current_thread == this);
return thread_group;
}

View File

@ -218,7 +218,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh
}
catch (...)
{
LOG_INFO(log, "Failed to delete lock file for {} from S3", snapshot_path);
LOG_INFO(log, "Failed to delete lock file for {} from S3", snapshot_file_info.path);
tryLogCurrentException(__PRETTY_FUNCTION__);
}
});

View File

@ -78,7 +78,7 @@ class IColumn;
M(UInt64, s3_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \
M(UInt64, s3_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to S3.", 0) \
M(UInt64, s3_upload_part_size_multiply_factor, 2, "Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3.", 0) \
M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3 s3_min_upload_part_size multiplied by s3_upload_part_size_multiply_factor.", 0) \
M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor.", 0) \
M(UInt64, s3_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited. You ", 0) \
M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \
M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, "The maximum size of object to upload using singlepart upload to Azure blob storage.", 0) \
@ -783,6 +783,7 @@ class IColumn;
M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \
M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \
M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0)\
M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. Made for SQL compatibility tests.", 0)\
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
@ -838,6 +839,9 @@ class IColumn;
MAKE_OBSOLETE(M, Seconds, drain_timeout, 3) \
MAKE_OBSOLETE(M, UInt64, backup_threads, 16) \
MAKE_OBSOLETE(M, UInt64, restore_threads, 16) \
MAKE_OBSOLETE(M, Bool, input_format_arrow_import_nested, false) \
MAKE_OBSOLETE(M, Bool, input_format_parquet_import_nested, false) \
MAKE_OBSOLETE(M, Bool, input_format_orc_import_nested, false) \
MAKE_OBSOLETE(M, Bool, optimize_duplicate_order_by_and_distinct, false) \
/** The section above is for obsolete settings. Do not add anything there. */
@ -859,12 +863,9 @@ class IColumn;
M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \
M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices.", 0) \
M(Bool, input_format_null_as_default, true, "Initialize null fields with default values if the data type of this field is not nullable and it is supported by the input format", 0) \
M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \
M(Bool, input_format_arrow_case_insensitive_column_matching, false, "Ignore case when matching Arrow columns with CH columns.", 0) \
M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \
M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \
M(Bool, input_format_orc_case_insensitive_column_matching, false, "Ignore case when matching ORC columns with CH columns.", 0) \
M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \
M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \
M(Bool, input_format_parquet_preserve_order, false, "Avoid reordering rows when reading from Parquet files. Usually makes it much slower.", 0) \
M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \

View File

@ -77,7 +77,6 @@ public:
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
};

View File

@ -107,9 +107,6 @@ DatabasePtr DatabaseFactory::get(const ASTCreateQuery & create, const String & m
{
cckMetadataPathForOrdinary(create, metadata_path);
/// Creates store/xxx/ for Atomic
fs::create_directories(fs::path(metadata_path).parent_path());
DatabasePtr impl = getImpl(create, metadata_path, context);
if (impl && context->hasQueryContext() && context->getSettingsRef().log_queries)

View File

@ -11,9 +11,11 @@
#include <Storages/IStorage.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Common/filesystemHelpers.h>
#include <Formats/FormatFactory.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
@ -75,10 +77,8 @@ bool DatabaseFilesystem::checkTableFilePath(const std::string & table_path, Cont
/// Check access for file before checking its existence.
if (check_path && !fileOrSymlinkPathStartsWith(table_path, user_files_path))
{
if (throw_on_error)
throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File is not inside {}", user_files_path);
else
return false;
/// Access denied is thrown regardless of 'throw_on_error'
throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File is not inside {}", user_files_path);
}
/// Check if the corresponding file exists.
@ -128,20 +128,25 @@ bool DatabaseFilesystem::isTableExist(const String & name, ContextPtr context_)
if (tryGetTableFromCache(name))
return true;
return checkTableFilePath(getTablePath(name), context_, /* throw_on_error */false);
return checkTableFilePath(getTablePath(name), context_, /* throw_on_error */ false);
}
StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr context_) const
StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr context_, bool throw_on_error) const
{
/// Check if table exists in loaded tables map.
if (auto table = tryGetTableFromCache(name))
return table;
auto table_path = getTablePath(name);
checkTableFilePath(table_path, context_, /* throw_on_error */true);
if (!checkTableFilePath(table_path, context_, throw_on_error))
return {};
String format = FormatFactory::instance().getFormatFromFileName(table_path, throw_on_error);
if (format.empty())
return {};
/// If the file exists, create a new table using TableFunctionFile and return it.
auto args = makeASTFunction("file", std::make_shared<ASTLiteral>(table_path));
auto args = makeASTFunction("file", std::make_shared<ASTLiteral>(table_path), std::make_shared<ASTLiteral>(format));
auto table_function = TableFunctionFactory::instance().get(args, context_);
if (!table_function)
@ -158,7 +163,7 @@ StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr cont
StoragePtr DatabaseFilesystem::getTable(const String & name, ContextPtr context_) const
{
/// getTableImpl can throw exceptions, do not catch them to show correct error to user.
if (auto storage = getTableImpl(name, context_))
if (auto storage = getTableImpl(name, context_, true))
return storage;
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist",
@ -167,20 +172,7 @@ StoragePtr DatabaseFilesystem::getTable(const String & name, ContextPtr context_
StoragePtr DatabaseFilesystem::tryGetTable(const String & name, ContextPtr context_) const
{
try
{
return getTableImpl(name, context_);
}
catch (const Exception & e)
{
/// Ignore exceptions thrown by TableFunctionFile, which indicate that there is no table
/// see tests/02722_database_filesystem.sh for more details.
if (e.code() == ErrorCodes::FILE_DOESNT_EXIST)
{
return nullptr;
}
throw;
}
return getTableImpl(name, context_, false);
}
bool DatabaseFilesystem::empty() const

View File

@ -48,7 +48,7 @@ public:
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override;
protected:
StoragePtr getTableImpl(const String & name, ContextPtr context) const;
StoragePtr getTableImpl(const String & name, ContextPtr context, bool throw_on_error) const;
StoragePtr tryGetTableFromCache(const std::string & name) const;

View File

@ -77,6 +77,8 @@ DatabaseMySQL::DatabaseMySQL(
throw;
}
fs::create_directories(metadata_path);
thread = ThreadFromGlobalPool{&DatabaseMySQL::cleanOutdatedTables, this};
}
@ -144,6 +146,7 @@ ASTPtr DatabaseMySQL::getCreateTableQueryImpl(const String & table_name, Context
auto table_storage_define = database_engine_define->clone();
{
ASTStorage * ast_storage = table_storage_define->as<ASTStorage>();
ast_storage->engine->kind = ASTFunction::Kind::TABLE_ENGINE;
ASTs storage_children = ast_storage->children;
auto storage_engine_arguments = ast_storage->engine->arguments;

View File

@ -54,6 +54,7 @@ DatabasePostgreSQL::DatabasePostgreSQL(
, cache_tables(cache_tables_)
, log(&Poco::Logger::get("DatabasePostgreSQL(" + dbname_ + ")"))
{
fs::create_directories(metadata_path);
cleaner_task = getContext()->getSchedulePool().createTask("PostgreSQLCleanerTask", [this]{ removeOutdatedTables(); });
cleaner_task->deactivate();
}
@ -390,6 +391,7 @@ ASTPtr DatabasePostgreSQL::getCreateTableQueryImpl(const String & table_name, Co
auto create_table_query = std::make_shared<ASTCreateQuery>();
auto table_storage_define = database_engine_define->clone();
table_storage_define->as<ASTStorage>()->engine->kind = ASTFunction::Kind::TABLE_ENGINE;
create_table_query->set(create_table_query->storage, table_storage_define);
auto columns_declare_list = std::make_shared<ASTColumns>();

View File

@ -187,6 +187,7 @@ ASTPtr DatabaseSQLite::getCreateTableQueryImpl(const String & table_name, Contex
}
auto table_storage_define = database_engine_define->clone();
ASTStorage * ast_storage = table_storage_define->as<ASTStorage>();
ast_storage->engine->kind = ASTFunction::Kind::TABLE_ENGINE;
auto storage_engine_arguments = ast_storage->engine->arguments;
auto table_id = storage->getStorageID();
/// Add table_name to engine arguments

View File

@ -120,7 +120,6 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.parquet.row_group_rows = settings.output_format_parquet_row_group_size;
format_settings.parquet.row_group_bytes = settings.output_format_parquet_row_group_size_bytes;
format_settings.parquet.output_version = settings.output_format_parquet_version;
format_settings.parquet.import_nested = settings.input_format_parquet_import_nested;
format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching;
format_settings.parquet.preserve_order = settings.input_format_parquet_preserve_order;
format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns;
@ -170,7 +169,6 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.with_types_use_header = settings.input_format_with_types_use_header;
format_settings.write_statistics = settings.output_format_write_statistics;
format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary;
format_settings.arrow.import_nested = settings.input_format_arrow_import_nested;
format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns;
format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference;
format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference;
@ -178,11 +176,9 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.arrow.output_string_as_string = settings.output_format_arrow_string_as_string;
format_settings.arrow.output_fixed_string_as_fixed_byte_array = settings.output_format_arrow_fixed_string_as_fixed_byte_array;
format_settings.arrow.output_compression_method = settings.output_format_arrow_compression_method;
format_settings.orc.import_nested = settings.input_format_orc_import_nested;
format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns;
format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size;
format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference;
format_settings.orc.import_nested = settings.input_format_orc_import_nested;
format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns;
format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size;
format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference;
@ -687,14 +683,6 @@ void FormatFactory::markFormatSupportsSubsetOfColumns(const String & name)
target = true;
}
void FormatFactory::markFormatSupportsSubcolumns(const String & name)
{
auto & target = dict[name].supports_subcolumns;
if (target)
throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} is already marked as supporting subcolumns", name);
target = true;
}
void FormatFactory::markOutputFormatPrefersLargeBlocks(const String & name)
{
auto & target = dict[name].prefers_large_blocks;
@ -703,12 +691,6 @@ void FormatFactory::markOutputFormatPrefersLargeBlocks(const String & name)
target = true;
}
bool FormatFactory::checkIfFormatSupportsSubcolumns(const String & name) const
{
const auto & target = getCreators(name);
return target.supports_subcolumns;
}
bool FormatFactory::checkIfFormatSupportsSubsetOfColumns(const String & name) const
{
const auto & target = getCreators(name);

View File

@ -228,10 +228,8 @@ public:
void markOutputFormatSupportsParallelFormatting(const String & name);
void markOutputFormatPrefersLargeBlocks(const String & name);
void markFormatSupportsSubcolumns(const String & name);
void markFormatSupportsSubsetOfColumns(const String & name);
bool checkIfFormatSupportsSubcolumns(const String & name) const;
bool checkIfFormatSupportsSubsetOfColumns(const String & name) const;
bool checkIfFormatHasSchemaReader(const String & name) const;

View File

@ -113,7 +113,6 @@ struct FormatSettings
{
UInt64 row_group_size = 1000000;
bool low_cardinality_as_dictionary = false;
bool import_nested = false;
bool allow_missing_columns = false;
bool skip_columns_with_unsupported_types_in_schema_inference = false;
bool case_insensitive_column_matching = false;
@ -227,7 +226,6 @@ struct FormatSettings
{
UInt64 row_group_rows = 1000000;
UInt64 row_group_bytes = 512 * 1024 * 1024;
bool import_nested = false;
bool allow_missing_columns = false;
bool skip_columns_with_unsupported_types_in_schema_inference = false;
bool case_insensitive_column_matching = false;
@ -338,7 +336,6 @@ struct FormatSettings
struct
{
bool import_nested = false;
bool allow_missing_columns = false;
int64_t row_batch_size = 100'000;
bool skip_columns_with_unsupported_types_in_schema_inference = false;

View File

@ -42,50 +42,13 @@ void ZstdDeflatingAppendableWriteBuffer::nextImpl()
if (!offset())
return;
input.src = reinterpret_cast<unsigned char *>(working_buffer.begin());
input.size = offset();
input.pos = 0;
if (first_write && append_to_existing_file && isNeedToAddEmptyBlock())
{
addEmptyBlock();
first_write = false;
}
try
{
bool ended = false;
do
{
out->nextIfAtEnd();
output.dst = reinterpret_cast<unsigned char *>(out->buffer().begin());
output.size = out->buffer().size();
output.pos = out->offset();
size_t compression_result = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_flush);
if (ZSTD_isError(compression_result))
throw Exception(
ErrorCodes::ZSTD_ENCODER_FAILED,
"ZSTD stream decoding failed: error code: {}; ZSTD version: {}",
ZSTD_getErrorName(compression_result), ZSTD_VERSION_STRING);
first_write = false;
out->position() = out->buffer().begin() + output.pos;
bool everything_was_compressed = (input.pos == input.size);
bool everything_was_flushed = compression_result == 0;
ended = everything_was_compressed && everything_was_flushed;
} while (!ended);
}
catch (...)
{
/// Do not try to write next time after exception.
out->position() = out->buffer().begin();
throw;
}
flush(ZSTD_e_flush);
}
ZstdDeflatingAppendableWriteBuffer::~ZstdDeflatingAppendableWriteBuffer()
@ -103,58 +66,58 @@ void ZstdDeflatingAppendableWriteBuffer::finalizeImpl()
}
else
{
try
{
finalizeBefore();
out->finalize();
finalizeAfter();
}
catch (...)
{
/// Do not try to flush next time after exception.
out->position() = out->buffer().begin();
throw;
}
finalizeBefore();
out->finalize();
finalizeAfter();
}
}
void ZstdDeflatingAppendableWriteBuffer::finalizeBefore()
{
next();
out->nextIfAtEnd();
input.src = reinterpret_cast<unsigned char *>(working_buffer.begin());
input.size = offset();
input.pos = 0;
output.dst = reinterpret_cast<unsigned char *>(out->buffer().begin());
output.size = out->buffer().size();
output.pos = out->offset();
/// Actually we can use ZSTD_e_flush here and add empty termination
/// block on each new buffer creation for non-empty file unconditionally (without isNeedToAddEmptyBlock).
/// However ZSTD_decompressStream is able to read non-terminated frame (we use it in reader buffer),
/// but console zstd utility cannot.
size_t remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
while (remaining != 0)
flush(ZSTD_e_end);
}
void ZstdDeflatingAppendableWriteBuffer::flush(ZSTD_EndDirective mode)
{
input.src = reinterpret_cast<unsigned char *>(working_buffer.begin());
input.size = offset();
input.pos = 0;
try
{
if (ZSTD_isError(remaining))
throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED,
"ZSTD stream encoder end failed: error: '{}' ZSTD version: {}",
ZSTD_getErrorName(remaining), ZSTD_VERSION_STRING);
remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
out->position() = out->buffer().begin() + output.pos;
if (!out->hasPendingData())
bool ended = false;
do
{
out->next();
out->nextIfAtEnd();
output.dst = reinterpret_cast<unsigned char *>(out->buffer().begin());
output.size = out->buffer().size();
output.pos = out->offset();
}
size_t compression_result = ZSTD_compressStream2(cctx, &output, &input, mode);
if (ZSTD_isError(compression_result))
throw Exception(
ErrorCodes::ZSTD_ENCODER_FAILED,
"ZSTD stream decoding failed: error code: {}; ZSTD version: {}",
ZSTD_getErrorName(compression_result), ZSTD_VERSION_STRING);
out->position() = out->buffer().begin() + output.pos;
bool everything_was_compressed = (input.pos == input.size);
bool everything_was_flushed = compression_result == 0;
ended = everything_was_compressed && everything_was_flushed;
} while (!ended);
}
catch (...)
{
/// Do not try to write next time after exception.
out->position() = out->buffer().begin();
throw;
}
}

View File

@ -52,6 +52,8 @@ private:
/// NOTE: will fill compressed data to the out.working_buffer, but will not call out.next method until the buffer is full
void nextImpl() override;
void flush(ZSTD_EndDirective mode);
/// Write terminating ZSTD_e_end: empty block + frame epilogue. BTW it
/// should be almost noop, because frame epilogue contains only checksums,
/// and they are disabled for this buffer.

View File

@ -32,13 +32,8 @@ ZstdDeflatingWriteBuffer::ZstdDeflatingWriteBuffer(
ZstdDeflatingWriteBuffer::~ZstdDeflatingWriteBuffer() = default;
void ZstdDeflatingWriteBuffer::nextImpl()
void ZstdDeflatingWriteBuffer::flush(ZSTD_EndDirective mode)
{
if (!offset())
return;
ZSTD_EndDirective mode = ZSTD_e_flush;
input.src = reinterpret_cast<unsigned char *>(working_buffer.begin());
input.size = offset();
input.pos = 0;
@ -54,7 +49,6 @@ void ZstdDeflatingWriteBuffer::nextImpl()
output.size = out->buffer().size();
output.pos = out->offset();
size_t compression_result = ZSTD_compressStream2(cctx, &output, &input, mode);
if (ZSTD_isError(compression_result))
throw Exception(
@ -78,24 +72,15 @@ void ZstdDeflatingWriteBuffer::nextImpl()
}
}
void ZstdDeflatingWriteBuffer::nextImpl()
{
if (offset())
flush(ZSTD_e_flush);
}
void ZstdDeflatingWriteBuffer::finalizeBefore()
{
next();
out->nextIfAtEnd();
input.src = reinterpret_cast<unsigned char *>(working_buffer.begin());
input.size = offset();
input.pos = 0;
output.dst = reinterpret_cast<unsigned char *>(out->buffer().begin());
output.size = out->buffer().size();
output.pos = out->offset();
size_t remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
if (ZSTD_isError(remaining))
throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder end failed: zstd version: {}", ZSTD_VERSION_STRING);
out->position() = out->buffer().begin() + output.pos;
flush(ZSTD_e_end);
}
void ZstdDeflatingWriteBuffer::finalizeAfter()

View File

@ -37,6 +37,8 @@ private:
void finalizeBefore() override;
void finalizeAfter() override;
void flush(ZSTD_EndDirective mode);
ZSTD_CCtx * cctx;
ZSTD_inBuffer input;
ZSTD_outBuffer output;

View File

@ -587,7 +587,7 @@ KeyMetadata::iterator FileCache::addFileSegment(
}
}
bool FileCache::tryReserve(FileSegment & file_segment, const size_t size)
bool FileCache::tryReserve(FileSegment & file_segment, const size_t size, FileCacheReserveStat & reserve_stat)
{
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::FilesystemCacheReserveMicroseconds);
@ -653,6 +653,7 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size)
{
chassert(segment_metadata->file_segment->assertCorrectness());
auto & stat_by_kind = reserve_stat.stat_by_kind[segment_metadata->file_segment->getKind()];
if (segment_metadata->releasable())
{
const auto & key = segment_metadata->file_segment->key();
@ -661,9 +662,18 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size)
it = to_delete.emplace(key, locked_key.getKeyMetadata()).first;
it->second.add(segment_metadata);
stat_by_kind.releasable_size += segment_metadata->size();
++stat_by_kind.releasable_count;
freeable_space += segment_metadata->size();
++freeable_count;
}
else
{
stat_by_kind.non_releasable_size += segment_metadata->size();
++stat_by_kind.non_releasable_count;
}
return PriorityIterationResult::CONTINUE;
};
@ -718,6 +728,10 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size)
return is_overflow;
};
/// If we have enough space in query_priority, we are not interested about stat there anymore.
/// Clean the stat before iterating main_priority to avoid calculating any segment stat twice.
reserve_stat.stat_by_kind.clear();
if (is_main_priority_overflow())
{
main_priority->iterate(

View File

@ -30,6 +30,22 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
/// Track acquired space in cache during reservation
/// to make error messages when no space left more informative.
struct FileCacheReserveStat
{
struct Stat
{
size_t releasable_size;
size_t releasable_count;
size_t non_releasable_size;
size_t non_releasable_count;
};
std::unordered_map<FileSegmentKind, Stat> stat_by_kind;
};
/// Local cache for remote filesystem files, represented as a set of non-overlapping non-empty file segments.
/// Different caching algorithms are implemented using IFileCachePriority.
class FileCache : private boost::noncopyable
@ -106,7 +122,7 @@ public:
size_t getMaxFileSegmentSize() const { return max_file_segment_size; }
bool tryReserve(FileSegment & file_segment, size_t size);
bool tryReserve(FileSegment & file_segment, size_t size, FileCacheReserveStat & stat);
FileSegmentsHolderPtr getSnapshot();

View File

@ -186,9 +186,7 @@ bool FileSegment::isDownloaded() const
String FileSegment::getCallerId()
{
if (!CurrentThread::isInitialized()
|| !CurrentThread::get().getQueryContext()
|| CurrentThread::getQueryId().empty())
if (!CurrentThread::isInitialized() || CurrentThread::getQueryId().empty())
return "None:" + toString(getThreadId());
return std::string(CurrentThread::getQueryId()) + ":" + toString(getThreadId());
@ -478,7 +476,7 @@ LockedKeyPtr FileSegment::lockKeyMetadata(bool assert_exists) const
return metadata->tryLock();
}
bool FileSegment::reserve(size_t size_to_reserve)
bool FileSegment::reserve(size_t size_to_reserve, FileCacheReserveStat * reserve_stat)
{
if (!size_to_reserve)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Zero space reservation is not allowed");
@ -514,9 +512,8 @@ bool FileSegment::reserve(size_t size_to_reserve)
size_t already_reserved_size = reserved_size - expected_downloaded_size;
bool reserved = already_reserved_size >= size_to_reserve;
if (reserved)
return reserved;
if (already_reserved_size >= size_to_reserve)
return true;
size_to_reserve = size_to_reserve - already_reserved_size;
@ -525,7 +522,12 @@ bool FileSegment::reserve(size_t size_to_reserve)
if (is_unbound && is_file_segment_size_exceeded)
segment_range.right = range().left + expected_downloaded_size + size_to_reserve;
reserved = cache->tryReserve(*this, size_to_reserve);
/// if reserve_stat is not passed then use dummy stat and discard the result.
FileCacheReserveStat dummy_stat;
if (!reserve_stat)
reserve_stat = &dummy_stat;
bool reserved = cache->tryReserve(*this, size_to_reserve, *reserve_stat);
if (!reserved)
setDownloadFailedUnlocked(lockFileSegment());

View File

@ -26,6 +26,7 @@ namespace DB
{
class ReadBufferFromFileBase;
struct FileCacheReserveStat;
/*
* FileSegmentKind is used to specify the eviction policy for file segments.
@ -243,12 +244,7 @@ public:
/// Try to reserve exactly `size` bytes (in addition to the getDownloadedSize() bytes already downloaded).
/// Returns true if reservation was successful, false otherwise.
bool reserve(size_t size_to_reserve);
/// Try to reserve at max `size_to_reserve` bytes.
/// Returns actual size reserved. It can be less than size_to_reserve in non strict mode.
/// In strict mode throws an error on attempt to reserve space too much space.
size_t tryReserve(size_t size_to_reserve, bool strict = false);
bool reserve(size_t size_to_reserve, FileCacheReserveStat * reserve_stat = nullptr);
/// Write data into reserved space.
void write(const char * from, size_t size, size_t offset);

View File

@ -1,5 +1,6 @@
#include <Interpreters/Cache/WriteBufferToFileSegment.h>
#include <Interpreters/Cache/FileSegment.h>
#include <Interpreters/Cache/FileCache.h>
#include <IO/SwapHelper.h>
#include <IO/ReadBufferFromFile.h>
@ -44,11 +45,25 @@ void WriteBufferToFileSegment::nextImpl()
size_t bytes_to_write = offset();
FileCacheReserveStat reserve_stat;
/// In case of an error, we don't need to finalize the file segment
/// because it will be deleted soon and completed in the holder's destructor.
bool ok = file_segment->reserve(bytes_to_write);
bool ok = file_segment->reserve(bytes_to_write, &reserve_stat);
if (!ok)
throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve space for the file cache ({})", file_segment->getInfoForLog());
{
String reserve_stat_msg;
for (const auto & [kind, stat] : reserve_stat.stat_by_kind)
reserve_stat_msg += fmt::format("{} hold {}, can release {}; ",
toString(kind), ReadableSize(stat.non_releasable_size), ReadableSize(stat.releasable_size));
throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve {} bytes for {}: {}(segment info: {})",
bytes_to_write,
file_segment->getKind() == FileSegmentKind::Temporary ? "temporary file" : "the file in cache",
reserve_stat_msg,
file_segment->getInfoForLog()
);
}
try
{

View File

@ -166,7 +166,6 @@ namespace DB
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int BAD_GET;
extern const int UNKNOWN_DATABASE;
extern const int UNKNOWN_TABLE;
extern const int TABLE_ALREADY_EXISTS;
@ -181,6 +180,7 @@ namespace ErrorCodes
extern const int UNKNOWN_FUNCTION;
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
extern const int CLUSTER_DOESNT_EXIST;
}
#define SHUTDOWN(log, desc, ptr, method) do \
@ -1089,52 +1089,32 @@ ConfigurationPtr Context::getUsersConfig()
return shared->users_config;
}
void Context::setUser(const UUID & user_id_, bool set_current_profiles_, bool set_current_roles_, bool set_current_database_)
void Context::setUser(const UUID & user_id_, const std::optional<const std::vector<UUID>> & current_roles_)
{
/// Prepare lists of user's profiles, constraints, settings, roles.
/// NOTE: AccessControl::read<User>() and other AccessControl's functions may require some IO work,
/// so Context::getLock() must be unlocked while we're doing this.
std::shared_ptr<const User> user;
std::shared_ptr<const ContextAccess> temp_access;
if (set_current_profiles_ || set_current_roles_ || set_current_database_)
{
std::optional<ContextAccessParams> params;
{
auto lock = getLock();
params.emplace(ContextAccessParams{user_id_, /* full_access= */ false, /* use_default_roles = */ true, {}, settings, current_database, client_info });
}
/// `temp_access` is used here only to extract information about the user, not to actually check access.
/// NOTE: AccessControl::getContextAccess() may require some IO work, so Context::getLock() must be unlocked while we're doing this.
temp_access = getAccessControl().getContextAccess(*params);
user = temp_access->getUser();
}
auto user = getAccessControl().read<User>(user_id_);
std::shared_ptr<const SettingsProfilesInfo> profiles;
if (set_current_profiles_)
profiles = temp_access->getDefaultProfileInfo();
std::optional<std::vector<UUID>> roles;
if (set_current_roles_)
roles = user->granted_roles.findGranted(user->default_roles);
String database;
if (set_current_database_)
database = user->default_database;
auto new_current_roles = current_roles_ ? user->granted_roles.findGranted(*current_roles_) : user->granted_roles.findGranted(user->default_roles);
auto enabled_roles = getAccessControl().getEnabledRolesInfo(new_current_roles, {});
auto enabled_profiles = getAccessControl().getEnabledSettingsInfo(user_id_, user->settings, enabled_roles->enabled_roles, enabled_roles->settings_from_enabled_roles);
const auto & database = user->default_database;
/// Apply user's profiles, constraints, settings, roles.
auto lock = getLock();
setUserID(user_id_);
if (profiles)
{
/// A profile can specify a value and a readonly constraint for same setting at the same time,
/// so we shouldn't check constraints here.
setCurrentProfiles(*profiles, /* check_constraints= */ false);
}
/// A profile can specify a value and a readonly constraint for same setting at the same time,
/// so we shouldn't check constraints here.
setCurrentProfiles(*enabled_profiles, /* check_constraints= */ false);
if (roles)
setCurrentRoles(*roles);
setCurrentRoles(new_current_roles);
/// It's optional to specify the DEFAULT DATABASE in the user's definition.
if (!database.empty())
setCurrentDatabase(database);
}
@ -3073,7 +3053,7 @@ UInt16 Context::getServerPort(const String & port_name) const
{
auto it = shared->server_ports.find(port_name);
if (it == shared->server_ports.end())
throw Exception(ErrorCodes::BAD_GET, "There is no port named {}", port_name);
throw Exception(ErrorCodes::CLUSTER_DOESNT_EXIST, "There is no port named {}", port_name);
else
return it->second;
}
@ -3082,7 +3062,7 @@ std::shared_ptr<Cluster> Context::getCluster(const std::string & cluster_name) c
{
if (auto res = tryGetCluster(cluster_name))
return res;
throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", cluster_name);
throw Exception(ErrorCodes::CLUSTER_DOESNT_EXIST, "Requested cluster '{}' not found", cluster_name);
}
@ -4550,14 +4530,6 @@ ReadSettings Context::getReadSettings() const
return res;
}
ReadSettings Context::getBackupReadSettings() const
{
ReadSettings read_settings = getReadSettings();
read_settings.remote_throttler = getBackupsThrottler();
read_settings.local_throttler = getBackupsThrottler();
return read_settings;
}
WriteSettings Context::getWriteSettings() const
{
WriteSettings res;

Some files were not shown because too many files have changed in this diff Show More