ClickHouse/ClickHouse (mirror of https://github.com/ClickHouse/ClickHouse.git)

Merge branch 'master' into rename_allow_deprecated_functions
commit 56cbcfe431

.github/PULL_REQUEST_TEMPLATE.md | 63
@ -46,42 +46,35 @@ At a minimum, the following information should be added (but add more as needed)

**NOTE:** If your merge the PR with modified CI you **MUST KNOW** what you are doing
**NOTE:** Checked options will be applied if set before CI RunConfig/PrepareRunConfig step

#### Run these jobs only (required builds will be added automatically):
- [ ] <!---ci_include_integration--> Integration Tests
- [ ] <!---ci_include_stateless--> Stateless tests
- [ ] <!---ci_include_stateful--> Stateful tests
- [ ] <!---ci_include_unit--> Unit tests
- [ ] <!---ci_include_performance--> Performance tests
- [ ] <!---ci_include_aarch64--> All with aarch64
- [ ] <!---ci_include_asan--> All with ASAN
- [ ] <!---ci_include_tsan--> All with TSAN
- [ ] <!---ci_include_analyzer--> All with Analyzer
- [ ] <!---ci_include_azure --> All with Azure
- [ ] <!---ci_include_KEYWORD--> Add your option here

#### Deny these jobs:
- [ ] <!---ci_exclude_fast--> Fast test
- [ ] <!---ci_exclude_integration--> Integration Tests
- [ ] <!---ci_exclude_stateless--> Stateless tests
- [ ] <!---ci_exclude_stateful--> Stateful tests
- [ ] <!---ci_exclude_performance--> Performance tests
- [ ] <!---ci_exclude_asan--> All with ASAN
- [ ] <!---ci_exclude_tsan--> All with TSAN
- [ ] <!---ci_exclude_msan--> All with MSAN
- [ ] <!---ci_exclude_ubsan--> All with UBSAN
- [ ] <!---ci_exclude_coverage--> All with Coverage
- [ ] <!---ci_exclude_aarch64--> All with Aarch64

#### Extra options:
- [ ] <!---ci_include_integration--> Allow: Integration Tests
- [ ] <!---ci_include_stateless--> Allow: Stateless tests
- [ ] <!---ci_include_stateful--> Allow: Stateful tests
- [ ] <!---ci_include_unit--> Allow: Unit tests
- [ ] <!---ci_include_performance--> Allow: Performance tests
- [ ] <!---ci_include_aarch64--> Allow: All with aarch64
- [ ] <!---ci_include_asan--> Allow: All with ASAN
- [ ] <!---ci_include_tsan--> Allow: All with TSAN
- [ ] <!---ci_include_analyzer--> Allow: All with Analyzer
- [ ] <!---ci_include_azure --> Allow: All with Azure
- [ ] <!---ci_include_KEYWORD--> Allow: Add your option here
---
- [ ] <!---ci_exclude_fast--> Exclude: Fast test
- [ ] <!---ci_exclude_integration--> Exclude: Integration Tests
- [ ] <!---ci_exclude_stateless--> Exclude: Stateless tests
- [ ] <!---ci_exclude_stateful--> Exclude: Stateful tests
- [ ] <!---ci_exclude_performance--> Exclude: Performance tests
- [ ] <!---ci_exclude_asan--> Exclude: All with ASAN
- [ ] <!---ci_exclude_tsan--> Exclude: All with TSAN
- [ ] <!---ci_exclude_msan--> Exclude: All with MSAN
- [ ] <!---ci_exclude_ubsan--> Exclude: All with UBSAN
- [ ] <!---ci_exclude_coverage--> Exclude: All with Coverage
- [ ] <!---ci_exclude_aarch64--> Exclude: All with Aarch64
---
- [ ] <!---do_not_test--> do not test (only style check)
- [ ] <!---no_merge_commit--> disable merge-commit (no merge from master before tests)
- [ ] <!---no_ci_cache--> disable CI cache (job reuse)

#### Only specified batches in multi-batch jobs:
- [ ] <!---batch_0--> 1
- [ ] <!---batch_1--> 2
- [ ] <!---batch_2--> 3
- [ ] <!---batch_3--> 4

- [ ] <!---batch_0--> allow: batch 1 for multi-batch jobs
- [ ] <!---batch_1--> allow: batch 2
- [ ] <!---batch_2--> allow: batch 3
- [ ] <!---batch_3_4_5--> allow: batch 4, 5 and 6
</details>

.github/workflows/master.yml | 23
@ -27,15 +27,16 @@ jobs:
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 sync_pr.py --merge || :
- name: Python unit tests
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
echo "Testing the main ci directory"
python3 -m unittest discover -s . -p 'test_*.py'
for dir in *_lambda/; do
echo "Testing $dir"
python3 -m unittest discover -s "$dir" -p 'test_*.py'
done
# Runs in MQ:
# - name: Python unit tests
# run: |
# cd "$GITHUB_WORKSPACE/tests/ci"
# echo "Testing the main ci directory"
# python3 -m unittest discover -s . -p 'test_*.py'
# for dir in *_lambda/; do
# echo "Testing $dir"
# python3 -m unittest discover -s "$dir" -p 'test_*.py'
# done
- name: PrepareRunConfig
id: runconfig
run: |
@ -135,7 +136,7 @@ jobs:

MarkReleaseReady:
if: ${{ !failure() && !cancelled() }}
needs: [RunConfig, Builds_1]
needs: [RunConfig, Builds_1, Builds_2]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Debug
@ -162,7 +163,7 @@ jobs:
python3 mark_release_ready.py

FinishCheck:
if: ${{ !failure() && !cancelled() }}
if: ${{ !cancelled() }}
needs: [RunConfig, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3]
runs-on: [self-hosted, style-checker-aarch64]
steps:

.github/workflows/pull_request.yml | 7
@ -33,9 +33,12 @@ jobs:
clear-repository: true # to ensure correct digests
fetch-depth: 0 # to get a version
filter: tree:0
- name: Cancel Sync PR workflow
- name: Cancel previous Sync PR workflow
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run
- name: Set pending Sync status
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --set-pending-status
- name: Labels check
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
@ -177,7 +180,7 @@ jobs:
################################# Stage Final #################################
#
FinishCheck:
if: ${{ !failure() && !cancelled() }}
if: ${{ !cancelled() }}
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3]
runs-on: [self-hosted, style-checker-aarch64]
steps:

.github/workflows/reusable_build.yml | 5
@ -33,6 +33,10 @@ name: Build ClickHouse
additional_envs:
description: additional ENV variables to setup the job
type: string
secrets:
secret_envs:
description: if given, it's passed to the environments
required: false

jobs:
Build:
@ -54,6 +58,7 @@ jobs:
run: |
cat >> "$GITHUB_ENV" << 'EOF'
${{inputs.additional_envs}}
${{secrets.secret_envs}}
DOCKER_TAG<<DOCKER_JSON
${{ toJson(fromJson(inputs.data).docker_data.images) }}
DOCKER_JSON

.github/workflows/reusable_build_stage.yml | 6
@ -13,6 +13,10 @@ name: BuildStageWF
description: ci data
type: string
required: true
secrets:
secret_envs:
description: if given, it's passed to the environments
required: false

jobs:
s:
@ -30,3 +34,5 @@ jobs:
# for now let's do I deep checkout for builds
checkout_depth: 0
data: ${{ inputs.data }}
secrets:
secret_envs: ${{ secrets.secret_envs }}

.github/workflows/reusable_test_stage.yml | 6
@ -10,6 +10,10 @@ name: StageWF
description: ci data
type: string
required: true
secrets:
secret_envs:
description: if given, it's passed to the environments
required: false

jobs:
s:
@ -23,3 +27,5 @@ jobs:
test_name: ${{ matrix.job_name_and_runner_type.job_name }}
runner_type: ${{ matrix.job_name_and_runner_type.runner_type }}
data: ${{ inputs.data }}
secrets:
secret_envs: ${{ secrets.secret_envs }}

contrib/aws | 2
@ -1 +1 @@
Subproject commit 2e12d7c6dafa81311ee3d73ac6a178550ffa75be
Subproject commit eb96e740453ae27afa1f367ba19f99bdcb38484d

@ -202,8 +202,7 @@ Example:
CREATE TABLE s3queue_engine_table (name String, value UInt32)
ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip')
SETTINGS
mode = 'unordered',
keeper_path = '/clickhouse/s3queue/';
mode = 'unordered';

CREATE TABLE stats (name String, value UInt32)
ENGINE = MergeTree() ORDER BY name;
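
For context, a minimal sketch of how the two tables from the example above are typically wired together; the materialized view name `s3queue_consumer` is illustrative and not part of this diff:

```sql
-- Stream rows from the S3Queue table into the MergeTree table as new files arrive.
CREATE MATERIALIZED VIEW s3queue_consumer TO stats AS
SELECT name, value
FROM s3queue_engine_table;

-- Query the accumulated data.
SELECT count() FROM stats;
```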

@ -3665,6 +3665,26 @@ Possible values:

Default value: `0`.

## s3_ignore_file_doesnt_exist {#s3_ignore_file_doesnt_exist}

Ignore absence of file if it does not exist when reading certain keys.

Possible values:
- 1 — `SELECT` returns empty result.
- 0 — `SELECT` throws an exception.

Default value: `0`.

## s3_validate_request_settings {#s3_validate_request_settings}

Enables s3 request settings validation.

Possible values:
- 1 — validate settings.
- 0 — do not validate settings.

Default value: `1`.
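
A minimal usage sketch for these two settings; the bucket URL and column list are placeholders, not taken from this commit:

```sql
-- Return an empty result instead of an exception when the requested key is missing,
-- and skip validation of the S3 request settings for this query only.
SELECT count()
FROM s3('https://example-bucket.s3.amazonaws.com/data/missing.csv', 'CSV', 'name String, value UInt32')
SETTINGS s3_ignore_file_doesnt_exist = 1, s3_validate_request_settings = 0;
```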

## hdfs_truncate_on_insert {#hdfs_truncate_on_insert}

Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists.
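
A brief sketch of the behavior this setting controls, assuming a hypothetical HDFS URI and table layout:

```sql
-- With the setting disabled (default), repeating this INSERT would throw
-- because the target file already exists; enabling it truncates and rewrites the file.
SET hdfs_truncate_on_insert = 1;

INSERT INTO TABLE FUNCTION hdfs('hdfs://namenode:9000/tmp/values.tsv', 'TSV', 'name String, value UInt32')
VALUES ('one', 1), ('two', 2);
```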

@ -3697,6 +3717,56 @@ Possible values:

Default value: `0`.

## hdfs_throw_on_zero_files_match {#hdfs_throw_on_zero_files_match}

Throw an error if matched zero files according to glob expansion rules.

Possible values:
- 1 — `SELECT` throws an exception.
- 0 — `SELECT` returns empty result.

Default value: `0`.

## hdfs_ignore_file_doesnt_exist {#hdfs_ignore_file_doesnt_exist}

Ignore absence of file if it does not exist when reading certain keys.

Possible values:
- 1 — `SELECT` returns empty result.
- 0 — `SELECT` throws an exception.

Default value: `0`.
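
A combined read sketch for the two HDFS settings above; the path and schema are placeholders:

```sql
-- Neither an empty glob match nor a missing explicit key aborts the query;
-- both cases simply contribute zero rows to the result.
SELECT count()
FROM hdfs('hdfs://namenode:9000/logs/2024-05-*.tsv', 'TSV', 'name String, value UInt32')
SETTINGS hdfs_throw_on_zero_files_match = 0, hdfs_ignore_file_doesnt_exist = 1;
```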

## azure_throw_on_zero_files_match {#azure_throw_on_zero_files_match}

Throw an error if matched zero files according to glob expansion rules.

Possible values:
- 1 — `SELECT` throws an exception.
- 0 — `SELECT` returns empty result.

Default value: `0`.

## azure_ignore_file_doesnt_exist {#azure_ignore_file_doesnt_exist}

Ignore absence of file if it does not exist when reading certain keys.

Possible values:
- 1 — `SELECT` returns empty result.
- 0 — `SELECT` throws an exception.

Default value: `0`.

## azure_skip_empty_files {#azure_skip_empty_files}

Enables or disables skipping empty files in S3 engine.

Possible values:
- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
- 1 — `SELECT` returns empty result for empty file.

Default value: `0`.

## engine_url_skip_empty_files {#engine_url_skip_empty_files}

Enables or disables skipping empty files in [URL](../../engines/table-engines/special/url.md) engine tables.
@ -5468,3 +5538,15 @@ Defines how MySQL types are converted to corresponding ClickHouse types. A comma
- `datetime64`: convert `DATETIME` and `TIMESTAMP` types to `DateTime64` instead of `DateTime` when precision is not `0`.
- `date2Date32`: convert `DATE` to `Date32` instead of `Date`. Takes precedence over `date2String`.
- `date2String`: convert `DATE` to `String` instead of `Date`. Overridden by `datetime64`.

## cross_join_min_rows_to_compress

Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached.

Default value: `10000000`.

## cross_join_min_bytes_to_compress

Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached.

Default value: `1GiB`.
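
A per-query sketch of tuning these thresholds; `t1` and `t2` are hypothetical tables:

```sql
-- Disable compression of CROSS JOIN blocks for this query by zeroing both thresholds.
SELECT count()
FROM t1 CROSS JOIN t2
SETTINGS cross_join_min_rows_to_compress = 0, cross_join_min_bytes_to_compress = 0;
```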
@ -1975,143 +1975,3 @@ Result:
|
||||
│ 2,"good" │
|
||||
└───────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## snowflakeToDateTime
|
||||
|
||||
Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](../data-types/datetime.md) format.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
snowflakeToDateTime(value[, time_zone])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
|
||||
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- The timestamp component of `value` as a [DateTime](../data-types/datetime.md) value.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
|
||||
┌─snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')─┐
|
||||
│ 2021-08-15 10:57:56 │
|
||||
└──────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## snowflakeToDateTime64
|
||||
|
||||
Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](../data-types/datetime64.md) format.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
snowflakeToDateTime64(value[, time_zone])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
|
||||
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- The timestamp component of `value` as a [DateTime64](../data-types/datetime64.md) with scale = 3, i.e. millisecond precision.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
|
||||
┌─snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC')─┐
|
||||
│ 2021-08-15 10:58:19.841 │
|
||||
└────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## dateTimeToSnowflake
|
||||
|
||||
Converts a [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
dateTimeToSnowflake(value)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `value` — Date with time. [DateTime](../data-types/datetime.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt SELECT dateTimeToSnowflake(dt);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─dateTimeToSnowflake(dt)─┐
|
||||
│ 1426860702823350272 │
|
||||
└─────────────────────────┘
|
||||
```
|
||||
|
||||
## dateTime64ToSnowflake
|
||||
|
||||
Convert a [DateTime64](../data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
dateTime64ToSnowflake(value)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `value` — Date with time. [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64 SELECT dateTime64ToSnowflake(dt64);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─dateTime64ToSnowflake(dt64)─┐
|
||||
│ 1426860704886947840 │
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
@ -668,7 +668,7 @@ Result:
|
||||
└──────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## serverUUID()
|
||||
## serverUUID
|
||||
|
||||
Returns the random UUID generated during the first start of the ClickHouse server. The UUID is stored in file `uuid` in the ClickHouse server directory (e.g. `/var/lib/clickhouse/`) and retained between server restarts.
|
||||
|
||||
@ -682,6 +682,275 @@ serverUUID()
|
||||
|
||||
- The UUID of the server. [UUID](../data-types/uuid.md).
|
||||
|
||||
## generateSnowflakeID
|
||||
|
||||
Generates a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID).
|
||||
|
||||
The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond.
|
||||
For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes.
|
||||
In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0.
|
||||
|
||||
Function `generateSnowflakeID` guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.
|
||||
|
||||
```
|
||||
0 1 2 3
|
||||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|
||||
├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
|
||||
|0| timestamp |
|
||||
├─┼ ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
|
||||
| | machine_id | machine_seq_num |
|
||||
└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘
|
||||
```
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
generateSnowflakeID([expr])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned Snowflake ID. Optional.
|
||||
|
||||
**Returned value**
|
||||
|
||||
A value of type UInt64.
|
||||
|
||||
**Example**
|
||||
|
||||
First, create a table with a column of type UInt64, then insert a generated Snowflake ID into the table.
|
||||
|
||||
``` sql
|
||||
CREATE TABLE tab (id UInt64) ENGINE = Memory;
|
||||
|
||||
INSERT INTO tab SELECT generateSnowflakeID();
|
||||
|
||||
SELECT * FROM tab;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌──────────────────id─┐
|
||||
│ 7199081390080409600 │
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
**Example with multiple Snowflake IDs generated per row**
|
||||
|
||||
```sql
|
||||
SELECT generateSnowflakeID(1), generateSnowflakeID(2);
|
||||
|
||||
┌─generateSnowflakeID(1)─┬─generateSnowflakeID(2)─┐
|
||||
│ 7199081609652224000 │ 7199081609652224001 │
|
||||
└────────────────────────┴────────────────────────┘
|
||||
```
|
||||
|
||||
## generateSnowflakeIDThreadMonotonic
|
||||
|
||||
Generates a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID).
|
||||
|
||||
The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond.
|
||||
For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes.
|
||||
In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0.
|
||||
|
||||
This function behaves like `generateSnowflakeID` but gives no guarantee on counter monotony across different simultaneous requests.
|
||||
Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs.
|
||||
|
||||
```
|
||||
0 1 2 3
|
||||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|
||||
├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
|
||||
|0| timestamp |
|
||||
├─┼ ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
|
||||
| | machine_id | machine_seq_num |
|
||||
└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘
|
||||
```
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
generateSnowflakeIDThreadMonotonic([expr])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned Snowflake ID. Optional.
|
||||
|
||||
**Returned value**
|
||||
|
||||
A value of type UInt64.
|
||||
|
||||
**Example**
|
||||
|
||||
First, create a table with a column of type UInt64, then insert a generated Snowflake ID into the table.
|
||||
|
||||
``` sql
|
||||
CREATE TABLE tab (id UInt64) ENGINE = Memory;
|
||||
|
||||
INSERT INTO tab SELECT generateSnowflakeIDThreadMonotonic();
|
||||
|
||||
SELECT * FROM tab;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌──────────────────id─┐
|
||||
│ 7199082832006627328 │
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
**Example with multiple Snowflake IDs generated per row**
|
||||
|
||||
```sql
|
||||
SELECT generateSnowflakeIDThreadMonotonic(1), generateSnowflakeIDThreadMonotonic(2);
|
||||
|
||||
┌─generateSnowflakeIDThreadMonotonic(1)─┬─generateSnowflakeIDThreadMonotonic(2)─┐
|
||||
│ 7199082940311945216 │ 7199082940316139520 │
|
||||
└───────────────────────────────────────┴───────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## snowflakeToDateTime
|
||||
|
||||
Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](../data-types/datetime.md) format.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
snowflakeToDateTime(value[, time_zone])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
|
||||
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- The timestamp component of `value` as a [DateTime](../data-types/datetime.md) value.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
|
||||
┌─snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')─┐
|
||||
│ 2021-08-15 10:57:56 │
|
||||
└──────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## snowflakeToDateTime64
|
||||
|
||||
Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](../data-types/datetime64.md) format.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
snowflakeToDateTime64(value[, time_zone])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
|
||||
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- The timestamp component of `value` as a [DateTime64](../data-types/datetime64.md) with scale = 3, i.e. millisecond precision.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
|
||||
┌─snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC')─┐
|
||||
│ 2021-08-15 10:58:19.841 │
|
||||
└────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## dateTimeToSnowflake
|
||||
|
||||
Converts a [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
dateTimeToSnowflake(value)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `value` — Date with time. [DateTime](../data-types/datetime.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt SELECT dateTimeToSnowflake(dt);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─dateTimeToSnowflake(dt)─┐
|
||||
│ 1426860702823350272 │
|
||||
└─────────────────────────┘
|
||||
```
|
||||
|
||||
## dateTime64ToSnowflake
|
||||
|
||||
Convert a [DateTime64](../data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
dateTime64ToSnowflake(value)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `value` — Date with time. [DateTime64](../data-types/datetime64.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64 SELECT dateTime64ToSnowflake(dt64);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─dateTime64ToSnowflake(dt64)─┐
|
||||
│ 1426860704886947840 │
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
||||
## See also
|
||||
|
||||
- [dictGetUUID](../functions/ext-dict-functions.md#ext_dict_functions-other)
|
||||
|
@ -10,6 +10,7 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int KEEPER_EXCEPTION;
|
||||
}
|
||||
|
||||
@ -441,7 +442,7 @@ void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient
|
||||
new_members = query->args[1].safeGet<String>();
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected operation: {}", operation);
|
||||
}
|
||||
|
||||
auto response = client->zookeeper->reconfig(joining, leaving, new_members);
|
||||
|
@ -155,8 +155,8 @@ auto instructionFailToString(InstructionFail fail)
|
||||
ret("AVX2");
|
||||
case InstructionFail::AVX512:
|
||||
ret("AVX512");
|
||||
#undef ret
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
|
||||
|
@ -144,8 +144,7 @@ AccessEntityPtr deserializeAccessEntity(const String & definition, const String
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("Could not parse " + file_path);
|
||||
e.rethrow();
|
||||
UNREACHABLE();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -258,7 +258,7 @@ namespace
|
||||
case TABLE_LEVEL: return AccessFlags::allFlagsGrantableOnTableLevel();
|
||||
case COLUMN_LEVEL: return AccessFlags::allFlagsGrantableOnColumnLevel();
|
||||
}
|
||||
UNREACHABLE();
|
||||
chassert(false);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -257,8 +257,7 @@ std::vector<UUID> IAccessStorage::insert(const std::vector<AccessEntityPtr> & mu
|
||||
}
|
||||
e.addMessage("After successfully inserting {}/{}: {}", successfully_inserted.size(), multiple_entities.size(), successfully_inserted_str);
|
||||
}
|
||||
e.rethrow();
|
||||
UNREACHABLE();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
@ -361,8 +360,7 @@ std::vector<UUID> IAccessStorage::remove(const std::vector<UUID> & ids, bool thr
|
||||
}
|
||||
e.addMessage("After successfully removing {}/{}: {}", removed_names.size(), ids.size(), removed_names_str);
|
||||
}
|
||||
e.rethrow();
|
||||
UNREACHABLE();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
@ -458,8 +456,7 @@ std::vector<UUID> IAccessStorage::update(const std::vector<UUID> & ids, const Up
|
||||
}
|
||||
e.addMessage("After successfully updating {}/{}: {}", names_of_updated.size(), ids.size(), names_of_updated_str);
|
||||
}
|
||||
e.rethrow();
|
||||
UNREACHABLE();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -60,14 +60,13 @@ struct GroupArrayTrait
|
||||
template <typename Trait>
|
||||
constexpr const char * getNameByTrait()
|
||||
{
|
||||
if (Trait::last)
|
||||
if constexpr (Trait::last)
|
||||
return "groupArrayLast";
|
||||
if (Trait::sampler == Sampler::NONE)
|
||||
return "groupArray";
|
||||
else if (Trait::sampler == Sampler::RNG)
|
||||
return "groupArraySample";
|
||||
|
||||
UNREACHABLE();
|
||||
switch (Trait::sampler)
|
||||
{
|
||||
case Sampler::NONE: return "groupArray";
|
||||
case Sampler::RNG: return "groupArraySample";
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -414,7 +414,6 @@ public:
|
||||
break;
|
||||
return (i == events_size) ? base - i : unmatched_idx;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
|
@ -463,7 +463,6 @@ public:
|
||||
return "sumWithOverflow";
|
||||
else if constexpr (Type == AggregateFunctionTypeSumKahan)
|
||||
return "sumKahan";
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
explicit AggregateFunctionSum(const DataTypes & argument_types_)
|
||||
|
@ -331,6 +331,9 @@ void validateAggregates(const QueryTreeNodePtr & query_node, AggregatesValidatio
|
||||
if (query_node_typed.hasOrderBy())
|
||||
validate_group_by_columns_visitor.visit(query_node_typed.getOrderByNode());
|
||||
|
||||
if (query_node_typed.hasInterpolate())
|
||||
validate_group_by_columns_visitor.visit(query_node_typed.getInterpolate());
|
||||
|
||||
validate_group_by_columns_visitor.visit(query_node_typed.getProjectionNode());
|
||||
}
|
||||
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <Interpreters/Context.h>
|
||||
#include <IO/SharedThreadPools.h>
|
||||
#include <IO/HTTPHeaderEntries.h>
|
||||
#include <Storages/StorageAzureBlobCluster.h>
|
||||
#include <Disks/IO/ReadBufferFromAzureBlobStorage.h>
|
||||
#include <Disks/IO/WriteBufferFromAzureBlobStorage.h>
|
||||
#include <IO/AzureBlobStorage/copyAzureBlobStorageFile.h>
|
||||
@ -30,7 +29,7 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage(
|
||||
StorageAzureBlob::Configuration configuration_,
|
||||
const StorageAzureConfiguration & configuration_,
|
||||
bool allow_azure_native_copy,
|
||||
const ReadSettings & read_settings_,
|
||||
const WriteSettings & write_settings_,
|
||||
@ -39,14 +38,13 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage(
|
||||
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURL().toString(), false, false}
|
||||
, configuration(configuration_)
|
||||
{
|
||||
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false);
|
||||
auto client_ptr = configuration.createClient(/* is_readonly */false, /* attempt_to_create_container */true);
|
||||
client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true});
|
||||
|
||||
object_storage = std::make_unique<AzureObjectStorage>(
|
||||
"BackupReaderAzureBlobStorage",
|
||||
object_storage = std::make_unique<AzureObjectStorage>("BackupReaderAzureBlobStorage",
|
||||
std::move(client_ptr),
|
||||
StorageAzureBlob::createSettings(context_),
|
||||
configuration.container,
|
||||
configuration.createSettings(context_),
|
||||
configuration_.container,
|
||||
configuration.getConnectionURL().toString());
|
||||
|
||||
client = object_storage->getAzureBlobStorageClient();
|
||||
@ -121,7 +119,7 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup,
|
||||
|
||||
|
||||
BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
|
||||
StorageAzureBlob::Configuration configuration_,
|
||||
const StorageAzureConfiguration & configuration_,
|
||||
bool allow_azure_native_copy,
|
||||
const ReadSettings & read_settings_,
|
||||
const WriteSettings & write_settings_,
|
||||
@ -131,13 +129,13 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
|
||||
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURL().toString(), false, false}
|
||||
, configuration(configuration_)
|
||||
{
|
||||
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false, attempt_to_create_container);
|
||||
auto client_ptr = configuration.createClient(/* is_readonly */false, attempt_to_create_container);
|
||||
client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true});
|
||||
|
||||
object_storage = std::make_unique<AzureObjectStorage>("BackupWriterAzureBlobStorage",
|
||||
std::move(client_ptr),
|
||||
StorageAzureBlob::createSettings(context_),
|
||||
configuration_.container,
|
||||
configuration.createSettings(context_),
|
||||
configuration.container,
|
||||
configuration_.getConnectionURL().toString());
|
||||
client = object_storage->getAzureBlobStorageClient();
|
||||
auto settings_copy = *object_storage->getSettings();
|
||||
@ -145,8 +143,13 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
|
||||
settings = std::make_unique<const AzureObjectStorageSettings>(settings_copy);
|
||||
}
|
||||
|
||||
void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
|
||||
bool copy_encrypted, UInt64 start_pos, UInt64 length)
|
||||
void BackupWriterAzureBlobStorage::copyFileFromDisk(
|
||||
const String & path_in_backup,
|
||||
DiskPtr src_disk,
|
||||
const String & src_path,
|
||||
bool copy_encrypted,
|
||||
UInt64 start_pos,
|
||||
UInt64 length)
|
||||
{
|
||||
/// Use the native copy as a more optimal way to copy a file from AzureBlobStorage to AzureBlobStorage if it's possible.
|
||||
auto source_data_source_description = src_disk->getDataSourceDescription();
|
||||
@ -196,9 +199,15 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St
|
||||
threadPoolCallbackRunnerUnsafe<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"));
|
||||
}
|
||||
|
||||
void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length)
|
||||
void BackupWriterAzureBlobStorage::copyDataToFile(
|
||||
const String & path_in_backup,
|
||||
const CreateReadBufferFunction & create_read_buffer,
|
||||
UInt64 start_pos,
|
||||
UInt64 length)
|
||||
{
|
||||
copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, fs::path(configuration.blob_path) / path_in_backup, settings,
|
||||
copyDataToAzureBlobStorageFile(
|
||||
create_read_buffer, start_pos, length, client, configuration.container,
|
||||
fs::path(configuration.blob_path) / path_in_backup, settings,
|
||||
threadPoolCallbackRunnerUnsafe<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"));
|
||||
}
|
||||
|
||||
@ -217,7 +226,7 @@ UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name)
|
||||
object_storage->listObjects(key,children,/*max_keys*/0);
|
||||
if (children.empty())
|
||||
throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Object must exist");
|
||||
return children[0].metadata.size_bytes;
|
||||
return children[0]->metadata->size_bytes;
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBuffer> BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/)
|
||||
|
@ -5,8 +5,8 @@
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
#include <Backups/BackupIO_Default.h>
|
||||
#include <Disks/DiskType.h>
|
||||
#include <Storages/StorageAzureBlobCluster.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <Storages/ObjectStorage/Azure/Configuration.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -17,24 +17,30 @@ class BackupReaderAzureBlobStorage : public BackupReaderDefault
|
||||
{
|
||||
public:
|
||||
BackupReaderAzureBlobStorage(
|
||||
StorageAzureBlob::Configuration configuration_,
|
||||
const StorageAzureConfiguration & configuration_,
|
||||
bool allow_azure_native_copy,
|
||||
const ReadSettings & read_settings_,
|
||||
const WriteSettings & write_settings_,
|
||||
const ContextPtr & context_);
|
||||
|
||||
~BackupReaderAzureBlobStorage() override;
|
||||
|
||||
bool fileExists(const String & file_name) override;
|
||||
UInt64 getFileSize(const String & file_name) override;
|
||||
std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) override;
|
||||
|
||||
void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
|
||||
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override;
|
||||
void copyFileToDisk(
|
||||
const String & path_in_backup,
|
||||
size_t file_size,
|
||||
bool encrypted_in_backup,
|
||||
DiskPtr destination_disk,
|
||||
const String & destination_path,
|
||||
WriteMode write_mode) override;
|
||||
|
||||
private:
|
||||
const DataSourceDescription data_source_description;
|
||||
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client;
|
||||
StorageAzureBlob::Configuration configuration;
|
||||
StorageAzureConfiguration configuration;
|
||||
std::unique_ptr<AzureObjectStorage> object_storage;
|
||||
std::shared_ptr<const AzureObjectStorageSettings> settings;
|
||||
};
|
||||
@ -43,21 +49,32 @@ class BackupWriterAzureBlobStorage : public BackupWriterDefault
|
||||
{
|
||||
public:
|
||||
BackupWriterAzureBlobStorage(
|
||||
StorageAzureBlob::Configuration configuration_,
|
||||
const StorageAzureConfiguration & configuration_,
|
||||
bool allow_azure_native_copy,
|
||||
const ReadSettings & read_settings_,
|
||||
const WriteSettings & write_settings_,
|
||||
const ContextPtr & context_,
|
||||
bool attempt_to_create_container);
|
||||
|
||||
~BackupWriterAzureBlobStorage() override;
|
||||
|
||||
bool fileExists(const String & file_name) override;
|
||||
UInt64 getFileSize(const String & file_name) override;
|
||||
std::unique_ptr<WriteBuffer> writeFile(const String & file_name) override;
|
||||
|
||||
void copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) override;
|
||||
void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
|
||||
bool copy_encrypted, UInt64 start_pos, UInt64 length) override;
|
||||
void copyDataToFile(
|
||||
const String & path_in_backup,
|
||||
const CreateReadBufferFunction & create_read_buffer,
|
||||
UInt64 start_pos,
|
||||
UInt64 length) override;
|
||||
|
||||
void copyFileFromDisk(
|
||||
const String & path_in_backup,
|
||||
DiskPtr src_disk,
|
||||
const String & src_path,
|
||||
bool copy_encrypted,
|
||||
UInt64 start_pos,
|
||||
UInt64 length) override;
|
||||
|
||||
void copyFile(const String & destination, const String & source, size_t size) override;
|
||||
|
||||
@ -67,9 +84,10 @@ public:
|
||||
private:
|
||||
std::unique_ptr<ReadBuffer> readFile(const String & file_name, size_t expected_file_size) override;
|
||||
void removeFilesBatch(const Strings & file_names);
|
||||
|
||||
const DataSourceDescription data_source_description;
|
||||
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client;
|
||||
StorageAzureBlob::Configuration configuration;
|
||||
StorageAzureConfiguration configuration;
|
||||
std::unique_ptr<AzureObjectStorage> object_storage;
|
||||
std::shared_ptr<const AzureObjectStorageSettings> settings;
|
||||
};
|
||||
|
@ -131,10 +131,10 @@ BackupReaderS3::BackupReaderS3(
|
||||
: BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderS3"))
|
||||
, s3_uri(s3_uri_)
|
||||
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false}
|
||||
, s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup))
|
||||
, s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup).value_or(S3Settings{}))
|
||||
{
|
||||
auto & request_settings = s3_settings.request_settings;
|
||||
request_settings.updateFromSettings(context_->getSettingsRef());
|
||||
request_settings.updateFromSettingsIfChanged(context_->getSettingsRef());
|
||||
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
|
||||
request_settings.allow_native_copy = allow_s3_native_copy;
|
||||
client = makeS3Client(s3_uri_, access_key_id_, secret_access_key_, s3_settings, context_);
|
||||
@ -222,10 +222,10 @@ BackupWriterS3::BackupWriterS3(
|
||||
: BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterS3"))
|
||||
, s3_uri(s3_uri_)
|
||||
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false}
|
||||
, s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup))
|
||||
, s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup).value_or(S3Settings{}))
|
||||
{
|
||||
auto & request_settings = s3_settings.request_settings;
|
||||
request_settings.updateFromSettings(context_->getSettingsRef());
|
||||
request_settings.updateFromSettingsIfChanged(context_->getSettingsRef());
|
||||
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
|
||||
request_settings.allow_native_copy = allow_s3_native_copy;
|
||||
request_settings.setStorageClassName(storage_class_name);
|
||||
|
@ -5,11 +5,11 @@
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
#include <Backups/BackupIO_AzureBlobStorage.h>
|
||||
#include <Storages/StorageAzureBlob.h>
|
||||
#include <Backups/BackupImpl.h>
|
||||
#include <IO/Archives/hasRegisteredArchiveFileExtension.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <Storages/ObjectStorage/Azure/Configuration.h>
|
||||
#include <filesystem>
|
||||
#endif
|
||||
|
||||
@ -49,7 +49,7 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
|
||||
const String & id_arg = params.backup_info.id_arg;
|
||||
const auto & args = params.backup_info.args;
|
||||
|
||||
StorageAzureBlob::Configuration configuration;
|
||||
StorageAzureConfiguration configuration;
|
||||
|
||||
if (!id_arg.empty())
|
||||
{
|
||||
@ -81,10 +81,11 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
|
||||
}
|
||||
|
||||
if (args.size() > 1)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]");
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]");
|
||||
|
||||
if (args.size() == 1)
|
||||
configuration.blob_path = args[0].safeGet<String>();
|
||||
configuration.setPath(args[0].safeGet<String>());
|
||||
|
||||
}
|
||||
else
|
||||
@ -116,12 +117,16 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
|
||||
}
|
||||
|
||||
BackupImpl::ArchiveParams archive_params;
|
||||
if (hasRegisteredArchiveFileExtension(configuration.blob_path))
|
||||
if (hasRegisteredArchiveFileExtension(configuration.getPath()))
|
||||
{
|
||||
if (params.is_internal_backup)
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is disabled");
|
||||
|
||||
archive_params.archive_name = removeFileNameFromURL(configuration.blob_path);
|
||||
auto path = configuration.getPath();
|
||||
auto filename = removeFileNameFromURL(path);
|
||||
configuration.setPath(path);
|
||||
|
||||
archive_params.archive_name = filename;
|
||||
archive_params.compression_method = params.compression_method;
|
||||
archive_params.compression_level = params.compression_level;
|
||||
archive_params.password = params.password;
|
||||
|
@ -115,8 +115,11 @@ if (TARGET ch_contrib::nats_io)
|
||||
add_headers_and_sources(dbms Storages/NATS)
|
||||
endif()
|
||||
|
||||
add_headers_and_sources(dbms Storages/DataLakes)
|
||||
add_headers_and_sources(dbms Storages/DataLakes/Iceberg)
|
||||
add_headers_and_sources(dbms Storages/ObjectStorage)
|
||||
add_headers_and_sources(dbms Storages/ObjectStorage/Azure)
|
||||
add_headers_and_sources(dbms Storages/ObjectStorage/S3)
|
||||
add_headers_and_sources(dbms Storages/ObjectStorage/HDFS)
|
||||
add_headers_and_sources(dbms Storages/ObjectStorage/DataLakes)
|
||||
add_headers_and_sources(dbms Common/NamedCollections)
|
||||
|
||||
if (TARGET ch_contrib::amqp_cpp)
|
||||
@ -144,7 +147,6 @@ if (TARGET ch_contrib::azure_sdk)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::hdfs)
|
||||
add_headers_and_sources(dbms Storages/HDFS)
|
||||
add_headers_and_sources(dbms Disks/ObjectStorages/HDFS)
|
||||
endif()
|
||||
|
||||
|
@ -168,6 +168,9 @@
|
||||
M(ObjectStorageS3Threads, "Number of threads in the S3ObjectStorage thread pool.") \
|
||||
M(ObjectStorageS3ThreadsActive, "Number of threads in the S3ObjectStorage thread pool running a task.") \
|
||||
M(ObjectStorageS3ThreadsScheduled, "Number of queued or active jobs in the S3ObjectStorage thread pool.") \
|
||||
M(StorageObjectStorageThreads, "Number of threads in the remote table engines thread pools.") \
|
||||
M(StorageObjectStorageThreadsActive, "Number of threads in the remote table engines thread pool running a task.") \
|
||||
M(StorageObjectStorageThreadsScheduled, "Number of queued or active jobs in remote table engines thread pool.") \
|
||||
M(ObjectStorageAzureThreads, "Number of threads in the AzureObjectStorage thread pool.") \
|
||||
M(ObjectStorageAzureThreadsActive, "Number of threads in the AzureObjectStorage thread pool running a task.") \
|
||||
M(ObjectStorageAzureThreadsScheduled, "Number of queued or active jobs in the AzureObjectStorage thread pool.") \
|
||||
|
@ -41,7 +41,6 @@ UInt8 getDayOfWeek(const cctz::civil_day & date)
|
||||
case cctz::weekday::saturday: return 6;
|
||||
case cctz::weekday::sunday: return 7;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
inline cctz::time_point<cctz::seconds> lookupTz(const cctz::time_zone & cctz_time_zone, const cctz::civil_day & date)
|
||||
|
@ -34,8 +34,6 @@ Int64 IntervalKind::toAvgNanoseconds() const
|
||||
default:
|
||||
return toAvgSeconds() * NANOSECONDS_PER_SECOND;
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
Int32 IntervalKind::toAvgSeconds() const
|
||||
@ -54,7 +52,6 @@ Int32 IntervalKind::toAvgSeconds() const
|
||||
case IntervalKind::Kind::Quarter: return 7889238; /// Exactly 1/4 of a year.
|
||||
case IntervalKind::Kind::Year: return 31556952; /// The average length of a Gregorian year is equal to 365.2425 days
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
Float64 IntervalKind::toSeconds() const
|
||||
@ -80,7 +77,6 @@ Float64 IntervalKind::toSeconds() const
|
||||
default:
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not possible to get precise number of seconds in non-precise interval");
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
bool IntervalKind::isFixedLength() const
|
||||
@ -99,7 +95,6 @@ bool IntervalKind::isFixedLength() const
|
||||
case IntervalKind::Kind::Quarter:
|
||||
case IntervalKind::Kind::Year: return false;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
IntervalKind IntervalKind::fromAvgSeconds(Int64 num_seconds)
|
||||
@ -141,7 +136,6 @@ const char * IntervalKind::toKeyword() const
|
||||
case IntervalKind::Kind::Quarter: return "QUARTER";
|
||||
case IntervalKind::Kind::Year: return "YEAR";
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
|
||||
@ -161,7 +155,6 @@ const char * IntervalKind::toLowercasedKeyword() const
|
||||
case IntervalKind::Kind::Quarter: return "quarter";
|
||||
case IntervalKind::Kind::Year: return "year";
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
|
||||
@ -192,7 +185,6 @@ const char * IntervalKind::toDateDiffUnit() const
|
||||
case IntervalKind::Kind::Year:
|
||||
return "year";
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
|
||||
@ -223,7 +215,6 @@ const char * IntervalKind::toNameOfFunctionToIntervalDataType() const
|
||||
case IntervalKind::Kind::Year:
|
||||
return "toIntervalYear";
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
|
||||
@ -257,7 +248,6 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const
|
||||
case IntervalKind::Kind::Year:
|
||||
return "toYear";
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
|
||||
|
@ -54,8 +54,6 @@ String toString(TargetArch arch)
|
||||
case TargetArch::AMXTILE: return "amxtile";
|
||||
case TargetArch::AMXINT8: return "amxint8";
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -75,7 +75,6 @@ const char * TasksStatsCounters::metricsProviderString(MetricsProvider provider)
|
||||
case MetricsProvider::Netlink:
|
||||
return "netlink";
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
bool TasksStatsCounters::checkIfAvailable()
|
||||
|
@ -146,8 +146,6 @@ const char * errorMessage(Error code)
|
||||
case Error::ZSESSIONMOVED: return "Session moved to another server, so operation is ignored";
|
||||
case Error::ZNOTREADONLY: return "State-changing request is passed to read-only server";
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
bool isHardwareError(Error zk_return_code)
|
||||
|
@ -1259,11 +1259,13 @@ void ZooKeeper::initFeatureFlags()
|
||||
|
||||
void ZooKeeper::executeGenericRequest(
|
||||
const ZooKeeperRequestPtr & request,
|
||||
ResponseCallback callback)
|
||||
ResponseCallback callback,
|
||||
WatchCallbackPtr watch)
|
||||
{
|
||||
RequestInfo request_info;
|
||||
request_info.request = request;
|
||||
request_info.callback = callback;
|
||||
request_info.watch = watch;
|
||||
|
||||
pushRequest(std::move(request_info));
|
||||
}
|
||||
|
@ -139,7 +139,8 @@ public:
|
||||
|
||||
void executeGenericRequest(
|
||||
const ZooKeeperRequestPtr & request,
|
||||
ResponseCallback callback);
|
||||
ResponseCallback callback,
|
||||
WatchCallbackPtr watch = nullptr);
|
||||
|
||||
/// See the documentation about semantics of these methods in IKeeper class.
|
||||
|
||||
|
@ -466,7 +466,6 @@ void CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 so
|
||||
sw_codec->doDecompressData(source, source_size, dest, uncompressed_size);
|
||||
return;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
void CompressionCodecDeflateQpl::flushAsynchronousDecompressRequests()
|
||||
|
@ -21,6 +21,11 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
/** NOTE DoubleDelta is surprisingly bad name. The only excuse is that it comes from an academic paper.
|
||||
* Most people will think that "double delta" is just applying delta transform twice.
|
||||
* But in fact it is something more than applying delta transform twice.
|
||||
@ -142,9 +147,9 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_COMPRESS;
|
||||
extern const int CANNOT_DECOMPRESS;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
|
||||
extern const int ILLEGAL_CODEC_PARAMETER;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -163,9 +168,8 @@ inline Int64 getMaxValueForByteSize(Int8 byte_size)
|
||||
case sizeof(UInt64):
|
||||
return std::numeric_limits<Int64>::max();
|
||||
default:
|
||||
assert(false && "only 1, 2, 4 and 8 data sizes are supported");
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "only 1, 2, 4 and 8 data sizes are supported");
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
struct WriteSpec
|
||||
|
@ -5,6 +5,12 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
ClusterUpdateActions joiningToClusterUpdates(const ClusterConfigPtr & cfg, std::string_view joining)
|
||||
{
|
||||
ClusterUpdateActions out;
|
||||
@ -79,7 +85,7 @@ String serializeClusterConfig(const ClusterConfigPtr & cfg, const ClusterUpdateA
|
||||
new_config.emplace_back(RaftServerConfig{*cfg->get_server(priority->id)});
|
||||
}
|
||||
else
|
||||
UNREACHABLE();
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected update");
|
||||
}
|
||||
|
||||
for (const auto & item : cfg->get_servers())
|
||||
|
@ -990,7 +990,7 @@ KeeperServer::ConfigUpdateState KeeperServer::applyConfigUpdate(
|
||||
raft_instance->set_priority(update->id, update->priority, /*broadcast on live leader*/true);
|
||||
return Accepted;
|
||||
}
|
||||
UNREACHABLE();
|
||||
std::unreachable();
|
||||
}
|
||||
|
||||
ClusterUpdateActions KeeperServer::getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config)
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <Common/callOnce.h>
|
||||
#include <Disks/IO/IOUringReader.h>
|
||||
#include <Storages/StorageS3Settings.h>
|
||||
#include <Disks/IO/getIOUringReader.h>
|
||||
|
||||
#include <Core/ServerSettings.h>
|
||||
@ -145,9 +146,10 @@ struct ContextSharedPart : boost::noncopyable
|
||||
mutable ThrottlerPtr local_read_throttler; /// A server-wide throttler for local IO reads
|
||||
mutable ThrottlerPtr local_write_throttler; /// A server-wide throttler for local IO writes
|
||||
|
||||
std::optional<StorageS3Settings> storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of S3 storage
|
||||
|
||||
mutable std::mutex keeper_dispatcher_mutex;
|
||||
mutable std::shared_ptr<KeeperDispatcher> keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex);
|
||||
|
||||
};
|
||||
|
||||
ContextData::ContextData() = default;
|
||||
@ -453,6 +455,19 @@ std::shared_ptr<zkutil::ZooKeeper> Context::getZooKeeper() const
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot connect to ZooKeeper from Keeper");
|
||||
}
|
||||
|
||||
const StorageS3Settings & Context::getStorageS3Settings() const
|
||||
{
|
||||
std::lock_guard lock(shared->mutex);
|
||||
|
||||
if (!shared->storage_s3_settings)
|
||||
{
|
||||
const auto & config = shared->config ? *shared->config : Poco::Util::Application::instance().config();
|
||||
shared->storage_s3_settings.emplace().loadFromConfig("s3", config, getSettingsRef());
|
||||
}
|
||||
|
||||
return *shared->storage_s3_settings;
|
||||
}
|
||||
|
||||
const ServerSettings & Context::getServerSettings() const
|
||||
{
|
||||
return shared->server_settings;
|
||||
|
@ -37,6 +37,7 @@ class FilesystemCacheLog;
|
||||
class FilesystemReadPrefetchesLog;
|
||||
class BlobStorageLog;
|
||||
class IOUringReader;
|
||||
class StorageS3Settings;
|
||||
|
||||
/// A small class which owns ContextShared.
|
||||
/// We don't use something like unique_ptr directly to allow ContextShared type to be incomplete.
|
||||
@ -162,6 +163,10 @@ public:
|
||||
|
||||
zkutil::ZooKeeperPtr getZooKeeper() const;
|
||||
|
||||
const StorageS3Settings & getStorageS3Settings() const;
|
||||
|
||||
const String & getUserName() const { static std::string user; return user; }
|
||||
|
||||
const ServerSettings & getServerSettings() const;
|
||||
|
||||
bool hasTraceCollector() const;
|
||||
|
@ -667,8 +667,6 @@ public:
|
||||
case Types::AggregateFunctionState: return f(field.template get<AggregateFunctionStateData>());
|
||||
case Types::CustomType: return f(field.template get<CustomType>());
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
String dump() const;
|
||||
|
@ -116,6 +116,12 @@ class IColumn;
|
||||
M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \
|
||||
M(Bool, azure_allow_parallel_part_upload, true, "Use multiple threads for azure multipart upload.", 0) \
|
||||
M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \
|
||||
M(Bool, hdfs_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \
|
||||
M(Bool, azure_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \
|
||||
M(Bool, s3_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in S3 table engine", 0) \
|
||||
M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in HDFS table engine", 0) \
|
||||
M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \
|
||||
M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \
|
||||
M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \
|
||||
M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \
|
||||
M(UInt64, s3_request_timeout_ms, 30000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \
|
||||
@ -128,6 +134,7 @@ class IColumn;
|
||||
M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \
|
||||
M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \
|
||||
M(Bool, hdfs_skip_empty_files, false, "Allow to skip empty files in hdfs table engine", 0) \
|
||||
M(Bool, azure_skip_empty_files, false, "Allow to skip empty files in azure table engine", 0) \
|
||||
M(UInt64, hsts_max_age, 0, "Expired time for hsts. 0 means disable HSTS.", 0) \
|
||||
M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \
|
||||
M(Bool, use_uncompressed_cache, false, "Whether to use the cache of uncompressed blocks.", 0) \
|
||||
|
@ -85,12 +85,20 @@ namespace SettingsChangesHistory
|
||||
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
|
||||
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
|
||||
{
|
||||
{"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated functions"},
|
||||
{"24.6", {{"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"},
|
||||
{"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"},
|
||||
{"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"},
|
||||
{"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"},
|
||||
{"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"},
|
||||
{"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"},
|
||||
{"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"},
|
||||
}},
|
||||
{"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"},
|
||||
{"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."},
|
||||
{"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"},
|
||||
{"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."},
|
||||
{"cross_join_min_rows_to_compress", 0, 10000000, "A new setting."},
|
||||
{"cross_join_min_bytes_to_compress", 0, 1_GiB, "A new setting."},
|
||||
{"cross_join_min_rows_to_compress", 0, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."},
|
||||
{"cross_join_min_bytes_to_compress", 0, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."},
|
||||
{"http_max_chunk_size", 0, 0, "Internal limitation"},
|
||||
{"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."},
|
||||
{"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."},
|
||||
|
@ -36,7 +36,6 @@ String ISerialization::kindToString(Kind kind)
|
||||
case Kind::SPARSE:
|
||||
return "Sparse";
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
ISerialization::Kind ISerialization::stringToKind(const String & str)
|
||||
|
@ -11,7 +11,7 @@
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Storages/HDFS/HDFSCommon.h>
|
||||
#include <Storages/ObjectStorage/HDFS/HDFSCommon.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
#include <Common/re2.h>
|
||||
|
@ -140,7 +140,6 @@ private:
|
||||
case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE:
|
||||
return "REMOTE_FS_READ_AND_PUT_IN_CACHE";
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
size_t first_offset = 0;
|
||||
|
@ -13,7 +13,7 @@
|
||||
#include <azure/core/io/body_stream.hpp>
|
||||
#include <Common/ThreadPoolTaskTracker.h>
|
||||
#include <Common/BufferAllocationPolicy.h>
|
||||
#include <Storages/StorageAzureBlob.h>
|
||||
#include <Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h>
|
||||
|
||||
namespace Poco
|
||||
{
|
||||
|
@ -79,14 +79,14 @@ private:
|
||||
|
||||
for (const auto & blob : blobs_list)
|
||||
{
|
||||
batch.emplace_back(
|
||||
batch.emplace_back(std::make_shared<RelativePathWithMetadata>(
|
||||
blob.Name,
|
||||
ObjectMetadata{
|
||||
static_cast<uint64_t>(blob.BlobSize),
|
||||
Poco::Timestamp::fromEpochTime(
|
||||
std::chrono::duration_cast<std::chrono::seconds>(
|
||||
static_cast<std::chrono::system_clock::time_point>(blob.Details.LastModified).time_since_epoch()).count()),
|
||||
{}});
|
||||
{}}));
|
||||
}
|
||||
|
||||
if (!blob_list_response.NextPageToken.HasValue() || blob_list_response.NextPageToken.Value().empty())
|
||||
@ -148,15 +148,15 @@ bool AzureObjectStorage::exists(const StoredObject & object) const
|
||||
return false;
|
||||
}
|
||||
|
||||
ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_prefix) const
|
||||
ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const
|
||||
{
|
||||
auto settings_ptr = settings.get();
|
||||
auto client_ptr = client.get();
|
||||
|
||||
return std::make_shared<AzureIteratorAsync>(path_prefix, client_ptr, settings_ptr->list_object_keys_size);
|
||||
return std::make_shared<AzureIteratorAsync>(path_prefix, client_ptr, max_keys);
|
||||
}
|
||||
|
||||
void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const
|
||||
void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
|
||||
{
|
||||
auto client_ptr = client.get();
|
||||
|
||||
@ -179,19 +179,19 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith
|
||||
|
||||
for (const auto & blob : blobs_list)
|
||||
{
|
||||
children.emplace_back(
|
||||
children.emplace_back(std::make_shared<RelativePathWithMetadata>(
|
||||
blob.Name,
|
||||
ObjectMetadata{
|
||||
static_cast<uint64_t>(blob.BlobSize),
|
||||
Poco::Timestamp::fromEpochTime(
|
||||
std::chrono::duration_cast<std::chrono::seconds>(
|
||||
static_cast<std::chrono::system_clock::time_point>(blob.Details.LastModified).time_since_epoch()).count()),
|
||||
{}});
|
||||
{}}));
|
||||
}
|
||||
|
||||
if (max_keys)
|
||||
{
|
||||
int keys_left = max_keys - static_cast<int>(children.size());
|
||||
size_t keys_left = max_keys - children.size();
|
||||
if (keys_left <= 0)
|
||||
break;
|
||||
options.PageSizeHint = keys_left;
|
||||
@ -346,9 +346,10 @@ void AzureObjectStorage::removeObjectsIfExist(const StoredObjects & objects)
|
||||
{
|
||||
auto client_ptr = client.get();
|
||||
for (const auto & object : objects)
|
||||
{
|
||||
removeObjectImpl(object, client_ptr, true);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) const
|
||||
{
|
||||
@ -366,9 +367,9 @@ ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) c
|
||||
{
|
||||
result.attributes.emplace();
|
||||
for (const auto & [key, value] : properties.Metadata)
|
||||
(*result.attributes)[key] = value;
|
||||
result.attributes[key] = value;
|
||||
}
|
||||
result.last_modified.emplace(static_cast<std::chrono::system_clock::time_point>(properties.LastModified).time_since_epoch().count());
|
||||
result.last_modified = static_cast<std::chrono::system_clock::time_point>(properties.LastModified).time_since_epoch().count();
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -397,7 +398,9 @@ void AzureObjectStorage::copyObject( /// NOLINT
|
||||
dest_blob_client.CopyFromUri(source_blob_client.GetUrl(), copy_options);
|
||||
}
|
||||
|
||||
void AzureObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context)
|
||||
void AzureObjectStorage::applyNewSettings(
|
||||
const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix,
|
||||
ContextPtr context, const ApplyNewSettingsOptions &)
|
||||
{
|
||||
auto new_settings = getAzureBlobStorageSettings(config, config_prefix, context);
|
||||
settings.set(std::move(new_settings));
|
||||
|
@ -85,9 +85,9 @@ public:
|
||||
const String & object_namespace_,
|
||||
const String & description_);
|
||||
|
||||
void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override;
|
||||
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
|
||||
|
||||
ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const override;
|
||||
ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const override;
|
||||
|
||||
std::string getName() const override { return "AzureObjectStorage"; }
|
||||
|
||||
@ -144,7 +144,8 @@ public:
|
||||
void applyNewSettings(
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix,
|
||||
ContextPtr context) override;
|
||||
ContextPtr context,
|
||||
const ApplyNewSettingsOptions & options) override;
|
||||
|
||||
String getObjectsNamespace() const override { return object_namespace ; }
|
||||
|
||||
|
@ -176,7 +176,7 @@ std::unique_ptr<IObjectStorage> CachedObjectStorage::cloneObjectStorage(
|
||||
return object_storage->cloneObjectStorage(new_namespace, config, config_prefix, context);
|
||||
}
|
||||
|
||||
void CachedObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const
|
||||
void CachedObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
|
||||
{
|
||||
object_storage->listObjects(path, children, max_keys);
|
||||
}
|
||||
@ -192,9 +192,10 @@ void CachedObjectStorage::shutdown()
|
||||
}
|
||||
|
||||
void CachedObjectStorage::applyNewSettings(
|
||||
const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context)
|
||||
const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix,
|
||||
ContextPtr context, const ApplyNewSettingsOptions & options)
|
||||
{
|
||||
object_storage->applyNewSettings(config, config_prefix, context);
|
||||
object_storage->applyNewSettings(config, config_prefix, context, options);
|
||||
}
|
||||
|
||||
String CachedObjectStorage::getObjectsNamespace() const
|
||||
|
@ -80,7 +80,7 @@ public:
|
||||
const std::string & config_prefix,
|
||||
ContextPtr context) override;
|
||||
|
||||
void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override;
|
||||
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
|
||||
|
||||
ObjectMetadata getObjectMetadata(const std::string & path) const override;
|
||||
|
||||
@ -91,7 +91,8 @@ public:
|
||||
void applyNewSettings(
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix,
|
||||
ContextPtr context) override;
|
||||
ContextPtr context,
|
||||
const ApplyNewSettingsOptions & options) override;
|
||||
|
||||
String getObjectsNamespace() const override;
|
||||
|
||||
|
@ -544,7 +544,7 @@ void DiskObjectStorage::applyNewSettings(
|
||||
{
|
||||
/// FIXME we cannot use config_prefix that was passed through arguments because the disk may be wrapped with cache and we need another name
|
||||
const auto config_prefix = "storage_configuration.disks." + name;
|
||||
object_storage->applyNewSettings(config, config_prefix, context_);
|
||||
object_storage->applyNewSettings(config, config_prefix, context_, IObjectStorage::ApplyNewSettingsOptions{ .allow_client_change = true });
|
||||
|
||||
{
|
||||
std::unique_lock lock(resource_mutex);
|
||||
|
@ -364,18 +364,18 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFiles(IObjectStorage *
|
||||
for (const auto & object : objects)
|
||||
{
|
||||
|
||||
LOG_INFO(disk->log, "Calling restore for key for disk {}", object.relative_path);
|
||||
LOG_INFO(disk->log, "Calling restore for key for disk {}", object->relative_path);
|
||||
|
||||
/// Skip file operations objects. They will be processed separately.
|
||||
if (object.relative_path.find("/operations/") != String::npos)
|
||||
if (object->relative_path.find("/operations/") != String::npos)
|
||||
continue;
|
||||
|
||||
const auto [revision, _] = extractRevisionAndOperationFromKey(object.relative_path);
|
||||
const auto [revision, _] = extractRevisionAndOperationFromKey(object->relative_path);
|
||||
/// Filter early if it's possible to get revision from key.
|
||||
if (revision > restore_information.revision)
|
||||
continue;
|
||||
|
||||
keys_names.push_back(object.relative_path);
|
||||
keys_names.push_back(object->relative_path);
|
||||
}
|
||||
|
||||
if (!keys_names.empty())
|
||||
@ -405,15 +405,13 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::processRestoreFiles(
|
||||
{
|
||||
for (const auto & key : keys)
|
||||
{
|
||||
auto meta = source_object_storage->getObjectMetadata(key);
|
||||
auto object_attributes = meta.attributes;
|
||||
auto metadata = source_object_storage->getObjectMetadata(key);
|
||||
auto object_attributes = metadata.attributes;
|
||||
|
||||
String path;
|
||||
if (object_attributes.has_value())
|
||||
{
|
||||
/// Restore file if object has 'path' in metadata.
|
||||
auto path_entry = object_attributes->find("path");
|
||||
if (path_entry == object_attributes->end())
|
||||
auto path_entry = object_attributes.find("path");
|
||||
if (path_entry == object_attributes.end())
|
||||
{
|
||||
/// Such keys can remain after migration, we can skip them.
|
||||
LOG_WARNING(disk->log, "Skip key {} because it doesn't have 'path' in metadata", key);
|
||||
@ -421,10 +419,6 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::processRestoreFiles(
|
||||
}
|
||||
|
||||
path = path_entry->second;
|
||||
}
|
||||
else
|
||||
continue;
|
||||
|
||||
disk->createDirectories(directoryPath(path));
|
||||
auto object_key = ObjectStorageKey::createAsRelative(disk->object_key_prefix, shrinkKey(source_path, key));
|
||||
|
||||
@ -436,7 +430,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::processRestoreFiles(
|
||||
source_object_storage->copyObjectToAnotherObjectStorage(object_from, object_to, read_settings, write_settings, *disk->object_storage);
|
||||
|
||||
auto tx = disk->metadata_storage->createTransaction();
|
||||
tx->addBlobToMetadata(path, object_key, meta.size_bytes);
|
||||
tx->addBlobToMetadata(path, object_key, metadata.size_bytes);
|
||||
tx->commit();
|
||||
|
||||
LOG_TRACE(disk->log, "Restored file {}", path);
|
||||
@ -475,10 +469,10 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject
|
||||
|
||||
for (const auto & object : objects)
|
||||
{
|
||||
const auto [revision, operation] = extractRevisionAndOperationFromKey(object.relative_path);
|
||||
const auto [revision, operation] = extractRevisionAndOperationFromKey(object->relative_path);
|
||||
if (revision == UNKNOWN_REVISION)
|
||||
{
|
||||
LOG_WARNING(disk->log, "Skip key {} with unknown revision", object.relative_path);
|
||||
LOG_WARNING(disk->log, "Skip key {} with unknown revision", object->relative_path);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -491,7 +485,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject
|
||||
if (send_metadata)
|
||||
revision_counter = revision - 1;
|
||||
|
||||
auto object_attributes = *(source_object_storage->getObjectMetadata(object.relative_path).attributes);
|
||||
auto object_attributes = source_object_storage->getObjectMetadata(object->relative_path).attributes;
|
||||
if (operation == rename)
|
||||
{
|
||||
auto from_path = object_attributes["from_path"];
|
||||
|
@ -1,12 +1,13 @@
|
||||
#include <Disks/ObjectStorages/HDFS/HDFSObjectStorage.h>
|
||||
|
||||
#include <IO/copyData.h>
|
||||
#include <Storages/HDFS/WriteBufferFromHDFS.h>
|
||||
#include <Storages/HDFS/HDFSCommon.h>
|
||||
#include <Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.h>
|
||||
#include <Storages/ObjectStorage/HDFS/HDFSCommon.h>
|
||||
|
||||
#include <Storages/HDFS/ReadBufferFromHDFS.h>
|
||||
#include <Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h>
|
||||
#include <Disks/IO/ReadBufferFromRemoteFSGather.h>
|
||||
#include <Common/getRandomASCIIString.h>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
|
||||
#if USE_HDFS
|
||||
@ -18,28 +19,57 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int UNSUPPORTED_METHOD;
|
||||
extern const int HDFS_ERROR;
|
||||
extern const int ACCESS_DENIED;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
void HDFSObjectStorage::shutdown()
|
||||
void HDFSObjectStorage::initializeHDFSFS() const
|
||||
{
|
||||
if (initialized)
|
||||
return;
|
||||
|
||||
std::lock_guard lock(init_mutex);
|
||||
if (initialized)
|
||||
return;
|
||||
|
||||
hdfs_builder = createHDFSBuilder(url, config);
|
||||
hdfs_fs = createHDFSFS(hdfs_builder.get());
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
void HDFSObjectStorage::startup()
|
||||
std::string HDFSObjectStorage::extractObjectKeyFromURL(const StoredObject & object) const
|
||||
{
|
||||
/// This is very unfortunate, but for disk HDFS we made a mistake
|
||||
/// and now its behaviour is inconsistent with S3 and Azure disks.
|
||||
/// The mistake is that for HDFS we write into metadata files whole URL + data directory + key,
|
||||
/// while for S3 and Azure we write there only data_directory + key.
|
||||
/// This leads us into ambiguity that for StorageHDFS we have just key in object.remote_path,
|
||||
/// but for DiskHDFS we have there URL as well.
|
||||
auto path = object.remote_path;
|
||||
if (path.starts_with(url))
|
||||
path = path.substr(url.size());
|
||||
if (path.starts_with("/"))
|
||||
path.substr(1);
|
||||
return path;
|
||||
}
|
||||
|
||||
ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const
|
||||
{
|
||||
initializeHDFSFS();
|
||||
/// what ever data_source_description.description value is, consider that key as relative key
|
||||
return ObjectStorageKey::createAsRelative(hdfs_root_path, getRandomASCIIString(32));
|
||||
chassert(data_directory.starts_with("/"));
|
||||
return ObjectStorageKey::createAsRelative(
|
||||
fs::path(url_without_path) / data_directory.substr(1), getRandomASCIIString(32));
|
||||
}
|
||||
|
||||
bool HDFSObjectStorage::exists(const StoredObject & object) const
|
||||
{
|
||||
const auto & path = object.remote_path;
|
||||
const size_t begin_of_path = path.find('/', path.find("//") + 2);
|
||||
const String remote_fs_object_path = path.substr(begin_of_path);
|
||||
return (0 == hdfsExists(hdfs_fs.get(), remote_fs_object_path.c_str()));
|
||||
initializeHDFSFS();
|
||||
std::string path = object.remote_path;
|
||||
if (path.starts_with(url_without_path))
|
||||
path = path.substr(url_without_path.size());
|
||||
|
||||
return (0 == hdfsExists(hdfs_fs.get(), path.c_str()));
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBufferFromFileBase> HDFSObjectStorage::readObject( /// NOLINT
|
||||
@ -48,7 +78,10 @@ std::unique_ptr<ReadBufferFromFileBase> HDFSObjectStorage::readObject( /// NOLIN
|
||||
std::optional<size_t>,
|
||||
std::optional<size_t>) const
|
||||
{
|
||||
return std::make_unique<ReadBufferFromHDFS>(object.remote_path, object.remote_path, config, patchSettings(read_settings));
|
||||
initializeHDFSFS();
|
||||
auto path = extractObjectKeyFromURL(object);
|
||||
return std::make_unique<ReadBufferFromHDFS>(
|
||||
fs::path(url_without_path) / "", fs::path(data_directory) / path, config, patchSettings(read_settings));
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBufferFromFileBase> HDFSObjectStorage::readObjects( /// NOLINT
|
||||
@ -57,18 +90,15 @@ std::unique_ptr<ReadBufferFromFileBase> HDFSObjectStorage::readObjects( /// NOLI
|
||||
std::optional<size_t>,
|
||||
std::optional<size_t>) const
|
||||
{
|
||||
initializeHDFSFS();
|
||||
auto disk_read_settings = patchSettings(read_settings);
|
||||
auto read_buffer_creator =
|
||||
[this, disk_read_settings]
|
||||
(bool /* restricted_seek */, const StoredObject & object_) -> std::unique_ptr<ReadBufferFromFileBase>
|
||||
{
|
||||
const auto & path = object_.remote_path;
|
||||
size_t begin_of_path = path.find('/', path.find("//") + 2);
|
||||
auto hdfs_path = path.substr(begin_of_path);
|
||||
auto hdfs_uri = path.substr(0, begin_of_path);
|
||||
|
||||
auto path = extractObjectKeyFromURL(object_);
|
||||
return std::make_unique<ReadBufferFromHDFS>(
|
||||
hdfs_uri, hdfs_path, config, disk_read_settings, /* read_until_position */0, /* use_external_buffer */true);
|
||||
fs::path(url_without_path) / "", fs::path(data_directory) / path, config, disk_read_settings, /* read_until_position */0, /* use_external_buffer */true);
|
||||
};
|
||||
|
||||
return std::make_unique<ReadBufferFromRemoteFSGather>(
|
||||
@ -82,14 +112,21 @@ std::unique_ptr<WriteBufferFromFileBase> HDFSObjectStorage::writeObject( /// NOL
|
||||
size_t buf_size,
|
||||
const WriteSettings & write_settings)
|
||||
{
|
||||
initializeHDFSFS();
|
||||
if (attributes.has_value())
|
||||
throw Exception(
|
||||
ErrorCodes::UNSUPPORTED_METHOD,
|
||||
"HDFS API doesn't support custom attributes/metadata for stored objects");
|
||||
|
||||
std::string path = object.remote_path;
|
||||
if (path.starts_with("/"))
|
||||
path = path.substr(1);
|
||||
if (!path.starts_with(url))
|
||||
path = fs::path(url) / path;
|
||||
|
||||
/// Single O_WRONLY in libhdfs adds O_TRUNC
|
||||
return std::make_unique<WriteBufferFromHDFS>(
|
||||
object.remote_path, config, settings->replication, patchSettings(write_settings), buf_size,
|
||||
path, config, settings->replication, patchSettings(write_settings), buf_size,
|
||||
mode == WriteMode::Rewrite ? O_WRONLY : O_WRONLY | O_APPEND);
|
||||
}
|
||||
|
||||
@ -97,11 +134,13 @@ std::unique_ptr<WriteBufferFromFileBase> HDFSObjectStorage::writeObject( /// NOL
|
||||
/// Remove file. Throws exception if file doesn't exists or it's a directory.
|
||||
void HDFSObjectStorage::removeObject(const StoredObject & object)
|
||||
{
|
||||
const auto & path = object.remote_path;
|
||||
const size_t begin_of_path = path.find('/', path.find("//") + 2);
|
||||
initializeHDFSFS();
|
||||
auto path = object.remote_path;
|
||||
if (path.starts_with(url_without_path))
|
||||
path = path.substr(url_without_path.size());
|
||||
|
||||
/// Add path from root to file name
|
||||
int res = hdfsDelete(hdfs_fs.get(), path.substr(begin_of_path).c_str(), 0);
|
||||
int res = hdfsDelete(hdfs_fs.get(), path.c_str(), 0);
|
||||
if (res == -1)
|
||||
throw Exception(ErrorCodes::HDFS_ERROR, "HDFSDelete failed with path: {}", path);
|
||||
|
||||
@ -109,27 +148,85 @@ void HDFSObjectStorage::removeObject(const StoredObject & object)
|
||||
|
||||
void HDFSObjectStorage::removeObjects(const StoredObjects & objects)
|
||||
{
|
||||
initializeHDFSFS();
|
||||
for (const auto & object : objects)
|
||||
removeObject(object);
|
||||
}
|
||||
|
||||
void HDFSObjectStorage::removeObjectIfExists(const StoredObject & object)
|
||||
{
|
||||
initializeHDFSFS();
|
||||
if (exists(object))
|
||||
removeObject(object);
|
||||
}
|
||||
|
||||
void HDFSObjectStorage::removeObjectsIfExist(const StoredObjects & objects)
|
||||
{
|
||||
initializeHDFSFS();
|
||||
for (const auto & object : objects)
|
||||
removeObjectIfExists(object);
|
||||
}
|
||||
|
||||
ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string &) const
|
||||
ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) const
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::UNSUPPORTED_METHOD,
|
||||
"HDFS API doesn't support custom attributes/metadata for stored objects");
|
||||
initializeHDFSFS();
|
||||
auto * file_info = hdfsGetPathInfo(hdfs_fs.get(), path.data());
|
||||
if (!file_info)
|
||||
throw Exception(ErrorCodes::HDFS_ERROR,
|
||||
"Cannot get file info for: {}. Error: {}", path, hdfsGetLastError());
|
||||
|
||||
ObjectMetadata metadata;
|
||||
metadata.size_bytes = static_cast<size_t>(file_info->mSize);
|
||||
metadata.last_modified = Poco::Timestamp::fromEpochTime(file_info->mLastMod);
|
||||
|
||||
hdfsFreeFileInfo(file_info, 1);
|
||||
return metadata;
|
||||
}
|
||||
|
||||
void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
|
||||
{
|
||||
initializeHDFSFS();
|
||||
LOG_TEST(log, "Trying to list files for {}", path);
|
||||
|
||||
HDFSFileInfo ls;
|
||||
ls.file_info = hdfsListDirectory(hdfs_fs.get(), path.data(), &ls.length);
|
||||
|
||||
if (ls.file_info == nullptr && errno != ENOENT) // NOLINT
|
||||
{
|
||||
// ignore file not found exception, keep throw other exception,
|
||||
// libhdfs3 doesn't have function to get exception type, so use errno.
|
||||
throw Exception(ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}",
|
||||
path, String(hdfsGetLastError()));
|
||||
}
|
||||
|
||||
if (!ls.file_info && ls.length > 0)
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null");
|
||||
}
|
||||
|
||||
LOG_TEST(log, "Listed {} files for {}", ls.length, path);
|
||||
|
||||
for (int i = 0; i < ls.length; ++i)
|
||||
{
|
||||
const String file_path = fs::path(ls.file_info[i].mName).lexically_normal();
|
||||
const bool is_directory = ls.file_info[i].mKind == 'D';
|
||||
if (is_directory)
|
||||
{
|
||||
listObjects(fs::path(file_path) / "", children, max_keys);
|
||||
}
|
||||
else
|
||||
{
|
||||
children.emplace_back(std::make_shared<RelativePathWithMetadata>(
|
||||
String(file_path),
|
||||
ObjectMetadata{
|
||||
static_cast<uint64_t>(ls.file_info[i].mSize),
|
||||
Poco::Timestamp::fromEpochTime(ls.file_info[i].mLastMod),
|
||||
{}}));
|
||||
}
|
||||
|
||||
if (max_keys && children.size() >= max_keys)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void HDFSObjectStorage::copyObject( /// NOLINT
|
||||
@ -139,6 +236,7 @@ void HDFSObjectStorage::copyObject( /// NOLINT
|
||||
const WriteSettings & write_settings,
|
||||
std::optional<ObjectAttributes> object_to_attributes)
|
||||
{
|
||||
initializeHDFSFS();
|
||||
if (object_to_attributes.has_value())
|
||||
throw Exception(
|
||||
ErrorCodes::UNSUPPORTED_METHOD,
|
||||
@ -151,7 +249,10 @@ void HDFSObjectStorage::copyObject( /// NOLINT
|
||||
}
|
||||
|
||||
|
||||
std::unique_ptr<IObjectStorage> HDFSObjectStorage::cloneObjectStorage(const std::string &, const Poco::Util::AbstractConfiguration &, const std::string &, ContextPtr)
|
||||
std::unique_ptr<IObjectStorage> HDFSObjectStorage::cloneObjectStorage(
|
||||
const std::string &,
|
||||
const Poco::Util::AbstractConfiguration &,
|
||||
const std::string &, ContextPtr)
|
||||
{
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS object storage doesn't support cloning");
|
||||
}
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
#include <Disks/IDisk.h>
|
||||
#include <Disks/ObjectStorages/IObjectStorage.h>
|
||||
#include <Storages/HDFS/HDFSCommon.h>
|
||||
#include <Storages/ObjectStorage/HDFS/HDFSCommon.h>
|
||||
#include <Core/UUID.h>
|
||||
#include <memory>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
@ -16,21 +16,13 @@ namespace DB
|
||||
|
||||
struct HDFSObjectStorageSettings
|
||||
{
|
||||
|
||||
HDFSObjectStorageSettings() = default;
|
||||
|
||||
size_t min_bytes_for_seek;
|
||||
int objects_chunk_size_to_delete;
|
||||
int replication;
|
||||
|
||||
HDFSObjectStorageSettings(
|
||||
int min_bytes_for_seek_,
|
||||
int objects_chunk_size_to_delete_,
|
||||
int replication_)
|
||||
HDFSObjectStorageSettings(int min_bytes_for_seek_, int replication_)
|
||||
: min_bytes_for_seek(min_bytes_for_seek_)
|
||||
, objects_chunk_size_to_delete(objects_chunk_size_to_delete_)
|
||||
, replication(replication_)
|
||||
{}
|
||||
|
||||
size_t min_bytes_for_seek;
|
||||
int replication;
|
||||
};
|
||||
|
||||
|
||||
@ -43,20 +35,29 @@ public:
|
||||
HDFSObjectStorage(
|
||||
const String & hdfs_root_path_,
|
||||
SettingsPtr settings_,
|
||||
const Poco::Util::AbstractConfiguration & config_)
|
||||
const Poco::Util::AbstractConfiguration & config_,
|
||||
bool lazy_initialize)
|
||||
: config(config_)
|
||||
, hdfs_builder(createHDFSBuilder(hdfs_root_path_, config))
|
||||
, hdfs_fs(createHDFSFS(hdfs_builder.get()))
|
||||
, settings(std::move(settings_))
|
||||
, hdfs_root_path(hdfs_root_path_)
|
||||
, log(getLogger("HDFSObjectStorage(" + hdfs_root_path_ + ")"))
|
||||
{
|
||||
const size_t begin_of_path = hdfs_root_path_.find('/', hdfs_root_path_.find("//") + 2);
|
||||
url = hdfs_root_path_;
|
||||
url_without_path = url.substr(0, begin_of_path);
|
||||
if (begin_of_path < url.size())
|
||||
data_directory = url.substr(begin_of_path);
|
||||
else
|
||||
data_directory = "/";
|
||||
|
||||
if (!lazy_initialize)
|
||||
initializeHDFSFS();
|
||||
}
|
||||
|
||||
std::string getName() const override { return "HDFSObjectStorage"; }
|
||||
|
||||
std::string getCommonKeyPrefix() const override { return hdfs_root_path; }
|
||||
std::string getCommonKeyPrefix() const override { return url; }
|
||||
|
||||
std::string getDescription() const override { return hdfs_root_path; }
|
||||
std::string getDescription() const override { return url; }
|
||||
|
||||
ObjectStorageType getType() const override { return ObjectStorageType::HDFS; }
|
||||
|
||||
@ -100,9 +101,7 @@ public:
|
||||
const WriteSettings & write_settings,
|
||||
std::optional<ObjectAttributes> object_to_attributes = {}) override;
|
||||
|
||||
void shutdown() override;
|
||||
|
||||
void startup() override;
|
||||
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
|
||||
|
||||
String getObjectsNamespace() const override { return ""; }
|
||||
|
||||
@ -116,13 +115,28 @@ public:
|
||||
|
||||
bool isRemote() const override { return true; }
|
||||
|
||||
void startup() override { }
|
||||
|
||||
void shutdown() override { }
|
||||
|
||||
private:
|
||||
void initializeHDFSFS() const;
|
||||
std::string extractObjectKeyFromURL(const StoredObject & object) const;
|
||||
|
||||
const Poco::Util::AbstractConfiguration & config;
|
||||
|
||||
HDFSBuilderWrapper hdfs_builder;
|
||||
HDFSFSPtr hdfs_fs;
|
||||
mutable HDFSBuilderWrapper hdfs_builder;
|
||||
mutable HDFSFSPtr hdfs_fs;
|
||||
|
||||
mutable std::mutex init_mutex;
|
||||
mutable std::atomic_bool initialized{false};
|
||||
|
||||
SettingsPtr settings;
|
||||
const std::string hdfs_root_path;
|
||||
std::string url;
|
||||
std::string url_without_path;
|
||||
std::string data_directory;
|
||||
|
||||
LoggerPtr log;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -25,16 +25,16 @@ bool IObjectStorage::existsOrHasAnyChild(const std::string & path) const
|
||||
return !files.empty();
|
||||
}
|
||||
|
||||
void IObjectStorage::listObjects(const std::string &, RelativePathsWithMetadata &, int) const
|
||||
void IObjectStorage::listObjects(const std::string &, RelativePathsWithMetadata &, size_t) const
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "listObjects() is not supported");
|
||||
}
|
||||
|
||||
|
||||
ObjectStorageIteratorPtr IObjectStorage::iterate(const std::string & path_prefix) const
|
||||
ObjectStorageIteratorPtr IObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const
|
||||
{
|
||||
RelativePathsWithMetadata files;
|
||||
listObjects(path_prefix, files, 0);
|
||||
listObjects(path_prefix, files, max_keys);
|
||||
|
||||
return std::make_shared<ObjectStorageIteratorFromList>(std::move(files));
|
||||
}
|
||||
|
@ -37,6 +37,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
class ReadBufferFromFileBase;
|
||||
@ -47,21 +48,28 @@ using ObjectAttributes = std::map<std::string, std::string>;
|
||||
struct ObjectMetadata
|
||||
{
|
||||
uint64_t size_bytes = 0;
|
||||
std::optional<Poco::Timestamp> last_modified;
|
||||
std::optional<ObjectAttributes> attributes;
|
||||
Poco::Timestamp last_modified;
|
||||
ObjectAttributes attributes;
|
||||
};
|
||||
|
||||
struct RelativePathWithMetadata
|
||||
{
|
||||
String relative_path;
|
||||
ObjectMetadata metadata;
|
||||
std::optional<ObjectMetadata> metadata;
|
||||
|
||||
RelativePathWithMetadata() = default;
|
||||
|
||||
RelativePathWithMetadata(String relative_path_, ObjectMetadata metadata_)
|
||||
explicit RelativePathWithMetadata(String relative_path_, std::optional<ObjectMetadata> metadata_ = std::nullopt)
|
||||
: relative_path(std::move(relative_path_))
|
||||
, metadata(std::move(metadata_))
|
||||
{}
|
||||
|
||||
virtual ~RelativePathWithMetadata() = default;
|
||||
|
||||
virtual std::string getFileName() const { return std::filesystem::path(relative_path).filename(); }
|
||||
virtual std::string getPath() const { return relative_path; }
|
||||
virtual bool isArchive() const { return false; }
|
||||
virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); }
|
||||
};
|
||||
|
||||
struct ObjectKeyWithMetadata
|
||||
@ -77,7 +85,8 @@ struct ObjectKeyWithMetadata
|
||||
{}
|
||||
};
|
||||
|
||||
using RelativePathsWithMetadata = std::vector<RelativePathWithMetadata>;
|
||||
using RelativePathWithMetadataPtr = std::shared_ptr<RelativePathWithMetadata>;
|
||||
using RelativePathsWithMetadata = std::vector<RelativePathWithMetadataPtr>;
|
||||
using ObjectKeysWithMetadata = std::vector<ObjectKeyWithMetadata>;
|
||||
|
||||
class IObjectStorageIterator;
|
||||
@ -111,9 +120,9 @@ public:
|
||||
/// /, /a, /a/b, /a/b/c, /a/b/c/d while exists will return true only for /a/b/c/d
|
||||
virtual bool existsOrHasAnyChild(const std::string & path) const;
|
||||
|
||||
virtual void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const;
|
||||
virtual void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const;
|
||||
|
||||
virtual ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const;
|
||||
virtual ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const;
|
||||
|
||||
/// Get object metadata if supported. It should be possible to receive
|
||||
/// at least size of object
|
||||
@ -190,11 +199,15 @@ public:
|
||||
virtual void startup() = 0;
|
||||
|
||||
/// Apply new settings, in most cases reiniatilize client and some other staff
|
||||
struct ApplyNewSettingsOptions
|
||||
{
|
||||
bool allow_client_change = true;
|
||||
};
|
||||
virtual void applyNewSettings(
|
||||
const Poco::Util::AbstractConfiguration &,
|
||||
const Poco::Util::AbstractConfiguration & /* config */,
|
||||
const std::string & /*config_prefix*/,
|
||||
ContextPtr)
|
||||
{}
|
||||
ContextPtr /* context */,
|
||||
const ApplyNewSettingsOptions & /* options */) {}
|
||||
|
||||
/// Sometimes object storages have something similar to chroot or namespace, for example
|
||||
/// buckets in S3. If object storage doesn't have any namepaces return empty string.
|
||||
|
@ -10,4 +10,7 @@ using ObjectStoragePtr = std::shared_ptr<IObjectStorage>;
|
||||
class IMetadataStorage;
|
||||
using MetadataStoragePtr = std::shared_ptr<IMetadataStorage>;
|
||||
|
||||
class IObjectStorageIterator;
|
||||
using ObjectStorageIteratorPtr = std::shared_ptr<IObjectStorageIterator>;
|
||||
|
||||
}
|
||||
|
@ -172,7 +172,7 @@ ObjectMetadata LocalObjectStorage::getObjectMetadata(const std::string & path) c
|
||||
return object_metadata;
|
||||
}
|
||||
|
||||
void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int /* max_keys */) const
|
||||
void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t/* max_keys */) const
|
||||
{
|
||||
for (const auto & entry : fs::directory_iterator(path))
|
||||
{
|
||||
@ -182,8 +182,7 @@ void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWith
|
||||
continue;
|
||||
}
|
||||
|
||||
auto metadata = getObjectMetadata(entry.path());
|
||||
children.emplace_back(entry.path(), std::move(metadata));
|
||||
children.emplace_back(std::make_shared<RelativePathWithMetadata>(entry.path(), getObjectMetadata(entry.path())));
|
||||
}
|
||||
}
|
||||
|
||||
@ -223,11 +222,6 @@ std::unique_ptr<IObjectStorage> LocalObjectStorage::cloneObjectStorage(
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "cloneObjectStorage() is not implemented for LocalObjectStorage");
|
||||
}
|
||||
|
||||
void LocalObjectStorage::applyNewSettings(
|
||||
const Poco::Util::AbstractConfiguration & /* config */, const std::string & /* config_prefix */, ContextPtr /* context */)
|
||||
{
|
||||
}
|
||||
|
||||
ObjectStorageKey LocalObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const
|
||||
{
|
||||
constexpr size_t key_name_total_size = 32;
|
||||
|
@ -58,7 +58,7 @@ public:
|
||||
|
||||
ObjectMetadata getObjectMetadata(const std::string & path) const override;
|
||||
|
||||
void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override;
|
||||
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
|
||||
|
||||
bool existsOrHasAnyChild(const std::string & path) const override;
|
||||
|
||||
@ -73,11 +73,6 @@ public:
|
||||
|
||||
void startup() override;
|
||||
|
||||
void applyNewSettings(
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix,
|
||||
ContextPtr context) override;
|
||||
|
||||
String getObjectsNamespace() const override { return ""; }
|
||||
|
||||
std::unique_ptr<IObjectStorage> cloneObjectStorage(
|
||||
|
@ -105,7 +105,7 @@ std::vector<std::string> MetadataStorageFromPlainObjectStorage::getDirectChildre
|
||||
std::unordered_set<std::string> duplicates_filter;
|
||||
for (const auto & elem : remote_paths)
|
||||
{
|
||||
const auto & path = elem.relative_path;
|
||||
const auto & path = elem->relative_path;
|
||||
chassert(path.find(storage_key) == 0);
|
||||
const auto child_pos = storage_key.size();
|
||||
/// string::npos is ok.
|
||||
|
@ -26,11 +26,11 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri
|
||||
object_storage->listObjects(root, files, 0);
|
||||
for (const auto & file : files)
|
||||
{
|
||||
auto remote_path = std::filesystem::path(file.relative_path);
|
||||
auto remote_path = std::filesystem::path(file->relative_path);
|
||||
if (remote_path.filename() != PREFIX_PATH_FILE_NAME)
|
||||
continue;
|
||||
|
||||
StoredObject object{file.relative_path};
|
||||
StoredObject object{file->relative_path};
|
||||
|
||||
auto read_buf = object_storage->readObject(object);
|
||||
String local_path;
|
||||
@ -88,7 +88,7 @@ std::vector<std::string> getDirectChildrenOnRewritableDisk(
|
||||
auto skip_list = std::set<std::string>{PREFIX_PATH_FILE_NAME};
|
||||
for (const auto & elem : remote_paths)
|
||||
{
|
||||
const auto & path = elem.relative_path;
|
||||
const auto & path = elem->relative_path;
|
||||
chassert(path.find(storage_key) == 0);
|
||||
const auto child_pos = storage_key.size();
|
||||
|
||||
|
@ -17,7 +17,6 @@ std::string toString(MetadataStorageTransactionState state)
|
||||
case MetadataStorageTransactionState::PARTIALLY_ROLLED_BACK:
|
||||
return "PARTIALLY_ROLLED_BACK";
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -9,7 +9,7 @@
|
||||
#endif
|
||||
#if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD)
|
||||
#include <Disks/ObjectStorages/HDFS/HDFSObjectStorage.h>
|
||||
#include <Storages/HDFS/HDFSCommon.h>
|
||||
#include <Storages/ObjectStorage/HDFS/HDFSCommon.h>
|
||||
#endif
|
||||
#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD)
|
||||
#include <Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h>
|
||||
@ -183,7 +183,7 @@ void registerS3ObjectStorage(ObjectStorageFactory & factory)
|
||||
auto uri = getS3URI(config, config_prefix, context);
|
||||
auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix);
|
||||
auto settings = getSettings(config, config_prefix, context);
|
||||
auto client = getClient(config, config_prefix, context, *settings);
|
||||
auto client = getClient(config, config_prefix, context, *settings, true);
|
||||
auto key_generator = getKeyGenerator(uri, config, config_prefix);
|
||||
|
||||
auto object_storage = createObjectStorage<S3ObjectStorage>(
|
||||
@ -219,7 +219,7 @@ void registerS3PlainObjectStorage(ObjectStorageFactory & factory)
|
||||
auto uri = getS3URI(config, config_prefix, context);
|
||||
auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix);
|
||||
auto settings = getSettings(config, config_prefix, context);
|
||||
auto client = getClient(config, config_prefix, context, *settings);
|
||||
auto client = getClient(config, config_prefix, context, *settings, true);
|
||||
auto key_generator = getKeyGenerator(uri, config, config_prefix);
|
||||
|
||||
auto object_storage = std::make_shared<PlainObjectStorage<S3ObjectStorage>>(
|
||||
@ -253,7 +253,7 @@ void registerS3PlainRewritableObjectStorage(ObjectStorageFactory & factory)
|
||||
auto uri = getS3URI(config, config_prefix, context);
|
||||
auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix);
|
||||
auto settings = getSettings(config, config_prefix, context);
|
||||
auto client = getClient(config, config_prefix, context, *settings);
|
||||
auto client = getClient(config, config_prefix, context, *settings, true);
|
||||
auto key_generator = getKeyGenerator(uri, config, config_prefix);
|
||||
|
||||
auto object_storage = std::make_shared<PlainRewritableObjectStorage<S3ObjectStorage>>(
|
||||
@ -287,10 +287,9 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory)
|
||||
|
||||
std::unique_ptr<HDFSObjectStorageSettings> settings = std::make_unique<HDFSObjectStorageSettings>(
|
||||
config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
|
||||
config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000),
|
||||
context->getSettingsRef().hdfs_replication);
|
||||
|
||||
return createObjectStorage<HDFSObjectStorage>(ObjectStorageType::HDFS, config, config_prefix, uri, std::move(settings), config);
|
||||
return createObjectStorage<HDFSObjectStorage>(ObjectStorageType::HDFS, config, config_prefix, uri, std::move(settings), config, /* lazy_initialize */false);
|
||||
});
|
||||
}
|
||||
#endif
|
||||
|
@ -9,7 +9,7 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
RelativePathWithMetadata ObjectStorageIteratorFromList::current()
|
||||
RelativePathWithMetadataPtr ObjectStorageIteratorFromList::current()
|
||||
{
|
||||
if (!isValid())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to access invalid iterator");
|
||||
|
@ -12,9 +12,9 @@ public:
|
||||
virtual void next() = 0;
|
||||
virtual void nextBatch() = 0;
|
||||
virtual bool isValid() = 0;
|
||||
virtual RelativePathWithMetadata current() = 0;
|
||||
virtual RelativePathWithMetadataPtr current() = 0;
|
||||
virtual RelativePathsWithMetadata currentBatch() = 0;
|
||||
virtual std::optional<RelativePathsWithMetadata> getCurrrentBatchAndScheduleNext() = 0;
|
||||
virtual std::optional<RelativePathsWithMetadata> getCurrentBatchAndScheduleNext() = 0;
|
||||
virtual size_t getAccumulatedSize() const = 0;
|
||||
|
||||
virtual ~IObjectStorageIterator() = default;
|
||||
@ -27,9 +27,7 @@ class ObjectStorageIteratorFromList : public IObjectStorageIterator
|
||||
public:
|
||||
explicit ObjectStorageIteratorFromList(RelativePathsWithMetadata && batch_)
|
||||
: batch(std::move(batch_))
|
||||
, batch_iterator(batch.begin())
|
||||
{
|
||||
}
|
||||
, batch_iterator(batch.begin()) {}
|
||||
|
||||
void next() override
|
||||
{
|
||||
@ -37,32 +35,26 @@ public:
|
||||
++batch_iterator;
|
||||
}
|
||||
|
||||
void nextBatch() override
|
||||
void nextBatch() override { batch_iterator = batch.end(); }
|
||||
|
||||
bool isValid() override { return batch_iterator != batch.end(); }
|
||||
|
||||
RelativePathWithMetadataPtr current() override;
|
||||
|
||||
RelativePathsWithMetadata currentBatch() override { return batch; }
|
||||
|
||||
std::optional<RelativePathsWithMetadata> getCurrentBatchAndScheduleNext() override
|
||||
{
|
||||
batch_iterator = batch.end();
|
||||
if (batch.empty())
|
||||
return {};
|
||||
|
||||
auto current_batch = std::move(batch);
|
||||
batch = {};
|
||||
return current_batch;
|
||||
}
|
||||
|
||||
bool isValid() override
|
||||
{
|
||||
return batch_iterator != batch.end();
|
||||
}
|
||||
size_t getAccumulatedSize() const override { return batch.size(); }
|
||||
|
||||
RelativePathWithMetadata current() override;
|
||||
|
||||
RelativePathsWithMetadata currentBatch() override
|
||||
{
|
||||
return batch;
|
||||
}
|
||||
|
||||
std::optional<RelativePathsWithMetadata> getCurrrentBatchAndScheduleNext() override
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
size_t getAccumulatedSize() const override
|
||||
{
|
||||
return batch.size();
|
||||
}
|
||||
private:
|
||||
RelativePathsWithMetadata batch;
|
||||
RelativePathsWithMetadata::iterator batch_iterator;
|
||||
|
@ -11,10 +11,37 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
IObjectStorageIteratorAsync::IObjectStorageIteratorAsync(
|
||||
CurrentMetrics::Metric threads_metric,
|
||||
CurrentMetrics::Metric threads_active_metric,
|
||||
CurrentMetrics::Metric threads_scheduled_metric,
|
||||
const std::string & thread_name)
|
||||
: list_objects_pool(threads_metric, threads_active_metric, threads_scheduled_metric, 1)
|
||||
, list_objects_scheduler(threadPoolCallbackRunnerUnsafe<BatchAndHasNext>(list_objects_pool, thread_name))
|
||||
{
|
||||
}
|
||||
|
||||
IObjectStorageIteratorAsync::~IObjectStorageIteratorAsync()
|
||||
{
|
||||
if (!deactivated)
|
||||
deactivate();
|
||||
}
|
||||
|
||||
void IObjectStorageIteratorAsync::deactivate()
|
||||
{
|
||||
list_objects_pool.wait();
|
||||
deactivated = true;
|
||||
}
|
||||
|
||||
void IObjectStorageIteratorAsync::nextBatch()
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
if (!is_finished)
|
||||
if (is_finished)
|
||||
{
|
||||
current_batch.clear();
|
||||
current_batch_iterator = current_batch.begin();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!is_initialized)
|
||||
{
|
||||
@ -22,44 +49,38 @@ void IObjectStorageIteratorAsync::nextBatch()
|
||||
is_initialized = true;
|
||||
}
|
||||
|
||||
BatchAndHasNext next_batch = outcome_future.get();
|
||||
current_batch = std::move(next_batch.batch);
|
||||
accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed);
|
||||
chassert(outcome_future.valid());
|
||||
BatchAndHasNext result;
|
||||
try
|
||||
{
|
||||
result = outcome_future.get();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
is_finished = true;
|
||||
throw;
|
||||
}
|
||||
|
||||
current_batch = std::move(result.batch);
|
||||
current_batch_iterator = current_batch.begin();
|
||||
if (next_batch.has_next)
|
||||
|
||||
accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed);
|
||||
|
||||
if (result.has_next)
|
||||
outcome_future = scheduleBatch();
|
||||
else
|
||||
is_finished = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
current_batch.clear();
|
||||
current_batch_iterator = current_batch.begin();
|
||||
}
|
||||
}
|
||||
|
||||
void IObjectStorageIteratorAsync::next()
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
if (current_batch_iterator != current_batch.end())
|
||||
{
|
||||
++current_batch_iterator;
|
||||
}
|
||||
else if (!is_finished)
|
||||
{
|
||||
if (outcome_future.valid())
|
||||
{
|
||||
BatchAndHasNext next_batch = outcome_future.get();
|
||||
current_batch = std::move(next_batch.batch);
|
||||
accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed);
|
||||
current_batch_iterator = current_batch.begin();
|
||||
if (next_batch.has_next)
|
||||
outcome_future = scheduleBatch();
|
||||
if (current_batch_iterator == current_batch.end())
|
||||
nextBatch();
|
||||
else
|
||||
is_finished = true;
|
||||
}
|
||||
}
|
||||
++current_batch_iterator;
|
||||
}
|
||||
|
||||
std::future<IObjectStorageIteratorAsync::BatchAndHasNext> IObjectStorageIteratorAsync::scheduleBatch()
|
||||
@ -72,7 +93,6 @@ std::future<IObjectStorageIteratorAsync::BatchAndHasNext> IObjectStorageIterator
|
||||
}, Priority{});
|
||||
}
|
||||
|
||||
|
||||
bool IObjectStorageIteratorAsync::isValid()
|
||||
{
|
||||
if (!is_initialized)
|
||||
@ -82,7 +102,7 @@ bool IObjectStorageIteratorAsync::isValid()
|
||||
return current_batch_iterator != current_batch.end();
|
||||
}
|
||||
|
||||
RelativePathWithMetadata IObjectStorageIteratorAsync::current()
|
||||
RelativePathWithMetadataPtr IObjectStorageIteratorAsync::current()
|
||||
{
|
||||
if (!isValid())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to access invalid iterator");
|
||||
@ -101,20 +121,20 @@ RelativePathsWithMetadata IObjectStorageIteratorAsync::currentBatch()
|
||||
return current_batch;
|
||||
}
|
||||
|
||||
std::optional<RelativePathsWithMetadata> IObjectStorageIteratorAsync::getCurrrentBatchAndScheduleNext()
|
||||
std::optional<RelativePathsWithMetadata> IObjectStorageIteratorAsync::getCurrentBatchAndScheduleNext()
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
if (!is_initialized)
|
||||
nextBatch();
|
||||
|
||||
if (current_batch_iterator != current_batch.end())
|
||||
if (current_batch_iterator == current_batch.end())
|
||||
{
|
||||
auto temp_current_batch = current_batch;
|
||||
nextBatch();
|
||||
return temp_current_batch;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
auto temp_current_batch = std::move(current_batch);
|
||||
nextBatch();
|
||||
return temp_current_batch;
|
||||
}
|
||||
|
||||
size_t IObjectStorageIteratorAsync::getAccumulatedSize() const
|
||||
|
@ -17,24 +17,22 @@ public:
|
||||
CurrentMetrics::Metric threads_metric,
|
||||
CurrentMetrics::Metric threads_active_metric,
|
||||
CurrentMetrics::Metric threads_scheduled_metric,
|
||||
const std::string & thread_name)
|
||||
: list_objects_pool(threads_metric, threads_active_metric, threads_scheduled_metric, 1)
|
||||
, list_objects_scheduler(threadPoolCallbackRunnerUnsafe<BatchAndHasNext>(list_objects_pool, thread_name))
|
||||
{
|
||||
}
|
||||
const std::string & thread_name);
|
||||
|
||||
~IObjectStorageIteratorAsync() override;
|
||||
|
||||
bool isValid() override;
|
||||
|
||||
RelativePathWithMetadataPtr current() override;
|
||||
RelativePathsWithMetadata currentBatch() override;
|
||||
|
||||
void next() override;
|
||||
void nextBatch() override;
|
||||
bool isValid() override;
|
||||
RelativePathWithMetadata current() override;
|
||||
RelativePathsWithMetadata currentBatch() override;
|
||||
size_t getAccumulatedSize() const override;
|
||||
std::optional<RelativePathsWithMetadata> getCurrrentBatchAndScheduleNext() override;
|
||||
|
||||
~IObjectStorageIteratorAsync() override
|
||||
{
|
||||
list_objects_pool.wait();
|
||||
}
|
||||
size_t getAccumulatedSize() const override;
|
||||
std::optional<RelativePathsWithMetadata> getCurrentBatchAndScheduleNext() override;
|
||||
|
||||
void deactivate();
|
||||
|
||||
protected:
|
||||
|
||||
@ -50,6 +48,7 @@ protected:
|
||||
|
||||
bool is_initialized{false};
|
||||
bool is_finished{false};
|
||||
bool deactivated{false};
|
||||
|
||||
mutable std::recursive_mutex mutex;
|
||||
ThreadPool list_objects_pool;
|
||||
|
@ -61,7 +61,10 @@ void throwIfError(const Aws::Utils::Outcome<Result, Error> & response)
|
||||
if (!response.IsSuccess())
|
||||
{
|
||||
const auto & err = response.GetError();
|
||||
throw S3Exception(fmt::format("{} (Code: {})", err.GetMessage(), static_cast<size_t>(err.GetErrorType())), err.GetErrorType());
|
||||
throw S3Exception(
|
||||
fmt::format("{} (Code: {}, s3 exception: {})",
|
||||
err.GetMessage(), static_cast<size_t>(err.GetErrorType()), err.GetExceptionName()),
|
||||
err.GetErrorType());
|
||||
}
|
||||
}
|
||||
|
||||
@ -111,10 +114,19 @@ public:
|
||||
CurrentMetrics::ObjectStorageS3ThreadsScheduled,
|
||||
"ListObjectS3")
|
||||
, client(client_)
|
||||
, request(std::make_unique<S3::ListObjectsV2Request>())
|
||||
{
|
||||
request.SetBucket(bucket_);
|
||||
request.SetPrefix(path_prefix);
|
||||
request.SetMaxKeys(static_cast<int>(max_list_size));
|
||||
request->SetBucket(bucket_);
|
||||
request->SetPrefix(path_prefix);
|
||||
request->SetMaxKeys(static_cast<int>(max_list_size));
|
||||
}
|
||||
|
||||
~S3IteratorAsync() override
{
    /// Deactivate background threads before resetting the request to avoid data race.
    deactivate();
    request.reset();
    client.reset();
}
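The comment in this destructor states a general rule: anything a background thread may still touch has to outlive that thread, so the listing pool is drained first and only then are `request` and `client` released. A generic sketch of the same teardown order, standard library only (the class and members are illustrative, not ClickHouse code):

```cpp
#include <atomic>
#include <chrono>
#include <memory>
#include <thread>

// A worker thread keeps reading a member owned by the enclosing object.
// Stopping the worker before dropping that member, as deactivate() does in
// the diff above, is what removes the data race.
class Lister
{
public:
    Lister() : request(std::make_unique<int>(0)), worker([this] { run(); }) {}

    ~Lister()
    {
        stop();            // analogous to deactivate(): no more background access
        request.reset();   // only now is it safe to release members the worker used
    }

private:
    void run()
    {
        while (!stopped.load())
        {
            ++*request;    // touches a member owned by the object
            std::this_thread::sleep_for(std::chrono::milliseconds(1));
        }
    }

    void stop()
    {
        stopped.store(true);
        if (worker.joinable())
            worker.join();
    }

    std::unique_ptr<int> request;
    std::atomic<bool> stopped{false};
    std::thread worker;
};

int main()
{
    Lister lister;  // destruction demonstrates the safe teardown order
}
```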
|
||||
|
||||
private:
|
||||
@ -123,34 +135,32 @@ private:
|
||||
ProfileEvents::increment(ProfileEvents::S3ListObjects);
|
||||
ProfileEvents::increment(ProfileEvents::DiskS3ListObjects);
|
||||
|
||||
bool result = false;
|
||||
auto outcome = client->ListObjectsV2(request);
|
||||
auto outcome = client->ListObjectsV2(*request);
|
||||
|
||||
/// Outcome failure will be handled on the caller side.
|
||||
if (outcome.IsSuccess())
|
||||
{
|
||||
request->SetContinuationToken(outcome.GetResult().GetNextContinuationToken());
|
||||
|
||||
auto objects = outcome.GetResult().GetContents();
|
||||
|
||||
result = !objects.empty();
|
||||
|
||||
for (const auto & object : objects)
|
||||
batch.emplace_back(
|
||||
object.GetKey(),
|
||||
ObjectMetadata{static_cast<uint64_t>(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), {}}
|
||||
);
|
||||
|
||||
if (result)
|
||||
request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken());
|
||||
|
||||
return result;
|
||||
{
|
||||
ObjectMetadata metadata{static_cast<uint64_t>(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), {}};
|
||||
batch.emplace_back(std::make_shared<RelativePathWithMetadata>(object.GetKey(), std::move(metadata)));
|
||||
}
|
||||
|
||||
throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}",
|
||||
quoteString(request.GetBucket()), quoteString(request.GetPrefix()),
|
||||
/// It returns false when all objects were returned
|
||||
return outcome.GetResult().GetIsTruncated();
|
||||
}
|
||||
|
||||
throw S3Exception(outcome.GetError().GetErrorType(),
|
||||
"Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}",
|
||||
quoteString(request->GetBucket()), quoteString(request->GetPrefix()),
|
||||
backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage()));
|
||||
}
|
||||
|
||||
std::shared_ptr<const S3::Client> client;
|
||||
S3::ListObjectsV2Request request;
|
||||
std::unique_ptr<S3::ListObjectsV2Request> request;
|
||||
};
|
||||
|
||||
}
|
||||
@ -248,12 +258,16 @@ std::unique_ptr<WriteBufferFromFileBase> S3ObjectStorage::writeObject( /// NOLIN
|
||||
if (mode != WriteMode::Rewrite)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 doesn't support append to files");
|
||||
|
||||
auto settings_ptr = s3_settings.get();
|
||||
S3Settings::RequestSettings request_settings = s3_settings.get()->request_settings;
|
||||
if (auto query_context = CurrentThread::getQueryContext())
|
||||
{
|
||||
request_settings.updateFromSettingsIfChanged(query_context->getSettingsRef());
|
||||
}
|
||||
|
||||
ThreadPoolCallbackRunnerUnsafe<void> scheduler;
|
||||
if (write_settings.s3_allow_parallel_part_upload)
|
||||
scheduler = threadPoolCallbackRunnerUnsafe<void>(getThreadPoolWriter(), "VFSWrite");
|
||||
|
||||
|
||||
auto blob_storage_log = BlobStorageLogWriter::create(disk_name);
|
||||
if (blob_storage_log)
|
||||
blob_storage_log->local_path = object.local_path;
|
||||
@ -263,7 +277,7 @@ std::unique_ptr<WriteBufferFromFileBase> S3ObjectStorage::writeObject( /// NOLIN
|
||||
uri.bucket,
|
||||
object.remote_path,
|
||||
buf_size,
|
||||
settings_ptr->request_settings,
|
||||
request_settings,
|
||||
std::move(blob_storage_log),
|
||||
attributes,
|
||||
std::move(scheduler),
|
||||
@ -271,13 +285,13 @@ std::unique_ptr<WriteBufferFromFileBase> S3ObjectStorage::writeObject( /// NOLIN
|
||||
}
|
||||
|
||||
|
||||
ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix) const
|
||||
ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const
|
||||
{
|
||||
auto settings_ptr = s3_settings.get();
|
||||
return std::make_shared<S3IteratorAsync>(uri.bucket, path_prefix, client.get(), settings_ptr->list_object_keys_size);
|
||||
return std::make_shared<S3IteratorAsync>(uri.bucket, path_prefix, client.get(), max_keys);
|
||||
}
|
||||
|
||||
void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const
|
||||
void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
|
||||
{
|
||||
auto settings_ptr = s3_settings.get();
|
||||
|
||||
@ -285,7 +299,7 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet
|
||||
request.SetBucket(uri.bucket);
|
||||
request.SetPrefix(path);
|
||||
if (max_keys)
|
||||
request.SetMaxKeys(max_keys);
|
||||
request.SetMaxKeys(static_cast<int>(max_keys));
|
||||
else
|
||||
request.SetMaxKeys(settings_ptr->list_object_keys_size);
|
||||
|
||||
@ -305,19 +319,19 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet
|
||||
break;
|
||||
|
||||
for (const auto & object : objects)
|
||||
children.emplace_back(
|
||||
children.emplace_back(std::make_shared<RelativePathWithMetadata>(
|
||||
object.GetKey(),
|
||||
ObjectMetadata{
|
||||
static_cast<uint64_t>(object.GetSize()),
|
||||
Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()),
|
||||
{}});
|
||||
{}}));
|
||||
|
||||
if (max_keys)
|
||||
{
|
||||
int keys_left = max_keys - static_cast<int>(children.size());
|
||||
size_t keys_left = max_keys - children.size();
|
||||
if (keys_left <= 0)
|
||||
break;
|
||||
request.SetMaxKeys(keys_left);
|
||||
request.SetMaxKeys(static_cast<int>(keys_left));
|
||||
}
|
||||
|
||||
request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken());
|
||||
@ -425,14 +439,16 @@ void S3ObjectStorage::removeObjectsIfExist(const StoredObjects & objects)
|
||||
std::optional<ObjectMetadata> S3ObjectStorage::tryGetObjectMetadata(const std::string & path) const
|
||||
{
|
||||
auto settings_ptr = s3_settings.get();
|
||||
auto object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true, /* throw_on_error= */ false);
|
||||
auto object_info = S3::getObjectInfo(
|
||||
*client.get(), uri.bucket, path, {}, settings_ptr->request_settings,
|
||||
/* with_metadata= */ true, /* throw_on_error= */ false);
|
||||
|
||||
if (object_info.size == 0 && object_info.last_modification_time == 0 && object_info.metadata.empty())
|
||||
return {};
|
||||
|
||||
ObjectMetadata result;
|
||||
result.size_bytes = object_info.size;
|
||||
result.last_modified = object_info.last_modification_time;
|
||||
result.last_modified = Poco::Timestamp::fromEpochTime(object_info.last_modification_time);
|
||||
result.attributes = object_info.metadata;
|
||||
|
||||
return result;
|
||||
@ -441,11 +457,20 @@ std::optional<ObjectMetadata> S3ObjectStorage::tryGetObjectMetadata(const std::s
|
||||
ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) const
|
||||
{
|
||||
auto settings_ptr = s3_settings.get();
|
||||
auto object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true);
|
||||
S3::ObjectInfo object_info;
|
||||
try
|
||||
{
|
||||
object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true);
|
||||
}
|
||||
catch (DB::Exception & e)
|
||||
{
|
||||
e.addMessage("while reading " + path);
|
||||
throw;
|
||||
}
|
||||
|
||||
ObjectMetadata result;
|
||||
result.size_bytes = object_info.size;
|
||||
result.last_modified = object_info.last_modification_time;
|
||||
result.last_modified = Poco::Timestamp::fromEpochTime(object_info.last_modification_time);
|
||||
result.attributes = object_info.metadata;
|
||||
|
||||
return result;
|
||||
@ -544,19 +569,37 @@ void S3ObjectStorage::startup()
|
||||
const_cast<S3::Client &>(*client.get()).EnableRequestProcessing();
|
||||
}
|
||||
|
||||
void S3ObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context)
|
||||
void S3ObjectStorage::applyNewSettings(
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix,
|
||||
ContextPtr context,
|
||||
const ApplyNewSettingsOptions & options)
|
||||
{
|
||||
auto new_s3_settings = getSettings(config, config_prefix, context);
|
||||
auto new_client = getClient(config, config_prefix, context, *new_s3_settings);
|
||||
s3_settings.set(std::move(new_s3_settings));
|
||||
auto settings_from_config = getSettings(config, config_prefix, context, context->getSettingsRef().s3_validate_request_settings);
|
||||
auto modified_settings = std::make_unique<S3ObjectStorageSettings>(*s3_settings.get());
|
||||
modified_settings->auth_settings.updateFrom(settings_from_config->auth_settings);
|
||||
|
||||
if (auto endpoint_settings = context->getStorageS3Settings().getSettings(uri.uri.toString(), context->getUserName()))
|
||||
modified_settings->auth_settings.updateFrom(endpoint_settings->auth_settings);
|
||||
|
||||
auto current_settings = s3_settings.get();
|
||||
if (options.allow_client_change
|
||||
&& (current_settings->auth_settings.hasUpdates(modified_settings->auth_settings) || for_disk_s3))
|
||||
{
|
||||
auto new_client = getClient(config, config_prefix, context, *modified_settings, for_disk_s3, &uri);
|
||||
client.set(std::move(new_client));
|
||||
}
|
||||
s3_settings.set(std::move(modified_settings));
|
||||
}
|
||||
|
||||
std::unique_ptr<IObjectStorage> S3ObjectStorage::cloneObjectStorage(
|
||||
const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context)
|
||||
const std::string & new_namespace,
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix,
|
||||
ContextPtr context)
|
||||
{
|
||||
auto new_s3_settings = getSettings(config, config_prefix, context);
|
||||
auto new_client = getClient(config, config_prefix, context, *new_s3_settings);
|
||||
auto new_client = getClient(config, config_prefix, context, *new_s3_settings, true);
|
||||
|
||||
auto new_uri{uri};
|
||||
new_uri.bucket = new_namespace;
|
||||
|
@ -21,11 +21,13 @@ struct S3ObjectStorageSettings
|
||||
|
||||
S3ObjectStorageSettings(
|
||||
const S3Settings::RequestSettings & request_settings_,
|
||||
const S3::AuthSettings & auth_settings_,
|
||||
uint64_t min_bytes_for_seek_,
|
||||
int32_t list_object_keys_size_,
|
||||
int32_t objects_chunk_size_to_delete_,
|
||||
bool read_only_)
|
||||
: request_settings(request_settings_)
|
||||
, auth_settings(auth_settings_)
|
||||
, min_bytes_for_seek(min_bytes_for_seek_)
|
||||
, list_object_keys_size(list_object_keys_size_)
|
||||
, objects_chunk_size_to_delete(objects_chunk_size_to_delete_)
|
||||
@ -33,6 +35,7 @@ struct S3ObjectStorageSettings
|
||||
{}
|
||||
|
||||
S3Settings::RequestSettings request_settings;
|
||||
S3::AuthSettings auth_settings;
|
||||
|
||||
uint64_t min_bytes_for_seek;
|
||||
int32_t list_object_keys_size;
|
||||
@ -50,7 +53,8 @@ private:
|
||||
S3::URI uri_,
|
||||
const S3Capabilities & s3_capabilities_,
|
||||
ObjectStorageKeysGeneratorPtr key_generator_,
|
||||
const String & disk_name_)
|
||||
const String & disk_name_,
|
||||
bool for_disk_s3_ = true)
|
||||
: uri(uri_)
|
||||
, disk_name(disk_name_)
|
||||
, client(std::move(client_))
|
||||
@ -58,6 +62,7 @@ private:
|
||||
, s3_capabilities(s3_capabilities_)
|
||||
, key_generator(std::move(key_generator_))
|
||||
, log(getLogger(logger_name))
|
||||
, for_disk_s3(for_disk_s3_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -98,9 +103,9 @@ public:
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
const WriteSettings & write_settings = {}) override;
|
||||
|
||||
void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override;
|
||||
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
|
||||
|
||||
ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const override;
|
||||
ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const override;
|
||||
|
||||
/// Uses `DeleteObjectRequest`.
|
||||
void removeObject(const StoredObject & object) override;
|
||||
@ -142,7 +147,8 @@ public:
|
||||
void applyNewSettings(
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix,
|
||||
ContextPtr context) override;
|
||||
ContextPtr context,
|
||||
const ApplyNewSettingsOptions & options) override;
|
||||
|
||||
std::string getObjectsNamespace() const override { return uri.bucket; }
|
||||
|
||||
@ -179,6 +185,8 @@ private:
|
||||
ObjectStorageKeysGeneratorPtr key_generator;
|
||||
|
||||
LoggerPtr log;
|
||||
|
||||
const bool for_disk_s3;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -25,19 +25,29 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NO_ELEMENTS_IN_CONFIG;
|
||||
}
|
||||
|
||||
std::unique_ptr<S3ObjectStorageSettings> getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context)
|
||||
std::unique_ptr<S3ObjectStorageSettings> getSettings(
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const String & config_prefix,
|
||||
ContextPtr context,
|
||||
bool validate_settings)
|
||||
{
|
||||
const Settings & settings = context->getSettingsRef();
|
||||
S3Settings::RequestSettings request_settings(config, config_prefix, settings, "s3_");
|
||||
auto request_settings = S3Settings::RequestSettings(config, config_prefix, settings, "s3_", validate_settings);
|
||||
auto auth_settings = S3::AuthSettings::loadFromConfig(config_prefix, config);
|
||||
|
||||
return std::make_unique<S3ObjectStorageSettings>(
|
||||
request_settings,
|
||||
auth_settings,
|
||||
config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
|
||||
config.getInt(config_prefix + ".list_object_keys_size", 1000),
|
||||
config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000),
|
||||
@ -48,82 +58,99 @@ std::unique_ptr<S3::Client> getClient(
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const String & config_prefix,
|
||||
ContextPtr context,
|
||||
const S3ObjectStorageSettings & settings)
|
||||
const S3ObjectStorageSettings & settings,
|
||||
bool for_disk_s3,
|
||||
const S3::URI * url_)
|
||||
{
|
||||
const Settings & global_settings = context->getGlobalContext()->getSettingsRef();
|
||||
const Settings & local_settings = context->getSettingsRef();
|
||||
|
||||
const String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint"));
|
||||
S3::URI uri(endpoint);
|
||||
if (!uri.key.ends_with('/'))
|
||||
uri.key.push_back('/');
|
||||
const auto & auth_settings = settings.auth_settings;
|
||||
const auto & request_settings = settings.request_settings;
|
||||
|
||||
if (S3::isS3ExpressEndpoint(endpoint) && !config.has(config_prefix + ".region"))
|
||||
S3::URI url;
|
||||
if (for_disk_s3)
|
||||
{
|
||||
String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint"));
|
||||
url = S3::URI(endpoint);
|
||||
if (!url.key.ends_with('/'))
|
||||
url.key.push_back('/');
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!url_)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "URL not passed");
|
||||
url = *url_;
|
||||
}
|
||||
const bool is_s3_express_bucket = S3::isS3ExpressEndpoint(url.endpoint);
|
||||
if (is_s3_express_bucket && !config.has(config_prefix + ".region"))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Region should be explicitly specified for directory buckets ({})", config_prefix);
|
||||
}
|
||||
|
||||
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
|
||||
config.getString(config_prefix + ".region", ""),
|
||||
auth_settings.region,
|
||||
context->getRemoteHostFilter(),
|
||||
static_cast<int>(global_settings.s3_max_redirects),
|
||||
static_cast<int>(global_settings.s3_retry_attempts),
|
||||
global_settings.enable_s3_requests_logging,
|
||||
/* for_disk_s3 = */ true,
|
||||
for_disk_s3,
|
||||
settings.request_settings.get_request_throttler,
|
||||
settings.request_settings.put_request_throttler,
|
||||
uri.uri.getScheme());
|
||||
url.uri.getScheme());
|
||||
|
||||
client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", S3::DEFAULT_CONNECT_TIMEOUT_MS);
|
||||
client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", S3::DEFAULT_REQUEST_TIMEOUT_MS);
|
||||
client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", S3::DEFAULT_MAX_CONNECTIONS);
|
||||
client_configuration.connectTimeoutMs = config.getUInt64(config_prefix + ".connect_timeout_ms", local_settings.s3_connect_timeout_ms.value);
|
||||
client_configuration.requestTimeoutMs = config.getUInt64(config_prefix + ".request_timeout_ms", local_settings.s3_request_timeout_ms.value);
|
||||
client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", static_cast<unsigned>(request_settings.max_connections));
|
||||
client_configuration.http_keep_alive_timeout = config.getUInt(config_prefix + ".http_keep_alive_timeout", S3::DEFAULT_KEEP_ALIVE_TIMEOUT);
|
||||
client_configuration.http_keep_alive_max_requests = config.getUInt(config_prefix + ".http_keep_alive_max_requests", S3::DEFAULT_KEEP_ALIVE_MAX_REQUESTS);
|
||||
|
||||
client_configuration.endpointOverride = uri.endpoint;
|
||||
client_configuration.endpointOverride = url.endpoint;
|
||||
client_configuration.s3_use_adaptive_timeouts = config.getBool(
|
||||
config_prefix + ".use_adaptive_timeouts", client_configuration.s3_use_adaptive_timeouts);
|
||||
|
||||
if (for_disk_s3)
|
||||
{
|
||||
/*
|
||||
* Override proxy configuration for backwards compatibility with old configuration format.
|
||||
* */
|
||||
auto proxy_config = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat(
|
||||
ProxyConfiguration::protocolFromString(uri.uri.getScheme()),
|
||||
config_prefix,
|
||||
config
|
||||
);
|
||||
if (proxy_config)
|
||||
if (auto proxy_config = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat(
|
||||
ProxyConfiguration::protocolFromString(url.uri.getScheme()), config_prefix, config))
|
||||
{
|
||||
client_configuration.per_request_configuration
|
||||
= [proxy_config]() { return proxy_config->resolve(); };
|
||||
client_configuration.error_report
|
||||
= [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); };
|
||||
}
|
||||
}
|
||||
|
||||
HTTPHeaderEntries headers = S3::getHTTPHeaders(config_prefix, config);
|
||||
S3::ServerSideEncryptionKMSConfig sse_kms_config = S3::getSSEKMSConfig(config_prefix, config);
|
||||
|
||||
S3::ClientSettings client_settings{
|
||||
.use_virtual_addressing = uri.is_virtual_hosted_style,
|
||||
.use_virtual_addressing = url.is_virtual_hosted_style,
|
||||
.disable_checksum = local_settings.s3_disable_checksum,
|
||||
.gcs_issue_compose_request = config.getBool("s3.gcs_issue_compose_request", false),
|
||||
.is_s3express_bucket = S3::isS3ExpressEndpoint(endpoint),
|
||||
.is_s3express_bucket = is_s3_express_bucket,
|
||||
};
|
||||
|
||||
auto credentials_configuration = S3::CredentialsConfiguration
|
||||
{
|
||||
auth_settings.use_environment_credentials.value_or(context->getConfigRef().getBool("s3.use_environment_credentials", true)),
|
||||
auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)),
|
||||
auth_settings.expiration_window_seconds.value_or(context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)),
|
||||
auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)),
|
||||
};
|
||||
|
||||
return S3::ClientFactory::instance().create(
|
||||
client_configuration,
|
||||
client_settings,
|
||||
config.getString(config_prefix + ".access_key_id", ""),
|
||||
config.getString(config_prefix + ".secret_access_key", ""),
|
||||
config.getString(config_prefix + ".server_side_encryption_customer_key_base64", ""),
|
||||
auth_settings.access_key_id,
|
||||
auth_settings.secret_access_key,
|
||||
auth_settings.server_side_encryption_customer_key_base64,
|
||||
std::move(sse_kms_config),
|
||||
std::move(headers),
|
||||
S3::CredentialsConfiguration
|
||||
{
|
||||
config.getBool(config_prefix + ".use_environment_credentials", config.getBool("s3.use_environment_credentials", true)),
|
||||
config.getBool(config_prefix + ".use_insecure_imds_request", config.getBool("s3.use_insecure_imds_request", false)),
|
||||
config.getUInt64(config_prefix + ".expiration_window_seconds", config.getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)),
|
||||
config.getBool(config_prefix + ".no_sign_request", config.getBool("s3.no_sign_request", false))
|
||||
});
|
||||
auth_settings.headers,
|
||||
credentials_configuration,
|
||||
auth_settings.session_token);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -14,9 +14,19 @@ namespace DB
|
||||
|
||||
struct S3ObjectStorageSettings;
|
||||
|
||||
std::unique_ptr<S3ObjectStorageSettings> getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context);
|
||||
std::unique_ptr<S3ObjectStorageSettings> getSettings(
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const String & config_prefix,
|
||||
ContextPtr context,
|
||||
bool validate_settings = true);
|
||||
|
||||
std::unique_ptr<S3::Client> getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, const S3ObjectStorageSettings & settings);
|
||||
std::unique_ptr<S3::Client> getClient(
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const String & config_prefix,
|
||||
ContextPtr context,
|
||||
const S3ObjectStorageSettings & settings,
|
||||
bool for_disk_s3,
|
||||
const S3::URI * url_ = nullptr);
|
||||
|
||||
}
|
||||
|
||||
|
@ -344,11 +344,6 @@ void WebObjectStorage::startup()
|
||||
{
|
||||
}
|
||||
|
||||
void WebObjectStorage::applyNewSettings(
|
||||
const Poco::Util::AbstractConfiguration & /* config */, const std::string & /* config_prefix */, ContextPtr /* context */)
|
||||
{
|
||||
}
|
||||
|
||||
ObjectMetadata WebObjectStorage::getObjectMetadata(const std::string & /* path */) const
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Metadata is not supported for {}", getName());
|
||||
|
@ -72,11 +72,6 @@ public:
|
||||
|
||||
void startup() override;
|
||||
|
||||
void applyNewSettings(
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & config_prefix,
|
||||
ContextPtr context) override;
|
||||
|
||||
String getObjectsNamespace() const override { return ""; }
|
||||
|
||||
std::unique_ptr<IObjectStorage> cloneObjectStorage(
|
||||
|
@ -112,7 +112,6 @@ DiskPtr VolumeJBOD::getDisk(size_t /* index */) const
|
||||
return disks_by_size.top().disk;
|
||||
}
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
ReservationPtr VolumeJBOD::reserve(UInt64 bytes)
|
||||
@ -164,7 +163,6 @@ ReservationPtr VolumeJBOD::reserve(UInt64 bytes)
|
||||
return reservation;
|
||||
}
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
bool VolumeJBOD::areMergesAvoided() const
|
||||
|
@ -62,7 +62,6 @@ String escapingRuleToString(FormatSettings::EscapingRule escaping_rule)
|
||||
case FormatSettings::EscapingRule::Raw:
|
||||
return "Raw";
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings)
|
||||
|
@ -21,8 +21,6 @@ namespace ErrorCodes
|
||||
|
||||
const ColumnConst * checkAndGetColumnConstStringOrFixedString(const IColumn * column)
|
||||
{
|
||||
if (!column)
|
||||
return {};
|
||||
if (!isColumnConst(*column))
|
||||
return {};
|
||||
|
||||
|
@ -149,8 +149,6 @@ struct IntegerRoundingComputation
|
||||
return x;
|
||||
}
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
static ALWAYS_INLINE T compute(T x, T scale)
|
||||
@ -163,8 +161,6 @@ struct IntegerRoundingComputation
|
||||
case ScaleMode::Negative:
|
||||
return computeImpl(x, scale);
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
static ALWAYS_INLINE void compute(const T * __restrict in, size_t scale, T * __restrict out) requires std::integral<T>
|
||||
@ -247,8 +243,6 @@ inline float roundWithMode(float x, RoundingMode mode)
|
||||
case RoundingMode::Ceil: return ceilf(x);
|
||||
case RoundingMode::Trunc: return truncf(x);
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
inline double roundWithMode(double x, RoundingMode mode)
|
||||
@ -260,8 +254,6 @@ inline double roundWithMode(double x, RoundingMode mode)
|
||||
case RoundingMode::Ceil: return ceil(x);
|
||||
case RoundingMode::Trunc: return trunc(x);
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -232,7 +232,6 @@ struct TimeWindowImpl<TUMBLE>
|
||||
default:
|
||||
throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet");
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
template <typename ToType, IntervalKind::Kind unit>
|
||||
@ -422,7 +421,6 @@ struct TimeWindowImpl<HOP>
|
||||
default:
|
||||
throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet");
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
template <typename ToType, IntervalKind::Kind kind>
|
||||
|
@ -381,8 +381,6 @@ bool PointInPolygonWithGrid<CoordinateType>::contains(CoordinateType x, Coordina
|
||||
case CellType::complexPolygon:
|
||||
return boost::geometry::within(Point(x, y), polygons[cell.index_of_inner_polygon]);
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
|
||||
|
@ -35,7 +35,6 @@ namespace
|
||||
case UserDefinedSQLObjectType::Function:
|
||||
return "function_";
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
constexpr std::string_view sql_extension = ".sql";
|
||||
|
255
src/Functions/generateSnowflakeID.cpp
Normal file
@ -0,0 +1,255 @@
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionsRandom.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Core/ServerUUID.h>
|
||||
#include <Common/Logger.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include "base/types.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/* Snowflake ID
  https://en.wikipedia.org/wiki/Snowflake_ID

 0                   1                   2                   3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
|0|                         timestamp                           |
├─┼                 ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
|                   |     machine_id    |    machine_seq_num    |
└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘

- The first 41 (+ 1 top zero bit) bits is the timestamp (millisecond since Unix epoch 1 Jan 1970)
- The middle 10 bits are the machine ID
- The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by different processes
*/
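The constants that follow implement this layout. As a self-contained illustration (local names, not the constants defined below), the same shifts and masks pack and unpack the three fields:

```cpp
#include <cstdint>
#include <cstdio>

// 41 timestamp bits, 10 machine-id bits, 12 sequence bits, 1 leading zero bit.
constexpr unsigned machine_id_bits = 10;
constexpr unsigned seq_bits = 12;
static_assert(41 + machine_id_bits + seq_bits == 63);

constexpr uint64_t pack(uint64_t ts, uint64_t machine_id, uint64_t seq)
{
    return (ts << (machine_id_bits + seq_bits)) | (machine_id << seq_bits) | seq;
}

int main()
{
    const uint64_t id = pack(1700000000000ULL, 42, 7);  // example components
    const uint64_t ts = id >> (machine_id_bits + seq_bits);
    const uint64_t machine_id = (id >> seq_bits) & ((1ULL << machine_id_bits) - 1);
    const uint64_t seq = id & ((1ULL << seq_bits) - 1);
    std::printf("ts=%llu machine_id=%llu seq=%llu\n",
                (unsigned long long) ts, (unsigned long long) machine_id, (unsigned long long) seq);
}
```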
|
||||
|
||||
/// bit counts
|
||||
constexpr auto timestamp_bits_count = 41;
|
||||
constexpr auto machine_id_bits_count = 10;
|
||||
constexpr auto machine_seq_num_bits_count = 12;
|
||||
|
||||
/// bits masks for Snowflake ID components
|
||||
constexpr uint64_t machine_id_mask = ((1ull << machine_id_bits_count) - 1) << machine_seq_num_bits_count;
|
||||
constexpr uint64_t machine_seq_num_mask = (1ull << machine_seq_num_bits_count) - 1;
|
||||
|
||||
/// max values
|
||||
constexpr uint64_t max_machine_seq_num = machine_seq_num_mask;
|
||||
|
||||
uint64_t getTimestamp()
|
||||
{
|
||||
auto now = std::chrono::system_clock::now();
|
||||
auto ticks_since_epoch = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();
|
||||
return static_cast<uint64_t>(ticks_since_epoch) & ((1ull << timestamp_bits_count) - 1);
|
||||
}
|
||||
|
||||
uint64_t getMachineIdImpl()
|
||||
{
|
||||
UUID server_uuid = ServerUUID::get();
|
||||
/// hash into 64 bits
|
||||
uint64_t hi = UUIDHelpers::getHighBytes(server_uuid);
|
||||
uint64_t lo = UUIDHelpers::getLowBytes(server_uuid);
|
||||
/// return only 10 bits
|
||||
return (((hi * 11) ^ (lo * 17)) & machine_id_mask) >> machine_seq_num_bits_count;
|
||||
}
|
||||
|
||||
uint64_t getMachineId()
|
||||
{
|
||||
static uint64_t machine_id = getMachineIdImpl();
|
||||
return machine_id;
|
||||
}
|
||||
|
||||
struct SnowflakeId
|
||||
{
|
||||
uint64_t timestamp;
|
||||
uint64_t machine_id;
|
||||
uint64_t machine_seq_num;
|
||||
};
|
||||
|
||||
SnowflakeId toSnowflakeId(uint64_t snowflake)
|
||||
{
|
||||
return {.timestamp = (snowflake >> (machine_id_bits_count + machine_seq_num_bits_count)),
|
||||
.machine_id = ((snowflake & machine_id_mask) >> machine_seq_num_bits_count),
|
||||
.machine_seq_num = (snowflake & machine_seq_num_mask)};
|
||||
}
|
||||
|
||||
uint64_t fromSnowflakeId(SnowflakeId components)
|
||||
{
|
||||
return (components.timestamp << (machine_id_bits_count + machine_seq_num_bits_count) |
|
||||
components.machine_id << (machine_seq_num_bits_count) |
|
||||
components.machine_seq_num);
|
||||
}
|
||||
|
||||
struct SnowflakeIdRange
|
||||
{
|
||||
SnowflakeId begin; /// inclusive
|
||||
SnowflakeId end; /// exclusive
|
||||
};
|
||||
|
||||
/// To get the range of `input_rows_count` Snowflake IDs from `max(available, now)`:
/// 1. calculate Snowflake ID by current timestamp (`now`)
/// 2. `begin = max(available, now)`
/// 3. Calculate `end = begin + input_rows_count` handling `machine_seq_num` overflow
SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, size_t input_rows_count)
{
    /// 1. `now`
    SnowflakeId begin = {.timestamp = getTimestamp(), .machine_id = getMachineId(), .machine_seq_num = 0};

    /// 2. `begin`
    if (begin.timestamp <= available.timestamp)
    {
        begin.timestamp = available.timestamp;
        begin.machine_seq_num = available.machine_seq_num;
    }

    /// 3. `end = begin + input_rows_count`
    SnowflakeId end;
    const uint64_t seq_nums_in_current_timestamp_left = (max_machine_seq_num - begin.machine_seq_num + 1);
    if (input_rows_count >= seq_nums_in_current_timestamp_left)
        /// if sequence numbers in current timestamp is not enough for rows --> depending on how many elements input_rows_count overflows, forward timestamp by at least 1 tick
        end.timestamp = begin.timestamp + 1 + (input_rows_count - seq_nums_in_current_timestamp_left) / (max_machine_seq_num + 1);
    else
        end.timestamp = begin.timestamp;

    end.machine_id = begin.machine_id;
    end.machine_seq_num = (begin.machine_seq_num + input_rows_count) & machine_seq_num_mask;

    return {begin, end};
}
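A worked example of the overflow handling in `getRangeOfAvailableIds()` above, restating the constants locally so the numbers can be checked in isolation (this is a sketch, not project code): with 4090 sequence numbers already used in the current millisecond and 10 IDs requested, the range spills into the next millisecond and the counter wraps to 4.

```cpp
#include <cassert>
#include <cstdint>

int main()
{
    constexpr uint64_t max_machine_seq_num = (1ULL << 12) - 1;   // 4095
    constexpr uint64_t machine_seq_num_mask = max_machine_seq_num;

    const uint64_t begin_timestamp = 1000;
    const uint64_t begin_seq = 4090;
    const uint64_t input_rows_count = 10;

    const uint64_t seq_left = max_machine_seq_num - begin_seq + 1;  // 6 IDs left in this ms
    uint64_t end_timestamp = begin_timestamp;
    if (input_rows_count >= seq_left)
        end_timestamp = begin_timestamp + 1 + (input_rows_count - seq_left) / (max_machine_seq_num + 1);
    const uint64_t end_seq = (begin_seq + input_rows_count) & machine_seq_num_mask;

    assert(end_timestamp == 1001);  // the range spills into the next millisecond
    assert(end_seq == 4);           // counter wraps: (4090 + 10) mod 4096
    return 0;
}
```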
|
||||
|
||||
struct GlobalCounterPolicy
|
||||
{
|
||||
static constexpr auto name = "generateSnowflakeID";
|
||||
static constexpr auto description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)";
|
||||
|
||||
/// Guarantee counter monotonicity within one timestamp across all threads generating Snowflake IDs simultaneously.
|
||||
struct Data
|
||||
{
|
||||
static inline std::atomic<uint64_t> lowest_available_snowflake_id = 0;
|
||||
|
||||
SnowflakeId reserveRange(size_t input_rows_count)
{
    uint64_t available_snowflake_id = lowest_available_snowflake_id.load();
    SnowflakeIdRange range;
    do
    {
        range = getRangeOfAvailableIds(toSnowflakeId(available_snowflake_id), input_rows_count);
    }
    while (!lowest_available_snowflake_id.compare_exchange_weak(available_snowflake_id, fromSnowflakeId(range.end)));
    /// if CAS failed --> another thread updated `lowest_available_snowflake_id` and we re-try
    /// else --> our thread reserved ID range [begin, end) and return the beginning of the range

    return range.begin;
}
};
};
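`reserveRange()` above is an instance of a common lock-free idiom: advance a shared atomic cursor with `compare_exchange_weak` so that concurrent callers obtain disjoint `[begin, end)` ranges. A generic sketch of that idiom with a flat 64-bit counter (no Snowflake packing, nothing beyond the standard library; it is not the ClickHouse implementation):

```cpp
#include <atomic>
#include <cassert>
#include <cstdint>
#include <thread>
#include <vector>

std::atomic<uint64_t> cursor{0};

uint64_t reserve(uint64_t count)
{
    uint64_t begin = cursor.load();
    // On failure, `begin` is reloaded with the current value and the CAS retries.
    while (!cursor.compare_exchange_weak(begin, begin + count))
    {
    }
    return begin;  // caller owns [begin, begin + count)
}

int main()
{
    std::vector<std::thread> threads;
    for (int i = 0; i < 8; ++i)
        threads.emplace_back([] { for (int j = 0; j < 1000; ++j) reserve(3); });
    for (auto & t : threads)
        t.join();
    assert(cursor.load() == 8ull * 1000 * 3);  // no range was handed out twice
}
```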
|
||||
|
||||
struct ThreadLocalCounterPolicy
|
||||
{
|
||||
static constexpr auto name = "generateSnowflakeIDThreadMonotonic";
|
||||
static constexpr auto description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. This function behaves like generateSnowflakeID but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs.)";
|
||||
|
||||
/// Guarantee counter monotonicity within one timestamp within the same thread. Faster than GlobalCounterPolicy if a query uses multiple threads.
|
||||
struct Data
|
||||
{
|
||||
static inline thread_local uint64_t lowest_available_snowflake_id = 0;
|
||||
|
||||
SnowflakeId reserveRange(size_t input_rows_count)
|
||||
{
|
||||
SnowflakeIdRange range = getRangeOfAvailableIds(toSnowflakeId(lowest_available_snowflake_id), input_rows_count);
|
||||
lowest_available_snowflake_id = fromSnowflakeId(range.end);
|
||||
return range.begin;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
template <typename FillPolicy>
|
||||
class FunctionGenerateSnowflakeID : public IFunction, public FillPolicy
|
||||
{
|
||||
public:
|
||||
static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared<FunctionGenerateSnowflakeID>(); }
|
||||
|
||||
String getName() const override { return FillPolicy::name; }
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
bool isDeterministic() const override { return false; }
|
||||
bool isDeterministicInScopeOfQuery() const override { return false; }
|
||||
bool useDefaultImplementationForNulls() const override { return false; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
|
||||
bool isVariadic() const override { return true; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
FunctionArgumentDescriptors mandatory_args;
|
||||
FunctionArgumentDescriptors optional_args{
|
||||
{"expr", nullptr, nullptr, "Arbitrary expression"}
|
||||
};
|
||||
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
|
||||
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
auto col_res = ColumnVector<UInt64>::create();
|
||||
typename ColumnVector<UInt64>::Container & vec_to = col_res->getData();
|
||||
|
||||
if (input_rows_count != 0)
|
||||
{
|
||||
vec_to.resize(input_rows_count);
|
||||
|
||||
typename FillPolicy::Data data;
|
||||
SnowflakeId snowflake_id = data.reserveRange(input_rows_count); /// returns begin of available snowflake ids range
|
||||
|
||||
for (UInt64 & to_row : vec_to)
|
||||
{
|
||||
to_row = fromSnowflakeId(snowflake_id);
|
||||
if (snowflake_id.machine_seq_num == max_machine_seq_num)
|
||||
{
|
||||
/// handle overflow
|
||||
snowflake_id.machine_seq_num = 0;
|
||||
++snowflake_id.timestamp;
|
||||
}
|
||||
else
|
||||
{
|
||||
++snowflake_id.machine_seq_num;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return col_res;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<typename FillPolicy>
|
||||
void registerSnowflakeIDGenerator(auto & factory)
|
||||
{
|
||||
static constexpr auto doc_syntax_format = "{}([expression])";
|
||||
static constexpr auto example_format = "SELECT {}()";
|
||||
static constexpr auto multiple_example_format = "SELECT {f}(1), {f}(2)";
|
||||
|
||||
FunctionDocumentation::Description description = FillPolicy::description;
|
||||
FunctionDocumentation::Syntax syntax = fmt::format(doc_syntax_format, FillPolicy::name);
|
||||
FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}};
|
||||
FunctionDocumentation::ReturnedValue returned_value = "A value of type UInt64";
|
||||
FunctionDocumentation::Examples examples = {{"single", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}};
|
||||
FunctionDocumentation::Categories categories = {"Snowflake ID"};
|
||||
|
||||
factory.template registerFunction<FunctionGenerateSnowflakeID<FillPolicy>>({description, syntax, arguments, returned_value, examples, categories}, FunctionFactory::CaseInsensitive);
|
||||
}
|
||||
|
||||
REGISTER_FUNCTION(GenerateSnowflakeID)
|
||||
{
|
||||
registerSnowflakeIDGenerator<GlobalCounterPolicy>(factory);
|
||||
registerSnowflakeIDGenerator<ThreadLocalCounterPolicy>(factory);
|
||||
}
|
||||
|
||||
}
|
@ -76,7 +76,7 @@ void setVariant(UUID & uuid)
|
||||
struct FillAllRandomPolicy
|
||||
{
|
||||
static constexpr auto name = "generateUUIDv7NonMonotonic";
|
||||
static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), and a random field (74 bit, including a 2-bit variant field "2") to distinguish UUIDs within a millisecond. This function is the fastest generateUUIDv7* function but it gives no monotonicity guarantees within a timestamp.)";
|
||||
static constexpr auto description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), and a random field (74 bit, including a 2-bit variant field "2") to distinguish UUIDs within a millisecond. This function is the fastest generateUUIDv7* function but it gives no monotonicity guarantees within a timestamp.)";
|
||||
struct Data
|
||||
{
|
||||
void generate(UUID & uuid, uint64_t ts)
|
||||
@ -136,7 +136,7 @@ struct CounterFields
|
||||
struct GlobalCounterPolicy
|
||||
{
|
||||
static constexpr auto name = "generateUUIDv7";
|
||||
static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. Function generateUUIDv7 guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)";
|
||||
static constexpr auto description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. Function generateUUIDv7 guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)";
|
||||
|
||||
/// Guarantee counter monotonicity within one timestamp across all threads generating UUIDv7 simultaneously.
|
||||
struct Data
|
||||
@ -159,7 +159,7 @@ struct GlobalCounterPolicy
|
||||
struct ThreadLocalCounterPolicy
|
||||
{
|
||||
static constexpr auto name = "generateUUIDv7ThreadMonotonic";
|
||||
static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. This function behaves like generateUUIDv7 but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate UUIDs.)";
|
||||
static constexpr auto description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. This function behaves like generateUUIDv7 but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate UUIDs.)";
|
||||
|
||||
/// Guarantee counter monotonicity within one timestamp within the same thread. Faster than GlobalCounterPolicy if a query uses multiple threads.
|
||||
struct Data
|
||||
@ -186,7 +186,6 @@ class FunctionGenerateUUIDv7Base : public IFunction, public FillPolicy
|
||||
{
|
||||
public:
|
||||
String getName() const final { return FillPolicy::name; }
|
||||
|
||||
size_t getNumberOfArguments() const final { return 0; }
|
||||
bool isDeterministic() const override { return false; }
|
||||
bool isDeterministicInScopeOfQuery() const final { return false; }
|
||||
@ -198,7 +197,7 @@ public:
|
||||
{
|
||||
FunctionArgumentDescriptors mandatory_args;
|
||||
FunctionArgumentDescriptors optional_args{
|
||||
{"expr", nullptr, nullptr, "Arbitrary Expression"}
|
||||
{"expr", nullptr, nullptr, "Arbitrary expression"}
|
||||
};
|
||||
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
|
||||
|
||||
@ -270,14 +269,14 @@ void registerUUIDv7Generator(auto& factory)
|
||||
static constexpr auto example_format = "SELECT {}()";
|
||||
static constexpr auto multiple_example_format = "SELECT {f}(1), {f}(2)";
|
||||
|
||||
FunctionDocumentation::Description doc_description = FillPolicy::doc_description;
|
||||
FunctionDocumentation::Syntax doc_syntax = fmt::format(doc_syntax_format, FillPolicy::name);
|
||||
FunctionDocumentation::Arguments doc_arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}};
|
||||
FunctionDocumentation::ReturnedValue doc_returned_value = "A value of type UUID version 7.";
|
||||
FunctionDocumentation::Examples doc_examples = {{"uuid", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}};
|
||||
FunctionDocumentation::Categories doc_categories = {"UUID"};
|
||||
FunctionDocumentation::Description description = FillPolicy::description;
|
||||
FunctionDocumentation::Syntax syntax = fmt::format(doc_syntax_format, FillPolicy::name);
|
||||
FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}};
|
||||
FunctionDocumentation::ReturnedValue returned_value = "A value of type UUID version 7.";
|
||||
FunctionDocumentation::Examples examples = {{"single", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}};
|
||||
FunctionDocumentation::Categories categories = {"UUID"};
|
||||
|
||||
factory.template registerFunction<FunctionGenerateUUIDv7Base<FillPolicy>>({doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_categories}, FunctionFactory::CaseInsensitive);
|
||||
factory.template registerFunction<FunctionGenerateUUIDv7Base<FillPolicy>>({description, syntax, arguments, returned_value, examples, categories}, FunctionFactory::CaseInsensitive);
|
||||
}
|
||||
|
||||
REGISTER_FUNCTION(GenerateUUIDv7)
|
||||
|
@ -4,8 +4,7 @@
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
|
||||
#include <Storages/StorageAzureBlobCluster.h>
|
||||
#include <Storages/StorageAzureBlob.h>
|
||||
#include <Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h>
|
||||
#include <Common/threadPoolCallbackRunner.h>
|
||||
#include <base/types.h>
|
||||
#include <functional>
|
||||
|
@ -52,7 +52,6 @@ std::string toContentEncodingName(CompressionMethod method)
|
||||
case CompressionMethod::None:
|
||||
return "";
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
CompressionMethod chooseHTTPCompressionMethod(const std::string & list)
|
||||
|
@ -88,7 +88,6 @@ public:
|
||||
case Status::TOO_LARGE_COMPRESSED_BLOCK:
|
||||
return "TOO_LARGE_COMPRESSED_BLOCK";
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
explicit HadoopSnappyReadBuffer(
|
||||
|
@ -29,6 +29,7 @@ struct URI
|
||||
std::string key;
|
||||
std::string version_id;
|
||||
std::string storage_name;
|
||||
/// Path (or path pattern) in archive if uri is an archive.
|
||||
std::optional<std::string> archive_pattern;
|
||||
std::string uri_str;
|
||||
|
||||
|
@ -53,7 +53,7 @@ namespace
|
||||
const auto & result = outcome.GetResult();
|
||||
ObjectInfo object_info;
|
||||
object_info.size = static_cast<size_t>(result.GetContentLength());
|
||||
object_info.last_modification_time = result.GetLastModified().Millis() / 1000;
|
||||
object_info.last_modification_time = result.GetLastModified().Seconds();
|
||||
|
||||
if (with_metadata)
|
||||
object_info.metadata = result.GetMetadata();
|
||||
|
@ -174,8 +174,11 @@ void AuthSettings::updateFrom(const AuthSettings & from)
|
||||
if (!from.session_token.empty())
|
||||
session_token = from.session_token;
|
||||
|
||||
if (!from.headers.empty())
|
||||
headers = from.headers;
|
||||
if (!from.region.empty())
|
||||
region = from.region;
|
||||
|
||||
server_side_encryption_customer_key_base64 = from.server_side_encryption_customer_key_base64;
|
||||
server_side_encryption_kms_config = from.server_side_encryption_kms_config;
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
#include <string>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <IO/copyData.h>
|
||||
#include <Storages/HDFS/ReadBufferFromHDFS.h>
|
||||
#include <Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h>
|
||||
#include <base/types.h>
|
||||
#include <Common/Config/ConfigProcessor.h>
|
||||
|
||||
|
@ -547,7 +547,7 @@ public:
|
||||
std::unique_ptr<WriteBufferFromS3> getWriteBuffer(String file_name = "file")
|
||||
{
|
||||
S3Settings::RequestSettings request_settings;
|
||||
request_settings.updateFromSettings(settings);
|
||||
request_settings.updateFromSettingsIfChanged(settings);
|
||||
|
||||
client->resetCounters();
|
||||
|
||||
|
@ -117,8 +117,6 @@ size_t AggregatedDataVariants::size() const
|
||||
APPLY_FOR_AGGREGATED_VARIANTS(M)
|
||||
#undef M
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
size_t AggregatedDataVariants::sizeWithoutOverflowRow() const
|
||||
@ -136,8 +134,6 @@ size_t AggregatedDataVariants::sizeWithoutOverflowRow() const
|
||||
APPLY_FOR_AGGREGATED_VARIANTS(M)
|
||||
#undef M
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
const char * AggregatedDataVariants::getMethodName() const
|
||||
@ -155,8 +151,6 @@ const char * AggregatedDataVariants::getMethodName() const
|
||||
APPLY_FOR_AGGREGATED_VARIANTS(M)
|
||||
#undef M
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
bool AggregatedDataVariants::isTwoLevel() const
|
||||
@ -174,8 +168,6 @@ bool AggregatedDataVariants::isTwoLevel() const
|
||||
APPLY_FOR_AGGREGATED_VARIANTS(M)
|
||||
#undef M
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
bool AggregatedDataVariants::isConvertibleToTwoLevel() const
|
||||
|
@ -799,7 +799,6 @@ String FileSegment::stateToString(FileSegment::State state)
|
||||
case FileSegment::State::DETACHED:
|
||||
return "DETACHED";
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
bool FileSegment::assertCorrectness() const
|
||||
|
@ -126,6 +126,11 @@ bool astContainsSystemTables(ASTPtr ast, ContextPtr context)
|
||||
namespace
|
||||
{
|
||||
|
||||
bool isQueryCacheRelatedSetting(const String & setting_name)
|
||||
{
|
||||
return setting_name.starts_with("query_cache_") || setting_name.ends_with("_query_cache");
|
||||
}
|
||||
|
||||
class RemoveQueryCacheSettingsMatcher
|
||||
{
|
||||
public:
|
||||
@ -141,7 +146,7 @@ public:
|
||||
|
||||
auto is_query_cache_related_setting = [](const auto & change)
|
||||
{
|
||||
return change.name.starts_with("query_cache_") || change.name.ends_with("_query_cache");
|
||||
return isQueryCacheRelatedSetting(change.name);
|
||||
};
|
||||
|
||||
std::erase_if(set_clause->changes, is_query_cache_related_setting);
|
||||
@ -177,11 +182,11 @@ ASTPtr removeQueryCacheSettings(ASTPtr ast)
|
||||
return transformed_ast;
|
||||
}
|
||||
|
||||
IAST::Hash calculateAstHash(ASTPtr ast, const String & current_database)
|
||||
IAST::Hash calculateAstHash(ASTPtr ast, const String & current_database, const Settings & settings)
|
||||
{
|
||||
ast = removeQueryCacheSettings(ast);
|
||||
|
||||
/// Hash the AST, it must consider aliases (issue #56258)
|
||||
/// Hash the AST, we must consider aliases (issue #56258)
|
||||
SipHash hash;
|
||||
ast->updateTreeHash(hash, /*ignore_aliases=*/ false);
|
||||
|
||||
@ -189,6 +194,25 @@ IAST::Hash calculateAstHash(ASTPtr ast, const String & current_database)
|
||||
/// tables (issue #64136)
|
||||
hash.update(current_database);
|
||||
|
||||
/// Finally, hash the (changed) settings as they might affect the query result (e.g. think of settings `additional_table_filters` and `limit`).
/// Note: allChanged() returns the settings in random order. Also, update()-s of the composite hash must be done in deterministic order.
/// Therefore, collect and sort the settings first, then hash them.
Settings::Range changed_settings = settings.allChanged();
std::vector<std::pair<String, String>> changed_settings_sorted; /// (name, value)
for (const auto & setting : changed_settings)
{
    const String & name = setting.getName();
    const String & value = setting.getValueString();
    if (!isQueryCacheRelatedSetting(name)) /// see removeQueryCacheSettings() why this is a good idea
        changed_settings_sorted.push_back({name, value});
}
std::sort(changed_settings_sorted.begin(), changed_settings_sorted.end(), [](auto & lhs, auto & rhs) { return lhs.first < rhs.first; });
for (const auto & setting : changed_settings_sorted)
{
    hash.update(setting.first);
    hash.update(setting.second);
}

return getSipHash128AsPair(hash);
}
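A minimal sketch of the idea this hunk adds, using standard-library containers and an FNV-1a stand-in instead of ClickHouse's `Settings` and SipHash (names are illustrative): drop the cache-control settings, sort the remaining (name, value) pairs by name, and only then fold them into the hash, so the fingerprint does not depend on enumeration order.

```cpp
#include <algorithm>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

// Order-independent fingerprint of a set of changed settings.
uint64_t settingsFingerprint(std::vector<std::pair<std::string, std::string>> changed)
{
    // Skip settings that only control the cache itself, mirroring
    // isQueryCacheRelatedSetting() in the diff above.
    std::erase_if(changed, [](const auto & kv)
    {
        return kv.first.starts_with("query_cache_") || kv.first.ends_with("_query_cache");
    });

    // Sort by name so the hash does not depend on enumeration order.
    std::sort(changed.begin(), changed.end(),
              [](const auto & lhs, const auto & rhs) { return lhs.first < rhs.first; });

    uint64_t hash = 1469598103934665603ULL;  // FNV-1a offset basis, stand-in for SipHash
    auto mix = [&](const std::string & s)
    {
        for (unsigned char c : s)
        {
            hash ^= c;
            hash *= 1099511628211ULL;
        }
    };
    for (const auto & [name, value] : changed)
    {
        mix(name);
        mix(value);
    }
    return hash;
}
```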
|
||||
|
||||
@ -204,12 +228,13 @@ String queryStringFromAST(ASTPtr ast)
|
||||
QueryCache::Key::Key(
|
||||
ASTPtr ast_,
|
||||
const String & current_database,
|
||||
const Settings & settings,
|
||||
Block header_,
|
||||
std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_,
|
||||
bool is_shared_,
|
||||
std::chrono::time_point<std::chrono::system_clock> expires_at_,
|
||||
bool is_compressed_)
|
||||
: ast_hash(calculateAstHash(ast_, current_database))
|
||||
: ast_hash(calculateAstHash(ast_, current_database, settings))
|
||||
, header(header_)
|
||||
, user_id(user_id_)
|
||||
, current_user_roles(current_user_roles_)
|
||||
@ -220,8 +245,8 @@ QueryCache::Key::Key(
|
||||
{
|
||||
}
|
||||
|
||||
QueryCache::Key::Key(ASTPtr ast_, const String & current_database, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_)
|
||||
: QueryCache::Key(ast_, current_database, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles
|
||||
QueryCache::Key::Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_)
|
||||
: QueryCache::Key(ast_, current_database, settings, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -14,6 +14,8 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct Settings;
|
||||
|
||||
/// Does AST contain non-deterministic functions like rand() and now()?
|
||||
bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context);
|
||||
|
||||
@ -89,6 +91,7 @@ public:
|
||||
/// Ctor to construct a Key for writing into query cache.
|
||||
Key(ASTPtr ast_,
|
||||
const String & current_database,
|
||||
const Settings & settings,
|
||||
Block header_,
|
||||
std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_,
|
||||
bool is_shared_,
|
||||
@ -96,7 +99,7 @@ public:
|
||||
bool is_compressed);
|
||||
|
||||
/// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name).
|
||||
Key(ASTPtr ast_, const String & current_database, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_);
|
||||
Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_);
|
||||
|
||||
bool operator==(const Key & other) const;
|
||||
};
|
||||
|
@ -403,6 +403,10 @@ void executeQueryWithParallelReplicas(
|
||||
ContextPtr context,
|
||||
std::shared_ptr<const StorageLimitsList> storage_limits)
|
||||
{
|
||||
auto logger = getLogger("executeQueryWithParallelReplicas");
|
||||
LOG_DEBUG(logger, "Executing read from {}, header {}, query ({}), stage {} with parallel replicas",
|
||||
storage_id.getNameForLogs(), header.dumpStructure(), query_ast->formatForLogging(), processed_stage);
|
||||
|
||||
const auto & settings = context->getSettingsRef();
|
||||
|
||||
/// check cluster for parallel replicas
|
||||
|
@ -309,7 +309,6 @@ ComparisonGraphCompareResult ComparisonGraph<Node>::pathToCompareResult(Path pat
|
||||
case Path::GREATER: return inverse ? ComparisonGraphCompareResult::LESS : ComparisonGraphCompareResult::GREATER;
|
||||
case Path::GREATER_OR_EQUAL: return inverse ? ComparisonGraphCompareResult::LESS_OR_EQUAL : ComparisonGraphCompareResult::GREATER_OR_EQUAL;
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
template <ComparisonGraphNodeType Node>
|
||||
|
@ -26,7 +26,6 @@ static String typeToString(FilesystemCacheLogElement::CacheType type)
|
||||
case FilesystemCacheLogElement::CacheType::WRITE_THROUGH_CACHE:
|
||||
return "WRITE_THROUGH_CACHE";
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
ColumnsDescription FilesystemCacheLogElement::getColumnsDescription()
|
||||
|
@ -705,7 +705,6 @@ namespace
|
||||
APPLY_FOR_JOIN_VARIANTS(M)
|
||||
#undef M
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
@ -2641,8 +2640,6 @@ private:
|
||||
default:
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", parent.data->type);
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
template <typename Map>
|
||||
|
@ -322,8 +322,6 @@ public:
|
||||
APPLY_FOR_JOIN_VARIANTS(M)
|
||||
#undef M
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
size_t getTotalByteCountImpl(Type which) const
|
||||
@ -338,8 +336,6 @@ public:
|
||||
APPLY_FOR_JOIN_VARIANTS(M)
|
||||
#undef M
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
size_t getBufferSizeInCells(Type which) const
|
||||
@ -354,8 +350,6 @@ public:
|
||||
APPLY_FOR_JOIN_VARIANTS(M)
|
||||
#undef M
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
/// NOLINTEND(bugprone-macro-parentheses)
|
||||
};
|
||||
|
@ -1500,7 +1500,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
|
||||
|
||||
validateVirtualColumns(*res);
|
||||
|
||||
if (!res->supportsDynamicSubcolumnsDeprecated() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns()))
|
||||
if (!res->supportsDynamicSubcolumnsDeprecated() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns()) && mode <= LoadingStrictnessLevel::CREATE)
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Cannot create table with column of type Object, "
|
||||
|
Some files were not shown because too many files have changed in this diff.